Initial mirror of gmp library to third-party-mirror

PiperOrigin-RevId: 722724718
Change-Id: I3e40b965dda5bdb3cb4beb3fc84c9483dfc4d31e
diff --git a/README.google.md b/README.google.md
new file mode 100644
index 0000000..8bb7b73
--- /dev/null
+++ b/README.google.md
@@ -0,0 +1,7 @@
+This is GMP, the GNU Multiple Precision Arithmetic Library. It is the primary
+bignum library used in google3 C++.
+
+Google tools generally treat GMP as if it were licensed under the GNU GPL.
+However, it is actually licensed under the LGPL--only its unit tests are covered
+by the GPL. You may thus use GMP in your product just as you would use any other
+LGPL'd library.
diff --git a/asl.h b/asl.h
new file mode 100644
index 0000000..64deea0
--- /dev/null
+++ b/asl.h
@@ -0,0 +1,127 @@
+/* asl.h -- artificially small limbs support by means of C++ operator
+   overloading.
+
+Copyright 2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <cstdint>
+#include <cstdlib>
+#include <type_traits>
+
+#ifndef GMP_ASSERT_ALWAYS
+#define GMP_ASSERT_ALWAYS(cc) do {if (!(cc)) abort();} while (0)
+#endif
+
+// Missing: post++ post-- ++pre --pre bool(limb) !limb
+
+#ifndef GMP_LIMB_BITS
+#define GMP_LIMB_BITS 4
+#endif
+
+#define GMP_NUMB_MASK (2 * (1ul << (GMP_LIMB_BITS - 1)) - 1)
+
+#define BINOP_MASK(op, type)				\
+  mp_limb_t& operator op##=(const type& rhs) {		\
+    limbo = (limbo op rhs.limbo) & GMP_NUMB_MASK;	\
+    return *this;					\
+  }
+#define BINOP_NOMASK(op, type)				\
+  mp_limb_t& operator op##=(const type& rhs) {		\
+    limbo = limbo op rhs.limbo;				\
+    return *this;					\
+  }
+
+typedef std::conditional<(GMP_NUMB_MASK <= 0xffff), uint16_t, uint32_t >::type type24;
+typedef std::conditional<(GMP_NUMB_MASK <= 0xff), uint8_t, type24>::type mtype;
+
+class mp_limb_t {
+public:
+  mp_limb_t() {}	// put random garbage in limbo?
+  mp_limb_t(const unsigned int rhs) { limbo = rhs & GMP_NUMB_MASK; }
+  // mp_limb_t(const mp_limb_t& rhs) { limbo = rhs.limbo; } // Causes havoc
+  BINOP_MASK(+, mp_limb_t)
+  BINOP_MASK(-, mp_limb_t)
+  BINOP_MASK(*, mp_limb_t)
+  BINOP_NOMASK(/, mp_limb_t)
+  BINOP_NOMASK(%, mp_limb_t)
+  BINOP_NOMASK(&, mp_limb_t)
+  BINOP_NOMASK(|, mp_limb_t)
+  BINOP_NOMASK(^, mp_limb_t)
+  mp_limb_t& operator<<=(const unsigned int rhs) {
+    GMP_ASSERT_ALWAYS (rhs < GMP_LIMB_BITS);
+    limbo = (limbo << rhs) & GMP_NUMB_MASK;
+    return *this;
+  }
+  mp_limb_t& operator>>=(const unsigned int rhs) {
+    GMP_ASSERT_ALWAYS (rhs < GMP_LIMB_BITS);
+    limbo = limbo >> rhs;
+    return *this;
+  }
+  mp_limb_t operator-() {
+    return static_cast<mp_limb_t>((-limbo) & GMP_NUMB_MASK);
+    // mp_limb_t x;  x.limbo = (-limbo) & GMP_NUMB_MASK;  return x;
+  }
+  mp_limb_t operator~() {
+    return static_cast<mp_limb_t>((~limbo) & GMP_NUMB_MASK);
+    // mp_limb_t x;  x.limbo = (~limbo) & GMP_NUMB_MASK;  return x;
+  }
+  operator unsigned int() const { return limbo; }
+  operator          int() const { return limbo; }
+
+#define RELOP(op)							\
+  inline bool operator op(const mp_limb_t rhs) {			\
+    return limbo op rhs.limbo;						\
+  }
+  RELOP(==)
+  RELOP(!=)
+  RELOP(<)
+  RELOP(>)
+  RELOP(<=)
+  RELOP(>=)
+
+private:
+  mtype limbo;
+};
+
+#define BINOP2(op, type)						\
+  inline mp_limb_t operator op(mp_limb_t lhs, const type& rhs) {	\
+    lhs op##= rhs;							\
+    return lhs;								\
+  }
+
+BINOP2(+, mp_limb_t)
+BINOP2(-, mp_limb_t)
+BINOP2(*, mp_limb_t)
+BINOP2(/, mp_limb_t)
+BINOP2(%, mp_limb_t)
+BINOP2(&, mp_limb_t)
+BINOP2(|, mp_limb_t)
+BINOP2(^, mp_limb_t)
+BINOP2(<<, unsigned int)
+BINOP2(>>, unsigned int)
diff --git a/assert.c b/assert.c
new file mode 100644
index 0000000..aef95b2
--- /dev/null
+++ b/assert.c
@@ -0,0 +1,58 @@
+/* GMP assertion failure handler.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+
+
+void
+__gmp_assert_header (const char *filename, int linenum)
+{
+  if (filename != NULL && filename[0] != '\0')
+    {
+      fprintf (stderr, "%s:", filename);
+      if (linenum != -1)
+        fprintf (stderr, "%d: ", linenum);
+    }
+}
+
+void
+__gmp_assert_fail (const char *filename, int linenum,
+                   const char *expr)
+{
+  __gmp_assert_header (filename, linenum);
+  fprintf (stderr, "GNU MP assertion failed: %s\n", expr);
+  abort();
+}
diff --git a/bootstrap.c b/bootstrap.c
new file mode 100644
index 0000000..07eb416
--- /dev/null
+++ b/bootstrap.c
@@ -0,0 +1,133 @@
+/* Functions needed for bootstrapping the gmp build, based on mini-gmp.
+
+Copyright 2001, 2002, 2004, 2011, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define MINI_GMP_DONT_USE_FLOAT_H 1
+#include "mini-gmp/mini-gmp.c"
+
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#define PTR(x)   ((x)->_mp_d)
+#define SIZ(x)   ((x)->_mp_size)
+
+#define xmalloc gmp_default_alloc
+
+int
+isprime (unsigned long int t)
+{
+  unsigned long int q, r, d;
+
+  if (t < 32)
+    return (0xa08a28acUL >> t) & 1;
+  if ((t & 1) == 0)
+    return 0;
+
+  if (t % 3 == 0)
+    return 0;
+  if (t % 5 == 0)
+    return 0;
+  if (t % 7 == 0)
+    return 0;
+
+  for (d = 11;;)
+    {
+      q = t / d;
+      r = t - q * d;
+      if (q < d)
+	return 1;
+      if (r == 0)
+	break;
+      d += 2;
+      q = t / d;
+      r = t - q * d;
+      if (q < d)
+	return 1;
+      if (r == 0)
+	break;
+      d += 4;
+    }
+  return 0;
+}
+
+int
+log2_ceil (int n)
+{
+  int  e;
+  assert (n >= 1);
+  for (e = 0; ; e++)
+    if ((1 << e) >= n)
+      break;
+  return e;
+}
+
+/* Set inv to the inverse of d, in the style of invert_limb, i.e. for
+   udiv_qrnnd_preinv.  */
+void
+mpz_preinv_invert (mpz_t inv, const mpz_t d, int numb_bits)
+{
+  mpz_t  t;
+  int    norm;
+  assert (SIZ(d) > 0);
+
+  norm = numb_bits - mpz_sizeinbase (d, 2);
+  assert (norm >= 0);
+  mpz_init (t);
+  mpz_setbit (t, 2*numb_bits - norm);
+  mpz_tdiv_q (inv, t, d);
+  mpz_clrbit (inv, numb_bits);
+
+  mpz_clear (t);
+}
+
+/* Calculate r satisfying r*a == 1 mod 2^n. */
+void
+mpz_invert_2exp (mpz_t r, const mpz_t a, unsigned long n)
+{
+  mpz_t  mod;
+
+  assert (mpz_odd_p (a));
+
+  mpz_init (mod);
+  mpz_setbit (mod, n);
+
+  mpz_invert (r, a, mod);
+
+  mpz_clear (mod);
+}
+
+/* Calculate r satisfying r*a == 1 mod 2^n. */
+void
+mpz_invert_ui_2exp (mpz_t r, unsigned long a, unsigned long n)
+{
+  mpz_t  az;
+
+  mpz_init_set_ui (az, a);
+  mpz_invert_2exp (r, az, n);
+  mpz_clear (az);
+}
diff --git a/build_defs.bzl b/build_defs.bzl
new file mode 100644
index 0000000..0faa537
--- /dev/null
+++ b/build_defs.bzl
@@ -0,0 +1,144 @@
+"""Build extensions for //third_party/gmp.
+
+Architectures: Several macros in this file refer to "architecture names". These
+are the names for architectures used by GMP, not the names for architectures
+used at Google. Available names are defined by directories in
+third_party/gmp/mpn.
+"""
+
+def gmp_assembly(src, out, architectures, operation = None, included_srcs = []):
+    """Converts a GMP .asm file into architecture-specific .s files.
+
+    GMP's assembly code is not written directly in assembly (files ending in .s)
+    but rather in a slightly higher-level mixture of assembly and M4 (files
+    ending in .asm). The M4 is really just for configuration tweaks and some
+    quality-of-life improvements; it doesn't make the assembly any less
+    architecture-specific. This Starlark macro creates genrules to preprocess
+    the .asm files with M4.
+
+    Args:
+      src: The path to the input .asm file.
+      out: The path to the output .s file.
+      architectures: The list of architecture names (strings) for which .s files
+        should be generated.
+      operation: Optional. The name of the function defined in the .asm file. If
+        specified, M4 will be invoked with `-DOPERATION_{operation}`. (This is
+        necessary because GMP can generate multiple functions from the same
+        assembly skeleton.)
+      included_srcs: Optional. A list of all .asm files that are included (via
+        M4 inclusion) by the .asm file in `src`. Files in this list are copied
+        into the genrule sandbox but are not passed on the command line to M4.
+        Specify this if .s generation fails with M4 complaining about missing
+        files.
+
+    Example:
+      gmp_assembly(
+          src = "mpn/{architecture}/aors_n.asm",
+          out = "mpn/{architecture}/add_n.s",
+          architectures = [
+              "arm",
+              "arm64",
+              "x86",
+              "x86_64",
+          ],
+          operation = "add_n",
+      )
+    """
+    for architecture in architectures:
+        full_src = src.format(architecture = architecture)
+        native.genrule(
+            name = out.format(architecture = architecture).replace("/", "_").replace(".", "_"),
+            srcs = [
+                full_src,
+                "google-config-%s/config.m4" % architecture,
+                "mpn/asm-defs.m4",
+                "mpn/%s/%s-defs.m4" % (architecture, architecture),
+            ] + [src.format(architecture = architecture) for src in included_srcs],
+            outs = [out.format(architecture = architecture)],
+            tools = ["//third_party/m4"],
+            cmd = """
+                $(location //third_party/m4) -DHAVE_CONFIG_H -D__GMP_WITHIN_GMP {operation} -DPIC -I $(RULEDIR)/mpn/{architecture} -I third_party/gmp/mpn/{architecture} -I $(RULEDIR)/mpn -I third_party/gmp/mpn -I $(RULEDIR)/google-config-{architecture} {full_src} >$@
+            """.format(
+                architecture = architecture,
+                full_src = full_src,
+                operation = "-DOPERATION_" + operation if operation else "",
+            ) + select({
+                # Add noexec stack note to the generated assembly
+                "//third_party/bazel_platforms/cpu:armv7": """printf '\t.section\t.note.GNU-stack,"",%%progbits\n' >>$@""",
+                "//conditions:default": """printf '\t.section\t.note.GNU-stack,"",@progbits\n' >>$@""",
+            }),
+        )
+
+def gmp_mpn_assembly(src, out, architectures, included_srcs = []):
+    """Converts a GMP MPN .asm file into architecture-specific .s files.
+
+    MPN is GMP's architecture-specific layer and where most of its assembly code
+    exists. This macro is a convenience macro for invoking `gmp_assembly` on MPN
+    files. It allows you to simply specify the basenames of the .asm and .s
+    files, and it infers the desired operation from the output file name.
+
+    Args:
+      src: The basename of the input .asm file (inside `mpn/{architecture}`).
+      out: The basename of the output .s file (inside `mpn/{architecture}`).
+      architectures: The list of architecture names (strings) for which .s files
+        should be generated.
+      included_srcs: Optional. Basenames of all .asm files (inside
+        `mpn/{architecture}`) that are included (via M4 inclusion) by the .asm
+        file in `src`. Files in this list are copied into the genrule sandbox
+        but are not passed on the command line to M4. Specify this if .s
+        generation fails with M4 complaining about missing files.
+
+    Example:
+      gmp_mpn_assembly(
+          src = "aors_n.asm",
+          out = "add_n.s",
+          architectures = [
+              "arm",
+              "arm64",
+              "x86",
+              "x86_64",
+          ],
+      )
+    """
+    gmp_assembly(
+        src = "mpn/{architecture}/%s" % src,
+        out = "mpn/{architecture}/%s" % out,
+        architectures = architectures,
+        operation = out[:out.rfind(".")],
+        included_srcs = ["mpn/{architecture}/%s" % src for src in included_srcs],
+    )
+
+def gmp_mpn_library(name, srcs, operation, deps = []):
+    """Runs the C++ compiler to generate a GMP MPN object.
+
+    MPN is GMP's architecture-specific layer. MPN libraries require some special
+    compiler flags; this macro wraps `cc_library` to add them. Use it instead of
+    `cc_library` to compile libraries inside third_party/gmp/mpn.
+
+    Args:
+      name: A unique name for this target.
+      srcs: The list of C and assembly files that are processed to create the
+        library target.
+      operation: The name of the function defined in the C or assembly file. The
+        C preprocessor will be invoked with `-DOPERATION_{operation}`. (This is
+        necessary because GMP can generate multiple functions from the same
+        skeleton.)
+      deps: The list of other libraries that the library target depends on.
+    """
+    native.cc_library(
+        name = name,
+        srcs = [
+            "longlong.h",
+        ] + srcs,
+        local_defines = [
+            "HAVE_CONFIG_H",
+            "__GMP_WITHIN_GMP",
+            "OPERATION_" + operation,
+        ],
+        deps = [":gmp-stage2"] + deps,
+        includes = [
+            "",
+            "mpn",
+        ],
+        copts = ["-w"],
+    )
diff --git a/compat.c b/compat.c
new file mode 100644
index 0000000..b4b44ce
--- /dev/null
+++ b/compat.c
@@ -0,0 +1,59 @@
+/* Old function entrypoints retained for binary compatibility.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+/* mpn_divexact_by3 was a function in gmp 3.0.1, but as of gmp 3.1 it's a
+   macro calling mpn_divexact_by3c.  */
+mp_limb_t
+__MPN (divexact_by3) (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  return mpn_divexact_by3 (dst, src, size);
+}
+
+
+/* mpn_divmod_1 was a function in gmp 3.0.1 and earlier, but marked obsolete
+   in both gmp 2 and 3.  As of gmp 3.1 it's a macro calling mpn_divrem_1. */
+mp_limb_t
+__MPN (divmod_1) (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+  return mpn_divmod_1 (dst, src, size, divisor);
+}
+
+
+/* mpz_legendre was a separate function in gmp 3.1.1 and earlier, but as of
+   4.0 it's a #define alias for mpz_jacobi.  */
+int
+__gmpz_legendre (mpz_srcptr a, mpz_srcptr b)
+{
+  return mpz_jacobi (a, b);
+}
diff --git a/config.in b/config.in
new file mode 100644
index 0000000..ee1ef8c
--- /dev/null
+++ b/config.in
@@ -0,0 +1,671 @@
+/* config.in.  Generated from configure.ac by autoheader.  */
+
+/*
+
+Copyright 1996-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.
+*/
+
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+
+/* The gmp-mparam.h file (a string) the tune program should suggest updating.
+   */
+#undef GMP_MPARAM_H_SUGGEST
+
+/* Define to 1 if you have the `alarm' function. */
+#undef HAVE_ALARM
+
+/* Define to 1 if alloca() works (via gmp-impl.h). */
+#undef HAVE_ALLOCA
+
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+   */
+#undef HAVE_ALLOCA_H
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((const)) */
+#undef HAVE_ATTRIBUTE_CONST
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((malloc)) */
+#undef HAVE_ATTRIBUTE_MALLOC
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((mode (XX)))
+   */
+#undef HAVE_ATTRIBUTE_MODE
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((noreturn)) */
+#undef HAVE_ATTRIBUTE_NORETURN
+
+/* Define to 1 if you have the `attr_get' function. */
+#undef HAVE_ATTR_GET
+
+/* Define to 1 if tests/libtests has calling conventions checking for the CPU
+   */
+#undef HAVE_CALLING_CONVENTIONS
+
+/* Define to 1 if you have the `clock' function. */
+#undef HAVE_CLOCK
+
+/* Define to 1 if you have the `clock_gettime' function */
+#undef HAVE_CLOCK_GETTIME
+
+/* Define to 1 if you have the `cputime' function. */
+#undef HAVE_CPUTIME
+
+/* Define to 1 if you have the declaration of `fgetc', and to 0 if you don't.
+   */
+#undef HAVE_DECL_FGETC
+
+/* Define to 1 if you have the declaration of `fscanf', and to 0 if you don't.
+   */
+#undef HAVE_DECL_FSCANF
+
+/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
+   */
+#undef HAVE_DECL_OPTARG
+
+/* Define to 1 if you have the declaration of `sys_errlist', and to 0 if you
+   don't. */
+#undef HAVE_DECL_SYS_ERRLIST
+
+/* Define to 1 if you have the declaration of `sys_nerr', and to 0 if you
+   don't. */
+#undef HAVE_DECL_SYS_NERR
+
+/* Define to 1 if you have the declaration of `ungetc', and to 0 if you don't.
+   */
+#undef HAVE_DECL_UNGETC
+
+/* Define to 1 if you have the declaration of `vfprintf', and to 0 if you
+   don't. */
+#undef HAVE_DECL_VFPRINTF
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define one of the following to 1 for the format of a `double'.
+   If your format is not among these choices, or you don't know what it is,
+   then leave all undefined.
+   IEEE_LITTLE_SWAPPED means little endian, but with the two 4-byte halves
+   swapped, as used by ARM CPUs in little endian mode.  */
+#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN
+#undef HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
+#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
+#undef HAVE_DOUBLE_VAX_D
+#undef HAVE_DOUBLE_VAX_G
+#undef HAVE_DOUBLE_CRAY_CFP
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#undef HAVE_FCNTL_H
+
+/* Define to 1 if you have the <float.h> header file. */
+#undef HAVE_FLOAT_H
+
+/* Define to 1 if you have the `getpagesize' function. */
+#undef HAVE_GETPAGESIZE
+
+/* Define to 1 if you have the `getrusage' function. */
+#undef HAVE_GETRUSAGE
+
+/* Define to 1 if you have the `getsysinfo' function. */
+#undef HAVE_GETSYSINFO
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#undef HAVE_GETTIMEOFDAY
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((visibility))
+   and __attribute__ ((alias)) */
+#undef HAVE_HIDDEN_ALIAS
+
+/* Define one of these to 1 for the host CPU family.
+   If your CPU is not in any of these families, leave all undefined.
+   For an AMD64 chip, define "x86" in ABI=32, but not in ABI=64. */
+#undef HAVE_HOST_CPU_FAMILY_alpha
+#undef HAVE_HOST_CPU_FAMILY_m68k
+#undef HAVE_HOST_CPU_FAMILY_power
+#undef HAVE_HOST_CPU_FAMILY_powerpc
+#undef HAVE_HOST_CPU_FAMILY_x86
+#undef HAVE_HOST_CPU_FAMILY_x86_64
+
+/* Define one of the following to 1 for the host CPU, as per the output of
+   ./config.guess.  If your CPU is not listed here, leave all undefined.  */
+#undef HAVE_HOST_CPU_alphaev67
+#undef HAVE_HOST_CPU_alphaev68
+#undef HAVE_HOST_CPU_alphaev7
+#undef HAVE_HOST_CPU_m68020
+#undef HAVE_HOST_CPU_m68030
+#undef HAVE_HOST_CPU_m68040
+#undef HAVE_HOST_CPU_m68060
+#undef HAVE_HOST_CPU_m68360
+#undef HAVE_HOST_CPU_powerpc604
+#undef HAVE_HOST_CPU_powerpc604e
+#undef HAVE_HOST_CPU_powerpc750
+#undef HAVE_HOST_CPU_powerpc7400
+#undef HAVE_HOST_CPU_supersparc
+#undef HAVE_HOST_CPU_i386
+#undef HAVE_HOST_CPU_i586
+#undef HAVE_HOST_CPU_i686
+#undef HAVE_HOST_CPU_pentium
+#undef HAVE_HOST_CPU_pentiummmx
+#undef HAVE_HOST_CPU_pentiumpro
+#undef HAVE_HOST_CPU_pentium2
+#undef HAVE_HOST_CPU_pentium3
+#undef HAVE_HOST_CPU_pentium4
+#undef HAVE_HOST_CPU_core2
+#undef HAVE_HOST_CPU_nehalem
+#undef HAVE_HOST_CPU_westmere
+#undef HAVE_HOST_CPU_sandybridge
+#undef HAVE_HOST_CPU_ivybridge
+#undef HAVE_HOST_CPU_haswell
+#undef HAVE_HOST_CPU_broadwell
+#undef HAVE_HOST_CPU_skylake
+#undef HAVE_HOST_CPU_silvermont
+#undef HAVE_HOST_CPU_goldmont
+#undef HAVE_HOST_CPU_tremont
+#undef HAVE_HOST_CPU_k8
+#undef HAVE_HOST_CPU_k10
+#undef HAVE_HOST_CPU_bulldozer
+#undef HAVE_HOST_CPU_piledriver
+#undef HAVE_HOST_CPU_steamroller
+#undef HAVE_HOST_CPU_excavator
+#undef HAVE_HOST_CPU_zen
+#undef HAVE_HOST_CPU_bobcat
+#undef HAVE_HOST_CPU_jaguar
+#undef HAVE_HOST_CPU_s390_z900
+#undef HAVE_HOST_CPU_s390_z990
+#undef HAVE_HOST_CPU_s390_z9
+#undef HAVE_HOST_CPU_s390_z10
+#undef HAVE_HOST_CPU_s390_z196
+#undef HAVE_HOST_CPU_s390_z13
+#undef HAVE_HOST_CPU_s390_z14
+#undef HAVE_HOST_CPU_s390_z15
+
+/* Define to 1 iff we have a s390 with 64-bit registers.  */
+#undef HAVE_HOST_CPU_s390_zarch
+
+/* Define to 1 if the system has the type `intmax_t'. */
+#undef HAVE_INTMAX_T
+
+/* Define to 1 if the system has the type `intptr_t'. */
+#undef HAVE_INTPTR_T
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <invent.h> header file. */
+#undef HAVE_INVENT_H
+
+/* Define to 1 if you have the <langinfo.h> header file. */
+#undef HAVE_LANGINFO_H
+
+/* Define one of these to 1 for the endianness of `mp_limb_t'.
+   If the endianness is not a simple big or little, or you don't know what
+   it is, then leave both undefined. */
+#undef HAVE_LIMB_BIG_ENDIAN
+#undef HAVE_LIMB_LITTLE_ENDIAN
+
+/* Define to 1 if you have the `localeconv' function. */
+#undef HAVE_LOCALECONV
+
+/* Define to 1 if you have the <locale.h> header file. */
+#undef HAVE_LOCALE_H
+
+/* Define to 1 if the system has the type `long double'. */
+#undef HAVE_LONG_DOUBLE
+
+/* Define to 1 if the system has the type `long long'. */
+#undef HAVE_LONG_LONG
+
+/* Define to 1 if you have the <machine/hal_sysinfo.h> header file. */
+#undef HAVE_MACHINE_HAL_SYSINFO_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `memset' function. */
+#undef HAVE_MEMSET
+
+/* Define to 1 if you have the `mmap' function. */
+#undef HAVE_MMAP
+
+/* Define to 1 if you have the `mprotect' function. */
+#undef HAVE_MPROTECT
+
+/* Define to 1 each of the following for which a native (ie. CPU specific)
+    implementation of the corresponding routine exists.  */
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_n_sub_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addaddmul_1msb0
+#undef HAVE_NATIVE_mpn_addlsh1_n
+#undef HAVE_NATIVE_mpn_addlsh2_n
+#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip2
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addmul_2
+#undef HAVE_NATIVE_mpn_addmul_3
+#undef HAVE_NATIVE_mpn_addmul_4
+#undef HAVE_NATIVE_mpn_addmul_5
+#undef HAVE_NATIVE_mpn_addmul_6
+#undef HAVE_NATIVE_mpn_addmul_7
+#undef HAVE_NATIVE_mpn_addmul_8
+#undef HAVE_NATIVE_mpn_addmul_2s
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdiv_dbm1c
+#undef HAVE_NATIVE_mpn_bdiv_q_1
+#undef HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#undef HAVE_NATIVE_mpn_cnd_add_n
+#undef HAVE_NATIVE_mpn_cnd_sub_n
+#undef HAVE_NATIVE_mpn_com
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_div_qr_1n_pi1
+#undef HAVE_NATIVE_mpn_div_qr_2
+#undef HAVE_NATIVE_mpn_divexact_1
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_gcd_11
+#undef HAVE_NATIVE_mpn_gcd_22
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_lshiftc
+#undef HAVE_NATIVE_mpn_lshsub_n
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1_1p
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mod_1s_2p
+#undef HAVE_NATIVE_mpn_mod_1s_4p
+#undef HAVE_NATIVE_mpn_mod_34lsub1
+#undef HAVE_NATIVE_mpn_modexact_1_odd
+#undef HAVE_NATIVE_mpn_modexact_1c_odd
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_2
+#undef HAVE_NATIVE_mpn_mul_3
+#undef HAVE_NATIVE_mpn_mul_4
+#undef HAVE_NATIVE_mpn_mul_5
+#undef HAVE_NATIVE_mpn_mul_6
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_mullo_basecase
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_divrem_1
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_redc_1
+#undef HAVE_NATIVE_mpn_redc_2
+#undef HAVE_NATIVE_mpn_rsblsh1_n
+#undef HAVE_NATIVE_mpn_rsblsh2_n
+#undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1add_nc
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+#undef HAVE_NATIVE_mpn_rsh1sub_nc
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_sbpi1_bdiv_r
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sqr_diagonal
+#undef HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n
+#undef HAVE_NATIVE_mpn_sublsh2_n
+#undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh_nc_ip1
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_tabselect
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_umul_ppmm_r
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n
+
+/* Define to 1 if you have the `nl_langinfo' function. */
+#undef HAVE_NL_LANGINFO
+
+/* Define to 1 if you have the <nl_types.h> header file. */
+#undef HAVE_NL_TYPES_H
+
+/* Define to 1 if you have the `obstack_vprintf' function. */
+#undef HAVE_OBSTACK_VPRINTF
+
+/* Define to 1 if you have the `popen' function. */
+#undef HAVE_POPEN
+
+/* Define to 1 if you have the `processor_info' function. */
+#undef HAVE_PROCESSOR_INFO
+
+/* Define to 1 if <sys/pstat.h> `struct pst_processor' exists and contains
+   `psp_iticksperclktick'. */
+#undef HAVE_PSP_ITICKSPERCLKTICK
+
+/* Define to 1 if you have the `pstat_getprocessor' function. */
+#undef HAVE_PSTAT_GETPROCESSOR
+
+/* Define to 1 if the system has the type `ptrdiff_t'. */
+#undef HAVE_PTRDIFF_T
+
+/* Define to 1 if the system has the type `quad_t'. */
+#undef HAVE_QUAD_T
+
+/* Define to 1 if you have the `raise' function. */
+#undef HAVE_RAISE
+
+/* Define to 1 if you have the `read_real_time' function. */
+#undef HAVE_READ_REAL_TIME
+
+/* Define to 1 if you have the `sigaction' function. */
+#undef HAVE_SIGACTION
+
+/* Define to 1 if you have the `sigaltstack' function. */
+#undef HAVE_SIGALTSTACK
+
+/* Define to 1 if you have the `sigstack' function. */
+#undef HAVE_SIGSTACK
+
+/* Tune directory speed_cyclecounter, undef=none, 1=32bits, 2=64bits) */
+#undef HAVE_SPEED_CYCLECOUNTER
+
+/* Define to 1 if you have the <sstream> header file. */
+#undef HAVE_SSTREAM
+
+/* Define to 1 if the system has the type `stack_t'. */
+#undef HAVE_STACK_T
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if the system has the type `std::locale'. */
+#undef HAVE_STD__LOCALE
+
+/* Define to 1 if you have the `strchr' function. */
+#undef HAVE_STRCHR
+
+/* Define to 1 if you have the `strerror' function. */
+#undef HAVE_STRERROR
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the `strnlen' function. */
+#undef HAVE_STRNLEN
+
+/* Define to 1 if you have the `strtol' function. */
+#undef HAVE_STRTOL
+
+/* Define to 1 if you have the `strtoul' function. */
+#undef HAVE_STRTOUL
+
+/* Define to 1 if you have the `sysconf' function. */
+#undef HAVE_SYSCONF
+
+/* Define to 1 if you have the `sysctl' function. */
+#undef HAVE_SYSCTL
+
+/* Define to 1 if you have the `sysctlbyname' function. */
+#undef HAVE_SYSCTLBYNAME
+
+/* Define to 1 if you have the `syssgi' function. */
+#undef HAVE_SYSSGI
+
+/* Define to 1 if you have the <sys/attributes.h> header file. */
+#undef HAVE_SYS_ATTRIBUTES_H
+
+/* Define to 1 if you have the <sys/iograph.h> header file. */
+#undef HAVE_SYS_IOGRAPH_H
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#undef HAVE_SYS_MMAN_H
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#undef HAVE_SYS_PARAM_H
+
+/* Define to 1 if you have the <sys/processor.h> header file. */
+#undef HAVE_SYS_PROCESSOR_H
+
+/* Define to 1 if you have the <sys/pstat.h> header file. */
+#undef HAVE_SYS_PSTAT_H
+
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#undef HAVE_SYS_RESOURCE_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/sysctl.h> header file. */
+#undef HAVE_SYS_SYSCTL_H
+
+/* Define to 1 if you have the <sys/sysinfo.h> header file. */
+#undef HAVE_SYS_SYSINFO_H
+
+/* Define to 1 if you have the <sys/syssgi.h> header file. */
+#undef HAVE_SYS_SYSSGI_H
+
+/* Define to 1 if you have the <sys/systemcfg.h> header file. */
+#undef HAVE_SYS_SYSTEMCFG_H
+
+/* Define to 1 if you have the <sys/times.h> header file. */
+#undef HAVE_SYS_TIMES_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the `times' function. */
+#undef HAVE_TIMES
+
+/* Define to 1 if the system has the type `uint_least32_t'. */
+#undef HAVE_UINT_LEAST32_T
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the `vsnprintf' function and it works properly. */
+#undef HAVE_VSNPRINTF
+
+/* Define to 1 for Windows/64 */
+#undef HOST_DOS64
+
+/* Assembler local label prefix */
+#undef LSYM_PREFIX
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#undef LT_OBJDIR
+
+/* Define to 1 to disable the use of inline assembly */
+#undef NO_ASM
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define as the return type of signal handlers (`int' or `void'). */
+#undef RETSIGTYPE
+
+/* The size of `mp_limb_t', as computed by sizeof. */
+#undef SIZEOF_MP_LIMB_T
+
+/* The size of `unsigned', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED
+
+/* The size of `unsigned long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG
+
+/* The size of `unsigned short', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_SHORT
+
+/* The size of `void *', as computed by sizeof. */
+#undef SIZEOF_VOID_P
+
+/* Define to 1 if sscanf requires writable inputs */
+#undef SSCANF_WRITABLE_INPUT
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Maximum size the tune program can test for SQR_TOOM2_THRESHOLD */
+#undef TUNE_SQR_TOOM2_MAX
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to 1 to enable ASSERT checking, per --enable-assert */
+#undef WANT_ASSERT
+
+/* Define to 1 to enable GMP_CPU_TYPE faking cpuid, per --enable-fake-cpuid */
+#undef WANT_FAKE_CPUID
+
+/* Define to 1 when building a fat binary. */
+#undef WANT_FAT_BINARY
+
+/* Define to 1 to enable FFTs for multiplication, per --enable-fft */
+#undef WANT_FFT
+
+/* Define to 1 to enable old mpn_mul_fft_full for multiplication, per
+   --enable-old-fft-full */
+#undef WANT_OLD_FFT_FULL
+
+/* Define to 1 if --enable-profiling=gprof */
+#undef WANT_PROFILING_GPROF
+
+/* Define to 1 if --enable-profiling=instrument */
+#undef WANT_PROFILING_INSTRUMENT
+
+/* Define to 1 if --enable-profiling=prof */
+#undef WANT_PROFILING_PROF
+
+/* Define one of these to 1 for the desired temporary memory allocation
+   method, per --enable-alloca. */
+#undef WANT_TMP_ALLOCA
+#undef WANT_TMP_REENTRANT
+#undef WANT_TMP_NOTREENTRANT
+#undef WANT_TMP_DEBUG
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+#  undef WORDS_BIGENDIAN
+# endif
+#endif
+
+/* Define to 1 if the assembler understands the mulx instruction */
+#undef X86_ASM_MULX
+
+/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
+   `char[]'. */
+#undef YYTEXT_POINTER
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
+#undef inline
+#endif
+
+/* Define to the equivalent of the C99 'restrict' keyword, or to
+   nothing if this is not supported.  Do not define if restrict is
+   supported directly.  */
+#undef restrict
+/* Work around a bug in Sun C++: it does not support _Restrict or
+   __restrict__, even though the corresponding Sun C compiler ends up with
+   "#define restrict _Restrict" or "#define restrict __restrict__" in the
+   previous line.  Perhaps some future version of Sun C++ will work with
+   restrict; if so, hopefully it defines __RESTRICT like Sun C does.  */
+#if defined __SUNPRO_CC && !defined __RESTRICT
+# define _Restrict
+# define __restrict__
+#endif
+
+/* Define to empty if the keyword `volatile' does not work. Warning: valid
+   code using `volatile' can become incorrect without. Disable with care. */
+#undef volatile
diff --git a/cxx/dummy.cc b/cxx/dummy.cc
new file mode 100644
index 0000000..8b728ca
--- /dev/null
+++ b/cxx/dummy.cc
@@ -0,0 +1,33 @@
+/* Dummy file to make automake treat libgmpxx.la as C++.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* some compilers reputedly dislike completely empty files */
+typedef int  foo;
diff --git a/cxx/isfuns.cc b/cxx/isfuns.cc
new file mode 100644
index 0000000..0cc0db0
--- /dev/null
+++ b/cxx/isfuns.cc
@@ -0,0 +1,115 @@
+/* Auxiliary functions for C++-style input of GMP types.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* Determine the numeric base for integer input from stream `i`, from the
+   stream's ios::basefield flags.  When no base flag is set, autodetect
+   from the leading characters already being read through `c`:
+   "0x"/"0X" -> 16, a lone "0" -> 8, anything else -> 10.  On return
+   `zero` is true iff a bare "0" was consumed (it counts as a valid zero
+   if no further digit follows), and `showbase` is true iff prefix
+   autodetection was performed.  `c` holds the next unprocessed
+   character.  */
+int
+__gmp_istream_set_base (istream &i, char &c, bool &zero, bool &showbase)
+{
+  int base;
+
+  zero = showbase = false;
+  switch (i.flags() & ios::basefield)
+    {
+    case ios::dec:
+      base = 10;
+      break;
+    case ios::hex:
+      base = 16;
+      break;
+    case ios::oct:
+      base = 8;
+      break;
+    default:
+      showbase = true; // look for initial "0" or "0x" or "0X"
+      if (c == '0')
+	{
+	  if (! i.get(c))
+	    c = 0; // reset or we might loop indefinitely
+
+	  if (c == 'x' || c == 'X')
+	    {
+	      base = 16;
+	      i.get(c);
+	    }
+	  else
+	    {
+	      base = 8;
+	      zero = true; // if no other digit is read, the "0" counts
+	    }
+	}
+      else
+	base = 10;
+      break;
+    }
+
+  return base;
+}
+
+/* Append to `s` the run of consecutive digits from stream `i` that are
+   valid in `base` (10, 8 or 16).  The first candidate character is `c`,
+   which the caller has already read.  `ok` is set true once at least one
+   valid digit is consumed (and is left untouched otherwise, so callers
+   can accumulate it across several calls).  On exit `c` holds the first
+   non-digit character read, or is unchanged if the stream ended.  */
+void
+__gmp_istream_set_digits (string &s, istream &i, char &c, bool &ok, int base)
+{
+  switch (base)
+    {
+    case 10:
+      while (isdigit(c))
+	{
+	  ok = true; // at least a valid digit was read
+	  s += c;
+	  if (! i.get(c))
+	    break;
+	}
+      break;
+    case 8:
+      // isdigit with '8'/'9' excluded, i.e. '0'..'7' only
+      while (isdigit(c) && c != '8' && c != '9')
+	{
+	  ok = true; // at least a valid digit was read
+	  s += c;
+	  if (! i.get(c))
+	    break;
+	}
+      break;
+    case 16:
+      while (isxdigit(c))
+	{
+	  ok = true; // at least a valid digit was read
+	  s += c;
+	  if (! i.get(c))
+	    break;
+	}
+      break;
+    }
+}
diff --git a/cxx/ismpf.cc b/cxx/ismpf.cc
new file mode 100644
index 0000000..9ff25eb
--- /dev/null
+++ b/cxx/ismpf.cc
@@ -0,0 +1,144 @@
+/* operator>> -- C++-style input of mpf_t.
+
+Copyright 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include <clocale>    // for localeconv
+
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_float
+// in include/bits/locale_facets.tcc.
+//
+// There are no plans to accept hex or octal floats, not unless the standard
+// C++ library does so.  Although such formats might be of use, it's
+// considered more important to be compatible with what the normal
+// operator>> does on "double"s etc.
+
+/* Read a decimal float from `i` into `f`: optional whitespace skip,
+   optional sign, mantissa digits, optional radix point, optional
+   'e'/'E' exponent with its own optional sign.  The radix point is
+   matched against the C++ locale's decimal point but the C locale's
+   point is what gets stored in `s`, since that is what mpf_set_str
+   expects.  failbit is set if no valid number was parsed.  */
+istream &
+operator>> (istream &i, mpf_ptr f)
+{
+  int base;
+  char c = 0;
+  string s;
+  bool ok = false;
+
+  // C decimal point, as expected by mpf_set_str
+  const char *lconv_point = GMP_DECIMAL_POINT;
+
+  // C++ decimal point
+#if HAVE_STD__LOCALE
+  const locale& loc = i.getloc();
+  char point_char = use_facet< numpunct<char> >(loc).decimal_point();
+#else
+  const char *point = lconv_point;
+  char point_char = *point;
+#endif
+
+  i.get(c); // start reading
+
+  if (i.flags() & ios::skipws) // skip initial whitespace
+    {
+      // C++ isspace
+#if HAVE_STD__LOCALE
+      const ctype<char>& ct = use_facet< ctype<char> >(loc);
+#define cxx_isspace(c)  (ct.is(ctype_base::space,(c)))
+#else
+#define cxx_isspace(c)  isspace(c)
+#endif
+
+      while (cxx_isspace(c) && i.get(c))
+        ;
+    }
+
+  if (c == '-' || c == '+') // sign
+    {
+      // only '-' is kept: mpf_set_str doesn't accept a leading '+'
+      if (c == '-')
+	s = "-";
+      i.get(c);
+    }
+
+  base = 10;
+  __gmp_istream_set_digits(s, i, c, ok, base); // read the number
+
+  // look for the C++ radix point, but put the C one in for mpf_set_str
+  if (c == point_char)
+    {
+#if HAVE_STD__LOCALE
+      i.get(c);
+#else // lconv point can be multi-char
+      // match the remaining characters of the (possibly multi-byte)
+      // C locale point; any mismatch is a parse failure
+      for (;;)
+        {
+          i.get(c);
+          point++;
+          if (*point == '\0')
+            break;
+          if (c != *point)
+            goto fail;
+        }
+#endif
+      s += lconv_point;
+      __gmp_istream_set_digits(s, i, c, ok, base); // read the mantissa
+    }
+
+  if (ok && (c == 'e' || c == 'E')) // exponent
+    {
+      s += c;
+      i.get(c);
+      ok = false; // exponent is mandatory
+
+      if (c == '-' || c == '+') // sign
+	{
+	  s += c;
+	  i.get(c);
+	}
+
+      __gmp_istream_set_digits(s, i, c, ok, base); // read the exponent
+    }
+
+  if (i.good()) // last character read was non-numeric
+    i.putback(c);
+  else if (i.eof() && ok) // stopped just before eof
+    i.clear(ios::eofbit);
+
+  if (ok)
+    ASSERT_NOCARRY (mpf_set_str(f, s.c_str(), base)); // extract the number
+  else
+    {
+    fail:
+      i.setstate(ios::failbit); // read failed
+    }
+
+  return i;
+}
diff --git a/cxx/ismpq.cc b/cxx/ismpq.cc
new file mode 100644
index 0000000..8cd5121
--- /dev/null
+++ b/cxx/ismpq.cc
@@ -0,0 +1,66 @@
+/* operator>> -- C++-style input of mpq_t.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* Read a rational from `i` into `q`, in the form "num" or "num/den".
+   The numerator is read with the mpz operator>>; if the next character
+   is '/' the denominator is read too (without whitespace skipping),
+   otherwise the denominator is set to 1 by writing the mpz fields
+   directly.  No canonicalization is performed here.  */
+istream &
+operator>> (istream &i, mpq_ptr q)
+{
+  if (! (i >> mpq_numref(q)))
+    return i;
+
+  char  c = 0;
+  i.get(c); // start reading
+
+  if (c == '/')
+    {
+      // skip slash, read denominator
+      i.get(c);
+      return __gmpz_operator_in_nowhite (i, mpq_denref(q), c);
+    }
+  else
+    {
+      // no denominator, set 1
+      q->_mp_den._mp_size = 1;
+      q->_mp_den._mp_d[0] = 1;
+      if (i.good())
+        i.putback(c);
+      else if (i.eof())
+        i.clear(ios::eofbit);
+    }
+
+  return i;
+}
diff --git a/cxx/ismpz.cc b/cxx/ismpz.cc
new file mode 100644
index 0000000..263ced1
--- /dev/null
+++ b/cxx/ismpz.cc
@@ -0,0 +1,62 @@
+/* operator>> -- C++-style input of mpz_t.
+
+Copyright 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_int in
+// include/bits/locale_facets.tcc.
+
+/* Read an integer from `i` into `z`.  Skips leading whitespace when the
+   stream's ios::skipws flag is set (classifying spaces with the stream's
+   locale where std::locale is available), then hands the first
+   non-space character to __gmpz_operator_in_nowhite for the actual
+   parse.  */
+istream &
+operator>> (istream &i, mpz_ptr z)
+{
+  char c = 0;
+  i.get(c); // start reading
+
+  if (i.flags() & ios::skipws) // skip initial whitespace
+    {
+#if HAVE_STD__LOCALE
+      const ctype<char>& ct = use_facet< ctype<char> >(i.getloc());
+#define cxx_isspace(c)  (ct.is(ctype_base::space,(c)))
+#else
+#define cxx_isspace(c)  isspace(c)
+#endif
+
+      while (cxx_isspace(c) && i.get(c))
+        ;
+    }
+
+  return __gmpz_operator_in_nowhite (i, z, c);
+}
diff --git a/cxx/ismpznw.cc b/cxx/ismpznw.cc
new file mode 100644
index 0000000..84f2806
--- /dev/null
+++ b/cxx/ismpznw.cc
@@ -0,0 +1,72 @@
+/* __gmpz_operator_in_nowhite -- C++-style input of mpz_t, no whitespace skip.
+
+Copyright 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_int in
+// include/bits/locale_facets.tcc.
+
+/* Read an integer from `i` into `z` without skipping leading whitespace.
+   `c` is the first character, already read by the caller.  The base is
+   taken from the stream flags, or autodetected from a "0"/"0x" prefix,
+   per __gmp_istream_set_base.  A lone "0" parses as zero; otherwise
+   failbit is set when no valid digit was read.  */
+istream &
+__gmpz_operator_in_nowhite (istream &i, mpz_ptr z, char c)
+{
+  int base;
+  string s;
+  bool ok = false, zero, showbase;
+
+  if (c == '-' || c == '+') // sign
+    {
+      if (c == '-') // mpz_set_str doesn't accept '+'
+	s = "-";
+      i.get(c);
+    }
+
+  base = __gmp_istream_set_base(i, c, zero, showbase); // select the base
+  __gmp_istream_set_digits(s, i, c, ok, base);         // read the number
+
+  if (i.good()) // last character read was non-numeric
+    i.putback(c);
+  else if (i.eof() && (ok || zero)) // stopped just before eof
+    i.clear(ios::eofbit);
+
+  if (ok)
+    ASSERT_NOCARRY (mpz_set_str (z, s.c_str(), base)); // extract the number
+  else if (zero)
+    mpz_set_ui(z, 0); // input was a bare "0" with no further digits
+  else
+    i.setstate(ios::failbit); // read failed
+
+  return i;
+}
diff --git a/cxx/limits.cc b/cxx/limits.cc
new file mode 100644
index 0000000..3004e16
--- /dev/null
+++ b/cxx/limits.cc
@@ -0,0 +1,62 @@
+/* instantiation of numeric_limits specializations.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmpxx.h"
+
+/* Out-of-line definitions for the static const data members of the
+   numeric_limits specializations for mpz_class, mpq_class and mpf_class
+   (declared in gmpxx.h).  Prior to C++17's inline variables such
+   definitions are required in one translation unit if the members are
+   odr-used (e.g. bound to a reference).  */
+namespace std {
+#define GMPXX_INSTANTIATE_LIMITS(T) \
+  const bool numeric_limits<T>::is_specialized; \
+  const int  numeric_limits<T>::digits; \
+  const int  numeric_limits<T>::digits10; \
+  const int  numeric_limits<T>::max_digits10; \
+  const bool numeric_limits<T>::is_signed; \
+  const bool numeric_limits<T>::is_integer; \
+  const bool numeric_limits<T>::is_exact; \
+  const int  numeric_limits<T>::radix; \
+  const int  numeric_limits<T>::min_exponent; \
+  const int  numeric_limits<T>::min_exponent10; \
+  const int  numeric_limits<T>::max_exponent; \
+  const int  numeric_limits<T>::max_exponent10; \
+  const bool numeric_limits<T>::has_infinity; \
+  const bool numeric_limits<T>::has_quiet_NaN; \
+  const bool numeric_limits<T>::has_signaling_NaN; \
+  const float_denorm_style numeric_limits<T>::has_denorm; \
+  const bool numeric_limits<T>::has_denorm_loss; \
+  const bool numeric_limits<T>::is_iec559; \
+  const bool numeric_limits<T>::is_bounded; \
+  const bool numeric_limits<T>::is_modulo; \
+  const bool numeric_limits<T>::traps; \
+  const bool numeric_limits<T>::tinyness_before; \
+  const float_round_style numeric_limits<T>::round_style
+
+  GMPXX_INSTANTIATE_LIMITS(mpz_class);
+  GMPXX_INSTANTIATE_LIMITS(mpq_class);
+  GMPXX_INSTANTIATE_LIMITS(mpf_class);
+}
diff --git a/cxx/osdoprnti.cc b/cxx/osdoprnti.cc
new file mode 100644
index 0000000..00dee15
--- /dev/null
+++ b/cxx/osdoprnti.cc
@@ -0,0 +1,67 @@
+/* __gmp_doprnt_integer_ios -- integer formatted output to an ostream.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <stdarg.h>   /* for va_list and hence doprnt_funs_t */
+#include <string.h>   /* for strlen */
+
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* The gmp_asprintf support routines never give an error, so
+   __gmp_doprnt_integer shouldn't fail and its return value can just be
+   checked with an ASSERT.  */
+
+/* Write the ASCII number string `s` to ostream `o`, applying the
+   width/justification/fill/showbase treatment described by *p.  `s` is a
+   gmp-allocated string and is freed here after formatting.  The
+   formatted result is accumulated in memory via the asprintf support
+   routines, then written out in a single o.write call.  */
+ostream&
+__gmp_doprnt_integer_ostream (ostream &o, struct doprnt_params_t *p,
+                              char *s)
+{
+  struct gmp_asprintf_t   d;
+  char  *result;
+  int   ret;
+
+  /* don't show leading zeros the way printf does */
+  p->prec = -1;
+
+  GMP_ASPRINTF_T_INIT (d, &result);
+  ret = __gmp_doprnt_integer (&__gmp_asprintf_funs_noformat, &d, p, s);
+  ASSERT (ret != -1);
+  __gmp_asprintf_final (&d);
+  (*__gmp_free_func) (s, strlen(s)+1);
+
+  // gmp_allocated_string frees `result` when it goes out of scope
+  gmp_allocated_string  t (result);
+  return o.write (t.str, t.len);
+}
diff --git a/cxx/osfuns.cc b/cxx/osfuns.cc
new file mode 100644
index 0000000..81590a8
--- /dev/null
+++ b/cxx/osfuns.cc
@@ -0,0 +1,123 @@
+/* Support for operator<< routines.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* Don't need "format" for operator<< routines, just "memory" and "reps".
+   Omitting gmp_asprintf_format lets us avoid dragging vsnprintf into the
+   link.  __gmp_asprintf_final will be called directly and doesn't need to
+   be in the struct.  */
+
+const struct doprnt_funs_t  __gmp_asprintf_funs_noformat = {
+  NULL,                                   /* format - not needed here */
+  (doprnt_memory_t) __gmp_asprintf_memory,
+  (doprnt_reps_t)   __gmp_asprintf_reps,
+  NULL                                    /* final - called directly instead */
+};
+
+
+/* Fill in *p from the formatting state of ios `o`: numeric base (with
+   negative base meaning uppercase hex), fixed/scientific/general
+   conversion, fill character, justification, precision, showbase,
+   showpoint, trailing zeros, explicit '+' sign, and field width.  The
+   stream's width is reset to 0 at the end, as happens after each
+   standard formatted output.  */
+void
+__gmp_doprnt_params_from_ios (struct doprnt_params_t *p, ios &o)
+{
+  if ((o.flags() & ios::basefield) == ios::hex)
+    {
+      p->expfmt = "@%c%02d";
+      p->base = (o.flags() & ios::uppercase ? -16 : 16);
+    }
+  else
+    {
+      p->expfmt = (o.flags() & ios::uppercase ? "E%c%02d" : "e%c%02d");
+      if ((o.flags() & ios::basefield) == ios::oct)
+        p->base = 8;
+      else
+        p->base = 10;
+    }
+
+  /* "general" if none or more than one bit set */
+  if ((o.flags() & ios::floatfield) == ios::fixed)
+    p->conv = DOPRNT_CONV_FIXED;
+  else if ((o.flags() & ios::floatfield) == ios::scientific)
+    p->conv = DOPRNT_CONV_SCIENTIFIC;
+  else
+    p->conv = DOPRNT_CONV_GENERAL;
+
+  p->exptimes4 = 0;
+
+  p->fill = o.fill();
+
+  /* "right" if more than one bit set */
+  if ((o.flags() & ios::adjustfield) == ios::left)
+    p->justify = DOPRNT_JUSTIFY_LEFT;
+  else if ((o.flags() & ios::adjustfield) == ios::internal)
+    p->justify = DOPRNT_JUSTIFY_INTERNAL;
+  else
+    p->justify = DOPRNT_JUSTIFY_RIGHT;
+
+  /* ios::fixed allows prec==0, others take 0 as the default 6.
+     Don't allow negatives (they do bad things to __gmp_doprnt_float_cxx).  */
+  p->prec = MAX (0, o.precision());
+  if (p->prec == 0 && p->conv != DOPRNT_CONV_FIXED)
+    p->prec = 6;
+
+  /* for hex showbase is always, for octal only non-zero */
+  if (o.flags() & ios::showbase)
+    p->showbase = ((o.flags() & ios::basefield) == ios::hex
+                   ? DOPRNT_SHOWBASE_YES : DOPRNT_SHOWBASE_NONZERO);
+  else
+    p->showbase = DOPRNT_SHOWBASE_NO;
+
+  p->showpoint = ((o.flags() & ios::showpoint) != 0);
+
+  /* in fixed and scientific always show trailing zeros, in general format
+     show them if showpoint is set (or so it seems) */
+  if ((o.flags() & ios::floatfield) == ios::fixed
+      || (o.flags() & ios::floatfield) == ios::scientific)
+    p->showtrailing = 1;
+  else
+    p->showtrailing = p->showpoint;
+
+  p->sign = (o.flags() & ios::showpos ? '+' : '\0');
+
+  p->width = o.width();
+
+  /* reset on each output */
+  o.width (0);
+}
diff --git a/cxx/osmpf.cc b/cxx/osmpf.cc
new file mode 100644
index 0000000..fd875db
--- /dev/null
+++ b/cxx/osmpf.cc
@@ -0,0 +1,70 @@
+/* operator<< -- mpf formatted output to an ostream.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <clocale>
+#include <iostream>
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* The gmp_asprintf support routines never give an error, so
+   __gmp_doprnt_mpf shouldn't fail and its return value can just be
+   checked with an ASSERT.  */
+
+/* Format mpf `f` according to the stream formatting flags of `o` and
+   write it out.  The decimal point comes from the C++ locale of the
+   stream where std::locale is available, otherwise from the C locale.  */
+ostream&
+operator<< (ostream &o, mpf_srcptr f)
+{
+  struct doprnt_params_t  param;
+  struct gmp_asprintf_t   d;
+  char  *result;
+  int   ret;
+
+  __gmp_doprnt_params_from_ios (&param, o);
+
+#if HAVE_STD__LOCALE
+  char  point[2];
+  point[0] = use_facet< numpunct<char> >(o.getloc()).decimal_point();
+  point[1] = '\0';
+#else
+  const char *point = GMP_DECIMAL_POINT;
+#endif
+
+  GMP_ASPRINTF_T_INIT (d, &result);
+  ret = __gmp_doprnt_mpf (&__gmp_asprintf_funs_noformat, &d, &param, point, f);
+  ASSERT (ret != -1);
+  __gmp_asprintf_final (&d);
+
+  // gmp_allocated_string frees `result` when it goes out of scope
+  gmp_allocated_string  t (result);
+  return o.write (t.str, t.len);
+}
diff --git a/cxx/osmpq.cc b/cxx/osmpq.cc
new file mode 100644
index 0000000..0c97557
--- /dev/null
+++ b/cxx/osmpq.cc
@@ -0,0 +1,47 @@
+/* operator<< -- mpq formatted output to an ostream.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+ostream&
+operator<< (ostream &o, mpq_srcptr q)
+{
+  struct doprnt_params_t  param;
+  __gmp_doprnt_params_from_ios (&param, o);
+  return __gmp_doprnt_integer_ostream (o, &param,
+                                       mpq_get_str (NULL, param.base, q));
+}
diff --git a/cxx/osmpz.cc b/cxx/osmpz.cc
new file mode 100644
index 0000000..2ee71d3
--- /dev/null
+++ b/cxx/osmpz.cc
@@ -0,0 +1,47 @@
+/* operator<< -- mpz formatted output to an ostream.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+ostream&
+operator<< (ostream &o, mpz_srcptr z)
+{
+  struct doprnt_params_t  param;
+  __gmp_doprnt_params_from_ios (&param, o);
+  return __gmp_doprnt_integer_ostream (o, &param,
+                                       mpz_get_str (NULL, param.base, z));
+}
diff --git a/errno.c b/errno.c
new file mode 100644
index 0000000..d71c146
--- /dev/null
+++ b/errno.c
@@ -0,0 +1,77 @@
+/* gmp_errno, __gmp_exception -- exception handling and reporting.
+
+   THE FUNCTIONS IN THIS FILE, APART FROM gmp_errno, ARE FOR INTERNAL USE
+   ONLY.  THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR
+   DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2003, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+
+#include <signal.h>
+
+#include "gmp-impl.h"
+
+int gmp_errno = 0;
+
+
+/* Use SIGFPE on systems which have it. Otherwise, deliberately divide
+   by zero, which triggers an exception on most systems. On those
+   where it doesn't, for example power and powerpc, use abort instead. */
+void
+__gmp_exception (int error_bit)
+{
+  gmp_errno |= error_bit;
+#ifdef SIGFPE
+  raise (SIGFPE);
+#else
+  __gmp_junk = 10 / __gmp_0;
+#endif
+  abort ();
+}
+
+
+/* These functions minimize the amount of code required in functions raising
+   exceptions.  Since they're "noreturn" and don't take any parameters, a
+   test and call might even come out as a simple conditional jump.  */
+void
+__gmp_sqrt_of_negative (void)
+{
+  __gmp_exception (GMP_ERROR_SQRT_OF_NEGATIVE);
+}
+void
+__gmp_divide_by_zero (void)
+{
+  __gmp_exception (GMP_ERROR_DIVISION_BY_ZERO);
+}
+void
+__gmp_overflow_in_mpz (void)
+{
+  __gmp_exception (GMP_ERROR_MPZ_OVERFLOW);
+}
diff --git a/extract-dbl.c b/extract-dbl.c
new file mode 100644
index 0000000..e44d6fa
--- /dev/null
+++ b/extract-dbl.c
@@ -0,0 +1,310 @@
+/* __gmp_extract_double -- convert from double to array of mp_limb_t.
+
+Copyright 1996, 1999-2002, 2006, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#ifdef XDEBUG
+#undef _GMP_IEEE_FLOATS
+#endif
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+/* Extract a non-negative double in d.  */
+
+int
+__gmp_extract_double (mp_ptr rp, double d)
+{
+  long exp;
+  unsigned sc;
+#ifdef _LONG_LONG_LIMB
+#define BITS_PER_PART 64	/* somewhat bogus */
+  unsigned long long int manl;
+#else
+#define BITS_PER_PART GMP_LIMB_BITS
+  unsigned long int manh, manl;
+#endif
+
+  /* BUGS
+
+     1. Should handle Inf and NaN in IEEE specific code.
+     2. Handle Inf and NaN also in default code, to avoid hangs.
+     3. Generalize to handle all GMP_LIMB_BITS >= 32.
+     4. This lits is incomplete and misspelled.
+   */
+
+  ASSERT (d >= 0.0);
+
+  if (d == 0.0)
+    {
+      MPN_ZERO (rp, LIMBS_PER_DOUBLE);
+      return 0;
+    }
+
+#if _GMP_IEEE_FLOATS
+  {
+#if defined (__alpha) && __GNUC__ == 2 && __GNUC_MINOR__ == 8
+    /* Work around alpha-specific bug in GCC 2.8.x.  */
+    volatile
+#endif
+    union ieee_double_extract x;
+    x.d = d;
+    exp = x.s.exp;
+#if BITS_PER_PART == 64		/* generalize this to BITS_PER_PART > BITS_IN_MANTISSA */
+    manl = (((mp_limb_t) 1 << 63)
+	    | ((mp_limb_t) x.s.manh << 43) | ((mp_limb_t) x.s.manl << 11));
+    if (exp == 0)
+      {
+	/* Denormalized number.  Don't try to be clever about this,
+	   since it is not an important case to make fast.  */
+	exp = 1;
+	do
+	  {
+	    manl = manl << 1;
+	    exp--;
+	  }
+	while ((manl & GMP_LIMB_HIGHBIT) == 0);
+      }
+#endif
+#if BITS_PER_PART == 32
+    manh = ((mp_limb_t) 1 << 31) | (x.s.manh << 11) | (x.s.manl >> 21);
+    manl = x.s.manl << 11;
+    if (exp == 0)
+      {
+	/* Denormalized number.  Don't try to be clever about this,
+	   since it is not an important case to make fast.  */
+	exp = 1;
+	do
+	  {
+	    manh = (manh << 1) | (manl >> 31);
+	    manl = manl << 1;
+	    exp--;
+	  }
+	while ((manh & GMP_LIMB_HIGHBIT) == 0);
+      }
+#endif
+#if BITS_PER_PART != 32 && BITS_PER_PART != 64
+  You need to generalize the code above to handle this.
+#endif
+    exp -= 1022;		/* Remove IEEE bias.  */
+  }
+#else
+  {
+    /* Unknown (or known to be non-IEEE) double format.  */
+    exp = 0;
+    if (d >= 1.0)
+      {
+	ASSERT_ALWAYS (d * 0.5 != d);
+
+	while (d >= 32768.0)
+	  {
+	    d *= (1.0 / 65536.0);
+	    exp += 16;
+	  }
+	while (d >= 1.0)
+	  {
+	    d *= 0.5;
+	    exp += 1;
+	  }
+      }
+    else if (d < 0.5)
+      {
+	while (d < (1.0 / 65536.0))
+	  {
+	    d *=  65536.0;
+	    exp -= 16;
+	  }
+	while (d < 0.5)
+	  {
+	    d *= 2.0;
+	    exp -= 1;
+	  }
+      }
+
+    d *= (4.0 * ((unsigned long int) 1 << (BITS_PER_PART - 2)));
+#if BITS_PER_PART == 64
+    manl = d;
+#endif
+#if BITS_PER_PART == 32
+    manh = d;
+    manl = (d - manh) * (4.0 * ((unsigned long int) 1 << (BITS_PER_PART - 2)));
+#endif
+  }
+#endif /* IEEE */
+
+  sc = (unsigned) (exp + 64 * GMP_NUMB_BITS) % GMP_NUMB_BITS;
+
+  /* We add something here to get rounding right.  */
+  exp = (exp + 64 * GMP_NUMB_BITS) / GMP_NUMB_BITS - 64 * GMP_NUMB_BITS / GMP_NUMB_BITS + 1;
+
+#if BITS_PER_PART == 64 && LIMBS_PER_DOUBLE == 2
+#if GMP_NAIL_BITS == 0
+  if (sc != 0)
+    {
+      rp[1] = manl >> (GMP_LIMB_BITS - sc);
+      rp[0] = manl << sc;
+    }
+  else
+    {
+      rp[1] = manl;
+      rp[0] = 0;
+      exp--;
+    }
+#else
+  if (sc > GMP_NAIL_BITS)
+    {
+      rp[1] = manl >> (GMP_LIMB_BITS - sc);
+      rp[0] = (manl << (sc - GMP_NAIL_BITS)) & GMP_NUMB_MASK;
+    }
+  else
+    {
+      if (sc == 0)
+	{
+	  rp[1] = manl >> GMP_NAIL_BITS;
+	  rp[0] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS) & GMP_NUMB_MASK;
+	  exp--;
+	}
+      else
+	{
+	  rp[1] = manl >> (GMP_LIMB_BITS - sc);
+	  rp[0] = (manl >> (GMP_NAIL_BITS - sc)) & GMP_NUMB_MASK;
+	}
+    }
+#endif
+#endif
+
+#if BITS_PER_PART == 64 && LIMBS_PER_DOUBLE == 3
+  if (sc > GMP_NAIL_BITS)
+    {
+      rp[2] = manl >> (GMP_LIMB_BITS - sc);
+      rp[1] = (manl << sc - GMP_NAIL_BITS) & GMP_NUMB_MASK;
+      if (sc >= 2 * GMP_NAIL_BITS)
+	rp[0] = 0;
+      else
+	rp[0] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS + sc) & GMP_NUMB_MASK;
+    }
+  else
+    {
+      if (sc == 0)
+	{
+	  rp[2] = manl >> GMP_NAIL_BITS;
+	  rp[1] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS) & GMP_NUMB_MASK;
+	  rp[0] = 0;
+	  exp--;
+	}
+      else
+	{
+	  rp[2] = manl >> (GMP_LIMB_BITS - sc);
+	  rp[1] = (manl >> GMP_NAIL_BITS - sc) & GMP_NUMB_MASK;
+	  rp[0] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS + sc) & GMP_NUMB_MASK;
+	}
+    }
+#endif
+
+#if BITS_PER_PART == 32 && LIMBS_PER_DOUBLE == 3
+#if GMP_NAIL_BITS == 0
+  if (sc != 0)
+    {
+      rp[2] = manh >> (GMP_LIMB_BITS - sc);
+      rp[1] = (manh << sc) | (manl >> (GMP_LIMB_BITS - sc));
+      rp[0] = manl << sc;
+    }
+  else
+    {
+      rp[2] = manh;
+      rp[1] = manl;
+      rp[0] = 0;
+      exp--;
+    }
+#else
+  if (sc > GMP_NAIL_BITS)
+    {
+      rp[2] = (manh >> (GMP_LIMB_BITS - sc));
+      rp[1] = ((manh << (sc - GMP_NAIL_BITS)) |
+	       (manl >> (GMP_LIMB_BITS - sc + GMP_NAIL_BITS))) & GMP_NUMB_MASK;
+      if (sc >= 2 * GMP_NAIL_BITS)
+	rp[0] = (manl << sc - 2 * GMP_NAIL_BITS) & GMP_NUMB_MASK;
+      else
+	rp[0] = manl >> (2 * GMP_NAIL_BITS - sc) & GMP_NUMB_MASK;
+    }
+  else
+    {
+      if (sc == 0)
+	{
+	  rp[2] = manh >> GMP_NAIL_BITS;
+	  rp[1] = ((manh << GMP_NUMB_BITS - GMP_NAIL_BITS) | (manl >> 2 * GMP_NAIL_BITS)) & GMP_NUMB_MASK;
+	  rp[0] = (manl << GMP_NUMB_BITS - 2 * GMP_NAIL_BITS) & GMP_NUMB_MASK;
+	  exp--;
+	}
+      else
+	{
+	  rp[2] = (manh >> (GMP_LIMB_BITS - sc));
+	  rp[1] = (manh >> (GMP_NAIL_BITS - sc)) & GMP_NUMB_MASK;
+	  rp[0] = ((manh << (GMP_NUMB_BITS - GMP_NAIL_BITS + sc))
+		   | (manl >> (GMP_LIMB_BITS - (GMP_NUMB_BITS - GMP_NAIL_BITS + sc)))) & GMP_NUMB_MASK;
+	}
+    }
+#endif
+#endif
+
+#if BITS_PER_PART == 32 && LIMBS_PER_DOUBLE > 3
+  if (sc == 0)
+    {
+      int i;
+
+      for (i = LIMBS_PER_DOUBLE - 1; i >= 0; i--)
+	{
+	  rp[i] = manh >> (BITS_PER_ULONG - GMP_NUMB_BITS);
+	  manh = ((manh << GMP_NUMB_BITS)
+		  | (manl >> (BITS_PER_ULONG - GMP_NUMB_BITS)));
+	  manl = manl << GMP_NUMB_BITS;
+	}
+      exp--;
+    }
+  else
+    {
+      int i;
+
+      rp[LIMBS_PER_DOUBLE - 1] = (manh >> (GMP_LIMB_BITS - sc));
+      manh = (manh << sc) | (manl >> (GMP_LIMB_BITS - sc));
+      manl = (manl << sc);
+      for (i = LIMBS_PER_DOUBLE - 2; i >= 0; i--)
+	{
+	  rp[i] = manh >> (BITS_PER_ULONG - GMP_NUMB_BITS);
+	  manh = ((manh << GMP_NUMB_BITS)
+		  | (manl >> (BITS_PER_ULONG - GMP_NUMB_BITS)));
+	  manl = manl << GMP_NUMB_BITS;
+	}
+  }
+#endif
+
+  return exp;
+}
diff --git a/gen-bases.c b/gen-bases.c
new file mode 100644
index 0000000..5f5e7ed
--- /dev/null
+++ b/gen-bases.c
@@ -0,0 +1,265 @@
+/* Generate mp_bases data.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2002, 2004, 2011, 2012,
+2015-2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "bootstrap.c"
+
+
+int    chars_per_limb;
+int    big_base_ctz;
+mpz_t  big_base;
+int    normalization_steps;
+mpz_t  big_base_inverted;
+mpz_t  big_base_binverted;
+
+mpz_t  t;
+
+#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
+
+unsigned int
+ulog2 (unsigned int x)
+{
+  unsigned int i;
+  for (i = 0;  x != 0;  i++)
+    x >>= 1;
+  return i;
+}
+
+void
+binvert (int numb_bits)
+{
+  mpz_t bbo;
+
+  mpz_init_set (bbo, big_base);
+  big_base_ctz = mpz_make_odd (bbo);
+  mpz_invert_2exp (big_base_binverted, bbo, numb_bits);
+}
+
+void
+generate (int limb_bits, int nail_bits, int base)
+{
+  int  numb_bits = limb_bits - nail_bits;
+
+  mpz_set_ui (t, 1L);
+  mpz_mul_2exp (t, t, numb_bits);
+  mpz_set_ui (big_base, (long) base);
+  chars_per_limb = 0;
+  while (mpz_cmp (big_base, t) <= 0)
+    {
+      mpz_mul_ui (big_base, big_base, (long) base);
+      chars_per_limb++;
+    }
+
+  mpz_ui_pow_ui (big_base, (long) base, (long) chars_per_limb);
+
+  normalization_steps = limb_bits - mpz_sizeinbase (big_base, 2);
+
+  mpz_set_ui (t, 1L);
+  mpz_mul_2exp (t, t, 2*limb_bits - normalization_steps);
+  mpz_tdiv_q (big_base_inverted, t, big_base);
+  mpz_clrbit (big_base_inverted, limb_bits);
+
+  binvert (numb_bits);
+}
+
+void
+header (int limb_bits, int nail_bits)
+{
+  int  numb_bits = limb_bits - nail_bits;
+
+  generate (limb_bits, nail_bits, 10);
+
+  printf ("/* This file generated by gen-bases.c - DO NOT EDIT. */\n");
+  printf ("\n");
+  printf ("#if GMP_NUMB_BITS != %d\n", numb_bits);
+  printf ("Error, error, this data is for %d bits\n", numb_bits);
+  printf ("#endif\n");
+  printf ("\n");
+  printf ("/* mp_bases[10] data, as literal values */\n");
+  printf ("#define MP_BASES_CHARS_PER_LIMB_10      %d\n", chars_per_limb);
+  printf ("#define MP_BASES_BIG_BASE_CTZ_10        %d\n", big_base_ctz);
+  printf ("#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, big_base);
+  printf (")\n");
+  printf ("#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, big_base_inverted);
+  printf (")\n");
+  printf ("#define MP_BASES_BIG_BASE_BINVERTED_10  CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, big_base_binverted);
+  printf (")\n");
+  printf ("#define MP_BASES_NORMALIZATION_STEPS_10 %d\n", normalization_steps);
+}
+
+
+#define EXTRA 16
+
+/* Compute log(2)/log(b) as a fixnum. */
+void
+mp_2logb (mpz_t r, int bi, int prec)
+{
+  mpz_t t, t2, two, b;
+  int i;
+
+  mpz_init (t);
+  mpz_setbit (t, prec + EXTRA);
+
+  mpz_init (t2);
+
+  mpz_init (two);
+  mpz_setbit (two, prec + EXTRA + 1);
+
+  mpz_set_ui (r, 0);
+
+  mpz_init_set_ui (b, bi);
+  mpz_mul_2exp (b, b, prec+EXTRA);
+
+  for (i = prec-1; i >= 0; i--)
+    {
+      mpz_mul_2exp (b, b, prec+EXTRA);
+      mpz_sqrt (b, b);
+
+      mpz_mul (t2, t, b);
+      mpz_tdiv_q_2exp (t2, t2, prec+EXTRA);
+
+      if (mpz_cmp (t2, two) < 0)	/* not too large? */
+	{
+	  mpz_setbit (r, i);		/* set next less significant bit */
+	  mpz_swap (t, t2);		/* new value acceptable */
+	}
+    }
+
+  mpz_clear (t);
+  mpz_clear (t2);
+  mpz_clear (two);
+  mpz_clear (b);
+}
+
+void
+table (int limb_bits, int nail_bits)
+{
+  int  numb_bits = limb_bits - nail_bits;
+  int  base;
+  mpz_t r, t, logb2, log2b;
+
+  mpz_init (r);
+  mpz_init (t);
+  mpz_init (logb2);
+  mpz_init (log2b);
+
+  printf ("/* This file generated by gen-bases.c - DO NOT EDIT. */\n");
+  printf ("\n");
+  printf ("#include \"gmp-impl.h\"\n");
+  printf ("\n");
+  printf ("#if GMP_NUMB_BITS != %d\n", numb_bits);
+  printf ("Error, error, this data is for %d bits\n", numb_bits);
+  printf ("#endif\n");
+  printf ("\n");
+  puts ("const struct bases mp_bases[257] =\n{");
+  puts ("  /*   0 */ { 0, 0, 0, 0, 0 },");
+  puts ("  /*   1 */ { 0, 0, 0, 0, 0 },");
+  for (base = 2; base <= 256; base++)
+    {
+      generate (limb_bits, nail_bits, base);
+      mp_2logb (r, base, limb_bits + 8);
+      mpz_tdiv_q_2exp (logb2, r, 8);
+      mpz_set_ui (t, 1);
+      mpz_mul_2exp (t, t, 2*limb_bits + 5);
+      mpz_sub_ui (t, t, 1);
+      mpz_add_ui (r, r, 1);
+      mpz_tdiv_q (log2b, t, r);
+
+      printf ("  /* %3u */ { ", base);
+      if (POW2_P (base))
+	{
+          mpz_set_ui (big_base, ulog2 (base) - 1);
+	  mpz_set_ui (big_base_inverted, 0);
+	}
+
+      printf ("%u,", chars_per_limb);
+      printf (" CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, logb2);
+      printf ("), CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, log2b);
+      printf ("), CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, big_base);
+      printf ("), CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, big_base_inverted);
+      printf (") },\n");
+    }
+
+  puts ("};");
+
+  mpz_clear (r);
+  mpz_clear (t);
+  mpz_clear (logb2);
+  mpz_clear (log2b);
+
+}
+
+int
+main (int argc, char **argv)
+{
+  int  limb_bits, nail_bits;
+
+  mpz_init (big_base);
+  mpz_init (big_base_inverted);
+  mpz_init (big_base_binverted);
+  mpz_init (t);
+
+  if (argc != 4)
+    {
+      fprintf (stderr, "Usage: gen-bases <header|table> <limbbits> <nailbits>\n");
+      exit (1);
+    }
+
+  limb_bits = atoi (argv[2]);
+  nail_bits = atoi (argv[3]);
+
+  if (limb_bits <= 0
+      || nail_bits < 0
+      || nail_bits >= limb_bits)
+    {
+      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
+               limb_bits, nail_bits);
+      exit (1);
+    }
+
+  if (strcmp (argv[1], "header") == 0)
+    header (limb_bits, nail_bits);
+  else if (strcmp (argv[1], "table") == 0)
+    table (limb_bits, nail_bits);
+  else
+    {
+      fprintf (stderr, "Invalid header/table choice: %s\n", argv[1]);
+      exit (1);
+    }
+
+  return 0;
+}
diff --git a/gen-fac.c b/gen-fac.c
new file mode 100644
index 0000000..93ebf7b
--- /dev/null
+++ b/gen-fac.c
@@ -0,0 +1,285 @@
+/* Generate data for combinatorics: fac_ui, bin_uiui, ...
+
+Copyright 2002, 2011-2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "bootstrap.c"
+
+int
+mpz_remove_twos (mpz_t x)
+{
+  mp_bitcnt_t r = mpz_scan1(x, 0);
+  mpz_tdiv_q_2exp (x, x, r);
+  return r;
+}
+
+/* returns 0 on success		*/
+int
+gen_consts (unsigned numb, unsigned limb)
+{
+  mpz_t x, mask, y, last;
+  unsigned long a, b;
+  unsigned long ofl, ofe;
+
+  printf ("/* This file is automatically generated by gen-fac.c */\n\n");
+  printf ("#if GMP_NUMB_BITS != %u\n", numb);
+  printf ("Error , error this data is for %u GMP_NUMB_BITS only\n", numb);
+  printf ("#endif\n");
+#if 0
+  printf ("#if GMP_LIMB_BITS != %u\n", limb);
+  printf ("Error , error this data is for %u GMP_LIMB_BITS only\n", limb);
+  printf ("#endif\n");
+#endif
+
+  printf
+    ("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1");
+  mpz_init_set_ui (x, 1);
+  mpz_init (last);
+  for (b = 2;; b++)
+    {
+      mpz_mul_ui (x, x, b);	/* so b!=a       */
+      if (mpz_sizeinbase (x, 2) > numb)
+	break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  printf
+    ("\n/* This table is 0!,1!,2!/2,3!/2,...,n!/2^sn where n!/2^sn is an */\n");
+  printf
+    ("/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);
+      mpz_swap (last, x);
+      mpz_mul_ui (x, last, a);
+      if (mpz_sizeinbase (x, 2) > numb)
+	break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  printf
+    ("#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, last);
+  printf (")\n");
+
+  ofl = b - 1;
+  printf
+    ("#define ODD_FACTORIAL_TABLE_LIMIT (%lu)\n", ofl);
+  mpz_init (mask);
+  mpz_setbit (mask, numb);
+  mpz_sub_ui (mask, mask, 1);
+  printf
+    ("\n/* Previous table, continued, values modulo 2^GMP_NUMB_BITS */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_EXTTABLE CNST_LIMB(0x");
+  mpz_and (x, x, mask);
+  mpz_out_str (stdout, 16, x);
+  mpz_init (y);
+  mpz_bin_uiui (y, b, b/2);
+  b++;
+  for (;; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);
+      if (a == b) {
+	mpz_divexact_ui (y, y, a/2+1);
+	mpz_mul_ui (y, y, a);
+      } else
+	mpz_mul_2exp (y, y, 1);
+      if (mpz_sizeinbase (y, 2) > numb)
+	break;
+      mpz_mul_ui (x, x, a);
+      mpz_and (x, x, mask);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  ofe = b - 1;
+  printf
+    ("#define ODD_FACTORIAL_EXTTABLE_LIMIT (%lu)\n", ofe);
+
+  printf
+    ("\n/* This table is 1!!,3!!,...,(2n+1)!! where (2n+1)!! has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;; b+=2)
+    {
+      mpz_swap (last, x);
+      mpz_mul_ui (x, last, b);
+      if (mpz_sizeinbase (x, 2) > numb)
+	break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  printf
+    ("#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, last);
+  printf (")\n");
+
+  printf
+    ("#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (%lu)\n", b - 2);
+
+  printf
+    ("\n/* This table x_1, x_2,... contains values s.t. x_n^n has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define NTH_ROOT_NUMB_MASK_TABLE (GMP_NUMB_MASK");
+  for (b = 2;b <= 8; b++)
+    {
+      mpz_root (x, mask, b);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  mpz_add_ui (mask, mask, 1);
+  printf
+    ("\n/* This table contains inverses of odd factorials, modulo 2^GMP_NUMB_BITS */\n");
+  printf
+    ("\n/* It begins with (2!/2)^-1=1 */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;b <= ofe - 2; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);
+      mpz_mul_ui (x, x, a);
+      mpz_invert (y, x, mask);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, y);
+    }
+  printf (")\n");
+
+  ofe = (ofe / 16 + 1) * 16;
+
+  printf
+    ("\n/* This table contains 2n-popc(2n) for small n */\n");
+  printf
+    ("\n/* It begins with 2-1=1 (n=1) */\n");
+  printf
+    ("#define TABLE_2N_MINUS_POPC_2N 1");
+  for (b = 4; b <= ofe; b += 2)
+    {
+      mpz_set_ui (x, b);
+      printf (",%lu",b - mpz_popcount (x));
+    }
+  printf ("\n");
+  printf
+    ("#define TABLE_LIMIT_2N_MINUS_POPC_2N %lu\n", ofe + 1);
+
+
+  ofl = (ofl + 1) / 2;
+  printf
+    ("#define ODD_CENTRAL_BINOMIAL_OFFSET (%lu)\n", ofl);
+  printf
+    ("\n/* This table contains binomial(2k,k)/2^t */\n");
+  printf
+    ("\n/* It begins with ODD_CENTRAL_BINOMIAL_TABLE_MIN */\n");
+  printf
+    ("#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE ");
+  for (b = ofl;; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      mpz_remove_twos (x);
+      if (mpz_sizeinbase (x, 2) > numb)
+	break;
+      if (b != ofl)
+	printf ("),");
+      printf("CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  ofe = b - 1;
+  printf
+    ("#define ODD_CENTRAL_BINOMIAL_TABLE_LIMIT (%lu)\n", ofe);
+
+  printf
+    ("\n/* This table contains the inverses of elements in the previous table. */\n");
+  printf
+    ("#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE CNST_LIMB(0x");
+  for (b = ofl; b <= ofe; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      mpz_remove_twos (x);
+      mpz_invert (x, x, mask);
+      mpz_out_str (stdout, 16, x);
+      if (b != ofe)
+	printf ("),CNST_LIMB(0x");
+    }
+  printf (")\n");
+
+  printf
+    ("\n/* This table contains the values t in the formula binomial(2k,k)/2^t */\n");
+  printf
+    ("#define CENTRAL_BINOMIAL_2FAC_TABLE ");
+  for (b = ofl; b <= ofe; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      printf ("%d", mpz_remove_twos (x));
+      if (b != ofe)
+	printf (",");
+    }
+  printf ("\n");
+
+  return 0;
+}
+
+int
+main (int argc, char *argv[])
+{
+  int nail_bits, limb_bits, numb_bits;
+
+  if (argc != 3)
+    {
+      fprintf (stderr, "Usage: gen-fac limbbits nailbits\n");
+      exit (1);
+    }
+  limb_bits = atoi (argv[1]);
+  nail_bits = atoi (argv[2]);
+  numb_bits = limb_bits - nail_bits;
+  if (limb_bits < 2 || nail_bits < 0 || numb_bits < 1)
+    {
+      fprintf (stderr, "Invalid limb/nail bits %d,%d\n", limb_bits,
+	       nail_bits);
+      exit (1);
+    }
+  gen_consts (numb_bits, limb_bits);
+  return 0;
+}
diff --git a/gen-fib.c b/gen-fib.c
new file mode 100644
index 0000000..647a6bb
--- /dev/null
+++ b/gen-fib.c
@@ -0,0 +1,156 @@
+/* Generate Fibonacci table data.
+
+Copyright 2001, 2002, 2004, 2012, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "bootstrap.c"
+
+mpz_t  *f;
+int    fnum, fib_limit, luc_limit;
+
+void
+generate (int numb_bits)
+{
+  mpz_t  limit, l;
+  int    falloc, i;
+
+  mpz_init2 (limit, numb_bits + 1);
+  mpz_setbit (limit, numb_bits);
+
+  /* fib(2n) > 2^n, so use 2n as a limit for the table size */
+  falloc = 2 * numb_bits;
+  f = (mpz_t*) xmalloc (falloc * sizeof (*f));
+
+  mpz_init_set_ui (f[0], 1L);  /* F[-1] */
+  mpz_init_set_ui (f[1], 0L);  /* F[0] */
+
+  mpz_init (l);
+
+  for (i = 2; ; i++)
+    {
+      assert (i < falloc);
+
+      /* F[i] = F[i-1] + F[i-2] */
+      mpz_init (f[i]);
+      mpz_add (f[i], f[i-1], f[i-2]);
+      if (mpz_cmp (f[i], limit) >= 0)
+        break;
+
+      fnum = i+1;
+      fib_limit = i-1;
+
+      /* L[i] = F[i]+2*F[i-1] */
+      mpz_add (l, f[i], f[i-1]);
+      mpz_add (l, l, f[i-1]);
+
+      if (mpz_cmp (l, limit) < 0)
+        luc_limit = i-1;
+    }
+
+  mpz_clear (limit);
+}
+
+
+void
+header (int numb_bits)
+{
+  printf ("/* This file generated by gen-fib.c - DO NOT EDIT. */\n");
+  printf ("\n");
+  printf ("#if GMP_NUMB_BITS != %d\n", numb_bits);
+  printf ("Error, error, this data is for %d bits\n", numb_bits);
+  printf ("#endif\n");
+  printf ("\n");
+  printf ("#define FIB_TABLE_LIMIT         %d\n", fib_limit);
+  printf ("#define FIB_TABLE_LUCNUM_LIMIT  %d\n", luc_limit);
+}
+
+void
+table (int numb_bits)
+{
+  int  i;
+
+  printf ("/* This file generated by gen-fib.c - DO NOT EDIT. */\n");
+  printf ("\n");
+  printf ("#include \"gmp.h\"\n");
+  printf ("#include \"gmp-impl.h\"\n");
+  printf ("\n");
+  printf ("#if GMP_NUMB_BITS != %d\n", numb_bits);
+  printf ("Error, error, this data is for %d bits\n", numb_bits);
+  printf ("#endif\n");
+  printf ("\n");
+  printf ("const mp_limb_t\n");
+  printf ("__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n");
+
+  for (i = 0; i < fnum; i++)
+    {
+      printf ("  CNST_LIMB (0x");
+      mpz_out_str (stdout, 16, f[i]);
+      printf ("),  /* %d */\n", i-1);
+    }
+  printf ("};\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int  limb_bits, nail_bits, numb_bits;
+
+  if (argc != 4)
+    {
+      fprintf (stderr, "Usage: gen-fib <header|table> <limbbits> <nailbits>\n");
+      exit (1);
+    }
+
+  limb_bits = atoi (argv[2]);
+  nail_bits = atoi (argv[3]);
+
+  if (limb_bits <= 0
+      || nail_bits < 0
+      || nail_bits >= limb_bits)
+    {
+      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
+               limb_bits, nail_bits);
+      exit (1);
+    }
+  numb_bits = limb_bits - nail_bits;
+
+  generate (numb_bits);
+
+  if (strcmp (argv[1], "header") == 0)
+    header (numb_bits);
+  else if (strcmp (argv[1], "table") == 0)
+    table (numb_bits);
+  else
+    {
+      fprintf (stderr, "Invalid header/table choice: %s\n", argv[1]);
+      exit (1);
+    }
+
+  return 0;
+}
diff --git a/gen-jacobitab.c b/gen-jacobitab.c
new file mode 100644
index 0000000..537994b
--- /dev/null
+++ b/gen-jacobitab.c
@@ -0,0 +1,128 @@
+/* gen-jacobi.c
+
+   Contributed to the GNU project by Niels Möller.
+
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* Generate the lookup table needed for fast left-to-right computation
+   of the Jacobi symbol. */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* The 13 states of the left-to-right Jacobi automaton.  Each state
+   holds a residue pair (a mod 4, b mod 4); at least one of the pair is
+   always odd.  The flag d only affects the sign when a == b == 3
+   (mod 4), so that pair appears twice (entries 7 and 12); for every
+   other state d is "don't care".  */
+static const struct
+{
+  unsigned char a;
+  unsigned char b;
+} decode_table[13] = {
+  /*  0 */ { 0, 1 },
+  /*  1 */ { 0, 3 },
+  /*  2 */ { 1, 1 },
+  /*  3 */ { 1, 3 },
+  /*  4 */ { 2, 1 },
+  /*  5 */ { 2, 3 },
+  /*  6 */ { 3, 1 },
+  /*  7 */ { 3, 3 }, /* d = 1 */
+  /*  8 */ { 1, 0 },
+  /*  9 */ { 1, 2 },
+  /* 10 */ { 3, 0 },
+  /* 11 */ { 3, 2 },
+  /* 12 */ { 3, 3 }, /* d = 0 */
+
+};
+/* A table entry packs (state << 1) | e; these macros extract the
+   pieces from such an entry.  */
+#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
+#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
+
+#define JACOBI_E(bits) ((bits) & 1) /* accumulated sign bit */
+#define JACOBI_D(bits) (((bits)>>1) == 7) /* Gives 0 for don't care states. */
+
+/* Map (a mod 4, b mod 4, d) back to its state number 0..12 by scanning
+   decode_table.  At least one of a, b must be odd; only the (3,3)
+   pair distinguishes d (state 7 for d=1, state 12 for d=0).  */
+static unsigned
+encode (unsigned a, unsigned b, unsigned d)
+{
+  unsigned i;
+
+  assert (d < 2);
+  assert (a < 4);
+  assert (b < 4);
+  assert ( (a | b ) & 1);
+
+  if (a == 3 && b == 3)
+    return d ? 7 : 12;
+
+  /* All other states ignore d, so a linear search on (a,b) suffices.  */
+  for (i = 0; i < 12; i++)
+    if (decode_table[i].a == a
+	&& decode_table[i].b == b)
+      return i;
+
+  abort ();
+}
+
+/* Emit the 208-entry (13 states x 16 input combinations) lookup table.
+   Each input packs (previous table entry << 3) | (d << 2) | q, where q
+   is a two-bit quotient; the output printed is the successor entry
+   (encode (a, b, d) << 1) | e after one reduction step
+   a = (a - q*b) mod 4 (or b = (b - q*a) mod 4, depending on d), with
+   the sign bit e updated per the Jacobi symbol transformation rules
+   applied when the reduced operand is even.  */
+int
+main (int argc, char **argv)
+{
+  unsigned bits;
+
+  for (bits = 0; bits < 208; bits++)
+    {
+      unsigned e, a, b, d_old, d, q;
+
+      /* 16 entries per output row.  */
+      if (bits && !(bits & 0xf))
+	printf("\n");
+
+      q = bits & 3;
+      d = (bits >> 2) & 1;
+
+      e = JACOBI_E (bits >> 3);
+      a = JACOBI_A (bits >> 3);
+      b = JACOBI_B (bits >> 3);
+      d_old = JACOBI_D (bits >> 3);
+
+      /* Flipping d with a == b == 3 (mod 4) flips the sign
+         (quadratic reciprocity).  */
+      if (d != d_old && a == 3 && b == 3)
+	e ^= 1;
+
+      if (d == 1)
+	{
+	  if (b == 2)
+	    e ^= (q & (a >> 1)) ^ (q >> 1);
+	  a = (a - q * b) & 3;
+	}
+      else
+	{
+	  if (a == 2)
+	    e ^= (q & (b >> 1)) ^ (q >> 1);
+	  b = (b - q * a) & 3;
+	}
+
+      printf("%2d,", (encode (a, b, d) << 1) | e);
+    }
+  printf("\n");
+
+  return 0;
+}
diff --git a/gen-psqr.c b/gen-psqr.c
new file mode 100644
index 0000000..a5054c6
--- /dev/null
+++ b/gen-psqr.c
@@ -0,0 +1,585 @@
+/* Generate perfect square testing data.
+
+Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "bootstrap.c"
+
+
+/* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
+   (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
+   residues and non-residues modulo small factors of that modulus.
+
+   For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used.  That
+   function exists specifically because 2^24-1 and 2^48-1 have nice sets of
+   prime factors.  For other limb sizes it's considered, but if it doesn't
+   have good factors then mpn_mod_1 will be used instead.
+
+   When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
+   selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
+   with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
+   GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
+   calculation within a single limb.
+
+   In either case primes can be combined to make divisors.  The table data
+   then effectively indicates remainders which are quadratic residues mod
+   all the primes.  This sort of combining reduces the number of steps
+   needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
+   Nothing is gained or lost in terms of detections, the same total fraction
+   of non-residues will be identified.
+
+   Nothing particularly sophisticated is attempted for combining factors to
+   make divisors.  This is probably a kind of knapsack problem so it'd be
+   too hard to attempt anything completely general.  For the usual 32 and 64
+   bit limbs we get a good enough result just pairing the biggest and
+   smallest which fit together, repeatedly.
+
+   Another aim is to get powerful combinations, ie. divisors which identify
+   biggest fraction of non-residues, and have those run first.  Again for
+   the usual 32 and 64 bits it seems good enough just to pair for big
+   divisors then sort according to the resulting fraction of non-residues
+   identified.
+
+   Also in this program, a table sq_res_0x100 of residues modulo 256 is
+   generated.  This simply fills bits into limbs of the appropriate
+   build-time GMP_LIMB_BITS each.
+
+*/
+
+
+/* Normally we aren't using const in gen*.c programs, so as not to have to
+   bother figuring out if it works, but using it with f_cmp_divisor and
+   f_cmp_fraction avoids warnings from the qsort calls. */
+
+/* Same tests as gmp.h. */
+#if  defined (__STDC__)                                 \
+  || defined (__cplusplus)                              \
+  || defined (_AIX)                                     \
+  || defined (__DECC)                                   \
+  || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
+  || defined (_MSC_VER)                                 \
+  || defined (_WIN32)
+#define HAVE_CONST        1
+#endif
+
+#if ! HAVE_CONST
+#define const
+#endif
+
+
+mpz_t  *sq_res_0x100;          /* table of limbs */
+int    nsq_res_0x100;          /* elements in sq_res_0x100 array */
+int    sq_res_0x100_num;       /* squares in sq_res_0x100 */
+double sq_res_0x100_fraction;  /* sq_res_0x100_num / 256 */
+
+int     mod34_bits;        /* 3*GMP_NUMB_BITS/4 */
+int     mod_bits;          /* bits from PERFSQR_MOD_34 or MOD_PP */
+int     max_divisor;       /* all divisors <= max_divisor */
+int     max_divisor_bits;  /* ceil(log2(max_divisor)) */
+double  total_fraction;    /* of squares */
+mpz_t   pp;                /* product of primes, or 0 if mod_34lsub1 used */
+mpz_t   pp_norm;           /* pp shifted so NUMB high bit set */
+mpz_t   pp_inverted;       /* invert_limb style inverse */
+mpz_t   mod_mask;          /* 2^mod_bits-1 */
+char    mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
+
+/* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
+struct rawfactor_t {
+  int     divisor;
+  int     multiplicity;
+};
+struct rawfactor_t  *rawfactor;
+int                 nrawfactor;
+
+/* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
+struct factor_t {
+  int     divisor;
+  mpz_t   inverse;   /* 1/divisor mod 2^mod_bits */
+  mpz_t   mask;      /* indicating squares mod divisor */
+  double  fraction;  /* squares/total */
+};
+struct factor_t  *factor;
+int              nfactor;       /* entries in use in factor array */
+int              factor_alloc;  /* entries allocated to factor array */
+
+
+/* qsort comparator: order factor_t entries by ascending divisor.  */
+int
+f_cmp_divisor (const void *parg, const void *qarg)
+{
+  const struct factor_t *p, *q;
+  p = (const struct factor_t *) parg;
+  q = (const struct factor_t *) qarg;
+  if (p->divisor > q->divisor)
+    return 1;
+  else if (p->divisor < q->divisor)
+    return -1;
+  else
+    return 0;
+}
+
+/* qsort comparator: order factor_t entries by ascending fraction, so
+   the most discriminating tests (smallest fraction of residues) come
+   first after sorting.  */
+int
+f_cmp_fraction (const void *parg, const void *qarg)
+{
+  const struct factor_t *p, *q;
+  p = (const struct factor_t *) parg;
+  q = (const struct factor_t *) qarg;
+  if (p->fraction > q->fraction)
+    return 1;
+  else if (p->fraction < q->fraction)
+    return -1;
+  else
+    return 0;
+}
+
+/* Remove array[idx] by copying the remainder down, and adjust narray
+   accordingly.  */
+#define COLLAPSE_ELEMENT(array, idx, narray)                    \
+  do {                                                          \
+    memmove (&(array)[idx],					\
+	     &(array)[idx+1],					\
+	     ((narray)-((idx)+1)) * sizeof (array[0]));		\
+    (narray)--;                                                 \
+  } while (0)
+
+
+/* return n*2^p mod m */
+/* Computed by p repeated doublings.  For the small moduli used here
+   (m <= max_divisor) the intermediate 2*n stays well inside int range.  */
+int
+mul_2exp_mod (int n, int p, int m)
+{
+  while (--p >= 0)
+    n = (2 * n) % m;
+  return n;
+}
+
+/* return -n mod m */
+/* Result is kept in the canonical range [0, m-1].  */
+int
+neg_mod (int n, int m)
+{
+  assert (n >= 0 && n < m);
+  return (n == 0 ? 0 : m-n);
+}
+
+/* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
+   "-(idx<<mod_bits)" can be a square modulo m.  */
+/* p = -2^mod_bits mod m, so for each square r = i*i the chosen
+   idx = r*p satisfies -(idx << mod_bits) == r * 2^(2*mod_bits) (mod m),
+   which is a quadratic residue exactly when r is.  */
+void
+square_mask (mpz_t mask, int m)
+{
+  int    p, i, r, idx;
+
+  p = mul_2exp_mod (1, mod_bits, m);
+  p = neg_mod (p, m);
+
+  mpz_set_ui (mask, 0L);
+  for (i = 0; i < m; i++)
+    {
+      r = (i * i) % m;
+      idx = (r * p) % m;
+      mpz_setbit (mask, (unsigned long) idx);
+    }
+}
+
+/* Build sq_res_0x100: a 256-bit bitmap, packed into limb_bits-sized
+   mpz chunks, with bit r set when r is a square modulo 0x100.  Also
+   records the count of such residues and the fraction they represent
+   (the fraction of candidates this test fails to reject).  */
+void
+generate_sq_res_0x100 (int limb_bits)
+{
+  int  i, res;
+
+  /* ceiling division: enough limbs for 256 bits */
+  nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
+  sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
+
+  for (i = 0; i < nsq_res_0x100; i++)
+    mpz_init_set_ui (sq_res_0x100[i], 0L);
+
+  for (i = 0; i < 0x100; i++)
+    {
+      res = (i * i) % 0x100;
+      mpz_setbit (sq_res_0x100[res / limb_bits],
+                  (unsigned long) (res % limb_bits));
+    }
+
+  sq_res_0x100_num = 0;
+  for (i = 0; i < nsq_res_0x100; i++)
+    sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
+  sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
+}
+
+/* Decide between the mpn_mod_34lsub1 and mpn_mod_1 (PERFSQR_PP) styles
+   of remainder, factor the resulting modulus into divisors no larger
+   than max_divisor, and fill in the factor[] array (divisor, inverse,
+   residue mask, fraction), sorted best-test-first.  Records the reason
+   in mod34_excuse when mpn_mod_34lsub1 cannot be used.  See the file
+   header comment for the overall scheme.  */
+void
+generate_mod (int limb_bits, int nail_bits)
+{
+  int    numb_bits = limb_bits - nail_bits;
+  int    i, divisor;
+
+  mpz_init_set_ui (pp, 0L);
+  mpz_init_set_ui (pp_norm, 0L);
+  mpz_init_set_ui (pp_inverted, 0L);
+
+  /* no more than limb_bits many factors in a one limb modulus (and of
+     course in reality nothing like that many) */
+  factor_alloc = limb_bits;
+  factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
+  rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor));
+
+  if (numb_bits % 4 != 0)
+    {
+      strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
+      goto use_pp;
+    }
+
+  max_divisor = 2*limb_bits;
+  max_divisor_bits = log2_ceil (max_divisor);
+
+  if (numb_bits / 4 < max_divisor_bits)
+    {
+      /* Wind back to one limb worth of max_divisor, if that will let us use
+         mpn_mod_34lsub1.  */
+      max_divisor = limb_bits;
+      max_divisor_bits = log2_ceil (max_divisor);
+
+      if (numb_bits / 4 < max_divisor_bits)
+        {
+          strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
+          goto use_pp;
+        }
+    }
+
+  {
+    /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
+    mpz_t  m, q, r;
+    int    multiplicity;
+
+    mod34_bits = (numb_bits / 4) * 3;
+
+    /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
+       the mod34_bits mark, adding the two halves for a remainder of at most
+       mod34_bits+1 many bits */
+    mod_bits = mod34_bits + 1;
+
+    /* m = 2^mod34_bits - 1, the modulus whose factors we want */
+    mpz_init_set_ui (m, 1L);
+    mpz_mul_2exp (m, m, mod34_bits);
+    mpz_sub_ui (m, m, 1L);
+
+    mpz_init (q);
+    mpz_init (r);
+
+    for (i = 3; i <= max_divisor; i+=2)
+      {
+        if (! isprime (i))
+          continue;
+
+        mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
+        if (mpz_sgn (r) != 0)
+          continue;
+
+        /* if a repeated prime is found it's used as an i^n in one factor */
+        divisor = 1;
+        multiplicity = 0;
+        do
+          {
+            if (divisor > max_divisor / i)
+              break;
+            multiplicity++;
+            mpz_set (m, q);
+            mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
+          }
+        while (mpz_sgn (r) == 0);
+
+        assert (nrawfactor < factor_alloc);
+        rawfactor[nrawfactor].divisor = i;
+        rawfactor[nrawfactor].multiplicity = multiplicity;
+        nrawfactor++;
+      }
+
+    mpz_clear (m);
+    mpz_clear (q);
+    mpz_clear (r);
+  }
+
+  if (nrawfactor <= 2)
+    {
+      mpz_t  new_pp;
+
+      sprintf (mod34_excuse, "only %d small factor%s",
+               nrawfactor, nrawfactor == 1 ? "" : "s");
+
+    use_pp:
+      /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
+         tried with just one */
+      max_divisor = 2*limb_bits;
+      max_divisor_bits = log2_ceil (max_divisor);
+
+      mpz_init (new_pp);
+      nrawfactor = 0;
+      /* leave max_divisor_bits of headroom so PERFSQR_MOD_IDX can work
+         within one limb (see file header comment) */
+      mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
+
+      /* one copy of each small prime */
+      mpz_set_ui (pp, 1L);
+      for (i = 3; i <= max_divisor; i+=2)
+        {
+          if (! isprime (i))
+            continue;
+
+          mpz_mul_ui (new_pp, pp, (unsigned long) i);
+          if (mpz_sizeinbase (new_pp, 2) > mod_bits)
+            break;
+          mpz_set (pp, new_pp);
+
+          assert (nrawfactor < factor_alloc);
+          rawfactor[nrawfactor].divisor = i;
+          rawfactor[nrawfactor].multiplicity = 1;
+          nrawfactor++;
+        }
+
+      /* Plus an extra copy of one or more of the primes selected, if that
+         still fits in max_divisor and the total in mod_bits.  Usually only
+         3 or 5 will be candidates */
+      for (i = nrawfactor-1; i >= 0; i--)
+        {
+          if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
+            continue;
+          mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
+          if (mpz_sizeinbase (new_pp, 2) > mod_bits)
+            continue;
+          mpz_set (pp, new_pp);
+
+          rawfactor[i].multiplicity++;
+        }
+
+      mod_bits = mpz_sizeinbase (pp, 2);
+
+      /* pp_norm = pp shifted so its high bit reaches the numb high bit */
+      mpz_set (pp_norm, pp);
+      while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
+        mpz_add (pp_norm, pp_norm, pp_norm);
+
+      mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
+
+      mpz_clear (new_pp);
+    }
+
+  /* start the factor array */
+  for (i = 0; i < nrawfactor; i++)
+    {
+      int  j;
+      assert (nfactor < factor_alloc);
+      factor[nfactor].divisor = 1;
+      for (j = 0; j < rawfactor[i].multiplicity; j++)
+        factor[nfactor].divisor *= rawfactor[i].divisor;
+      nfactor++;
+    }
+
+ combine:
+  /* Combine entries in the factor array.  Combine the smallest entry with
+     the biggest one that will fit with it (ie. under max_divisor), then
+     repeat that with the new smallest entry. */
+  qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
+  for (i = nfactor-1; i >= 1; i--)
+    {
+      if (factor[i].divisor <= max_divisor / factor[0].divisor)
+        {
+          factor[0].divisor *= factor[i].divisor;
+          COLLAPSE_ELEMENT (factor, i, nfactor);
+          goto combine;
+        }
+    }
+
+  total_fraction = 1.0;
+  for (i = 0; i < nfactor; i++)
+    {
+      mpz_init (factor[i].inverse);
+      mpz_invert_ui_2exp (factor[i].inverse,
+                          (unsigned long) factor[i].divisor,
+                          (unsigned long) mod_bits);
+
+      mpz_init (factor[i].mask);
+      square_mask (factor[i].mask, factor[i].divisor);
+
+      /* fraction of possible squares */
+      factor[i].fraction = (double) mpz_popcount (factor[i].mask)
+        / factor[i].divisor;
+
+      /* total fraction of possible squares */
+      total_fraction *= factor[i].fraction;
+    }
+
+  /* best tests first (ie. smallest fraction) */
+  qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
+}
+
+/* Write the perfsqr.h contents to stdout: the sq_res_0x100 table, the
+   PERFSQR_MOD_* defines, the PERFSQR_MOD_TEST macro built from the
+   factor[] data prepared by generate_mod(), and the PERFSQR_DIVISORS
+   helper for the test suite.  */
+void
+print (int limb_bits, int nail_bits)
+{
+  int    i;
+  mpz_t  mhi, mlo;
+
+  printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
+  printf ("\n");
+
+  printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
+          limb_bits, nail_bits);
+  printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
+          limb_bits, nail_bits);
+  printf ("#endif\n");
+  printf ("\n");
+
+  printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
+  printf ("   This test identifies %.2f%% as non-squares (%d/256). */\n",
+          (1.0 - sq_res_0x100_fraction) * 100.0,
+          0x100 - sq_res_0x100_num);
+  printf ("static const mp_limb_t\n");
+  printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
+  for (i = 0; i < nsq_res_0x100; i++)
+    {
+      printf ("  CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, sq_res_0x100[i]);
+      printf ("),\n");
+    }
+  printf ("};\n");
+  printf ("\n");
+
+  /* pp is non-zero exactly when the mpn_mod_1 / PERFSQR_PP style was
+     chosen by generate_mod().  */
+  if (mpz_sgn (pp) != 0)
+    {
+      printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
+      printf ("/* PERFSQR_PP = ");
+    }
+  else
+    printf ("/* 2^%d-1 = ", mod34_bits);
+  for (i = 0; i < nrawfactor; i++)
+    {
+      if (i != 0)
+        printf (" * ");
+      printf ("%d", rawfactor[i].divisor);
+      if (rawfactor[i].multiplicity != 1)
+        printf ("^%d", rawfactor[i].multiplicity);
+    }
+  printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
+
+  printf ("#define PERFSQR_MOD_BITS  %d\n", mod_bits);
+  if (mpz_sgn (pp) != 0)
+    {
+      printf ("#define PERFSQR_PP            CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, pp);
+      printf (")\n");
+      printf ("#define PERFSQR_PP_NORM       CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, pp_norm);
+      printf (")\n");
+      printf ("#define PERFSQR_PP_INVERTED   CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, pp_inverted);
+      printf (")\n");
+    }
+  printf ("\n");
+
+  mpz_init (mhi);
+  mpz_init (mlo);
+
+  printf ("/* This test identifies %.2f%% as non-squares. */\n",
+          (1.0 - total_fraction) * 100.0);
+  printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
+  printf ("  do {                              \\\n");
+  printf ("    mp_limb_t  r;                   \\\n");
+  if (mpz_sgn (pp) != 0)
+    printf ("    PERFSQR_MOD_PP (r, up, usize);  \\\n");
+  else
+    printf ("    PERFSQR_MOD_34 (r, up, usize);  \\\n");
+
+  for (i = 0; i < nfactor; i++)
+    {
+      printf ("                                    \\\n");
+      printf ("    /* %5.2f%% */                    \\\n",
+              (1.0 - factor[i].fraction) * 100.0);
+
+      /* PERFSQR_MOD_1 takes a one-limb mask, PERFSQR_MOD_2 a two-limb
+         (high,low) mask pair.  */
+      printf ("    PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
+              factor[i].divisor <= limb_bits ? 1 : 2,
+              factor[i].divisor);
+      mpz_out_str (stdout, 16, factor[i].inverse);
+      printf ("), \\\n");
+      printf ("                   CNST_LIMB(0x");
+
+      if ( factor[i].divisor <= limb_bits)
+        {
+          mpz_out_str (stdout, 16, factor[i].mask);
+        }
+      else
+        {
+          mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
+          mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
+          mpz_out_str (stdout, 16, mhi);
+          printf ("), CNST_LIMB(0x");
+          mpz_out_str (stdout, 16, mlo);
+        }
+      printf (")); \\\n");
+    }
+
+  printf ("  } while (0)\n");
+  printf ("\n");
+
+  /* NOTE(review): 44/256 is the fraction of squares mod 256; this is
+     hardcoded rather than using sq_res_0x100_num -- presumably always
+     44, but worth confirming if the residue table ever changes.  */
+  printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
+          (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
+  printf ("\n");
+
+  printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
+  printf ("#define PERFSQR_DIVISORS  { 256,");
+  for (i = 0; i < nfactor; i++)
+      printf (" %d,", factor[i].divisor);
+  printf (" }\n");
+
+
+  mpz_clear (mhi);
+  mpz_clear (mlo);
+}
+
+/* Usage: gen-psqr <limbbits> <nailbits>
+
+   Validates the arguments, builds the residue tables via
+   generate_sq_res_0x100() and generate_mod(), and prints the generated
+   perfsqr.h data to stdout.  */
+int
+main (int argc, char *argv[])
+{
+  int  limb_bits, nail_bits;
+
+  if (argc != 3)
+    {
+      fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
+      exit (1);
+    }
+
+  limb_bits = atoi (argv[1]);
+  nail_bits = atoi (argv[2]);
+
+  if (limb_bits <= 0
+      || nail_bits < 0
+      || nail_bits >= limb_bits)
+    {
+      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
+               limb_bits, nail_bits);
+      exit (1);
+    }
+
+  generate_sq_res_0x100 (limb_bits);
+  generate_mod (limb_bits, nail_bits);
+
+  print (limb_bits, nail_bits);
+
+  return 0;
+}
diff --git a/gen-sieve.c b/gen-sieve.c
new file mode 100644
index 0000000..7133918
--- /dev/null
+++ b/gen-sieve.c
@@ -0,0 +1,194 @@
+/* Generate primesieve data.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2021, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "bootstrap.c"
+
+/* Map a sieve bit index to the number it represents: the sequence
+   5, 7, 11, 13, 17, 19, ... of odd numbers not divisible by 3
+   (bit*3+4, rounded up to odd).  */
+static int
+bit_to_n (int bit) { return (bit*3+4)|1; }
+
+/* Emit PRIMESIEVE_INIT_TABLE on stdout: "limit" sieve bits packed into
+   limbs of limb_bits each, bit i set when bit_to_n (i) is composite,
+   followed by the table-size, prime-count and highest-prime defines.
+   limit must be a multiple of limb_bits (checked via the internal-error
+   abort).  Returns the number of limbs emitted.  */
+int
+generate (int limb_bits, int limit)
+{
+  mpz_t  limb;
+  int i, lb, pc, c, totpc, maxprime;
+
+  mpz_init (limb);
+
+  printf ("/* This file generated by gen-sieve.c - DO NOT EDIT. */\n");
+  printf ("\n");
+  printf ("#if GMP_LIMB_BITS != %d\n", limb_bits);
+  printf ("Error, error, this data is for %d bits\n", limb_bits);
+  printf ("#endif\n");
+  printf ("\n");
+  printf ("#define PRIMESIEVE_INIT_TABLE ");
+
+  /* lb = bit position within the current limb, pc = primes in the
+     current limb, c = limbs emitted, totpc = primes overall */
+  maxprime = 3;
+  lb = pc = c = totpc = 0;
+  for (i = 0; i < limit; i++)
+    {
+      if (! isprime (bit_to_n (i)))
+	mpz_setbit (limb, lb);
+      else
+	maxprime = bit_to_n (i), ++pc;
+      ++lb;
+      if (lb == limb_bits)
+	{
+	  ++c;
+	  printf ("\\\n\tCNST_LIMB (0x");
+	  mpz_out_str (stdout, -16, limb);
+	  printf ("),\t/* %d - %d (%d primes) */\t", bit_to_n (i + 1 - limb_bits),
+		  bit_to_n (i + 1) - 1, pc);
+	  totpc += pc;
+	  lb = pc = 0;
+	  mpz_set_ui (limb, 0);
+	}
+    }
+
+  /* Everything must have been flushed: a partial limb here means limit
+     was not a multiple of limb_bits.  */
+  if ((mpz_sgn (limb) | lb | pc) != 0)
+    {
+      printf ("\ngen-sieve: Internal error, during generate (%d, %d).\n", limb_bits,  limit);
+      abort();
+    }
+
+  mpz_clear (limb);
+
+  printf ("\n");
+  printf ("#define PRIMESIEVE_NUMBEROF_TABLE %d\n", c);
+
+  printf ("/* #define PRIMESIEVE_PRIMES_IN_TABLE %d */\n", totpc);
+  printf ("#define PRIMESIEVE_HIGHEST_PRIME %d\n", maxprime);
+  printf ("/* #define PRIMESIEVE_FIRST_UNCHECKED %d */\n", bit_to_n (limit));
+
+  return c;
+}
+
+/* Set bit i of mask when bit_to_n (i) is divisible by a or b.  The
+   2*a*b bit bound presumably covers a full period of the combined
+   pattern (bit_to_n advances 3 per bit) -- confirm against how the
+   masks are folded in gen_sieve_masks.  */
+void
+setmask (mpz_t mask, int a, int b)
+{
+  mpz_set_ui (mask, 0);
+  for (unsigned i = 0; i < 2 * a * b; ++i)
+    if ((bit_to_n (i) % a == 0) || (bit_to_n (i) % b == 0))
+      mpz_setbit (mask, i);
+}
+
+/* Emit the SIEVE_MASKn / SIEVE_2MSKn defines: precomputed composite
+   masks from setmask(), split into limb_bits-sized chunks (the trailing
+   ...T define holds the leftover high part).  Only emitted for the
+   limb sizes handled by the sieve code (24-35 and 61-90 bits); other
+   sizes get no masks.  */
+void
+gen_sieve_masks (int limb_bits) {
+  mpz_t  mask, limb;
+
+  mpz_init (mask);
+  mpz_init (limb);
+
+  printf ("\n");
+  if (limb_bits > 60 && limb_bits < 91)
+    {
+      /* masks for primes 5 and 11: low limb plus tail */
+      setmask (mask, 5, 11);
+
+      mpz_tdiv_r_2exp (limb, mask, limb_bits);
+      printf ("#define SIEVE_MASK1 CNST_LIMB(0x");
+      mpz_out_str (stdout, -16, limb);
+      printf (")\n");
+      mpz_tdiv_q_2exp (limb, mask, limb_bits);
+      printf ("#define SIEVE_MASKT CNST_LIMB(0x");
+      mpz_out_str (stdout, -16, limb);
+      printf (")\n");
+
+      /* masks for primes 7 and 13: two limbs plus tail */
+      setmask (mask, 7, 13);
+
+      mpz_tdiv_r_2exp (limb, mask, limb_bits);
+      printf ("#define SIEVE_2MSK1 CNST_LIMB(0x");
+      mpz_out_str (stdout, -16, limb);
+      printf (")\n");
+      mpz_tdiv_q_2exp (mask, mask, limb_bits);
+      mpz_tdiv_r_2exp (limb, mask, limb_bits);
+      printf ("#define SIEVE_2MSK2 CNST_LIMB(0x");
+      mpz_out_str (stdout, -16, limb);
+      printf (")\n");
+      mpz_tdiv_q_2exp (limb, mask, limb_bits);
+      printf ("#define SIEVE_2MSKT CNST_LIMB(0x");
+      mpz_out_str (stdout, -16, limb);
+      printf (")\n");
+    }
+  else if (limb_bits > 23 && limb_bits < 36)
+    {
+      /* masks for primes 5 and 7: two limbs plus tail */
+      setmask (mask, 5, 7);
+
+      mpz_tdiv_r_2exp (limb, mask, limb_bits);
+      printf ("#define SIEVE_MASK1 CNST_LIMB(0x");
+      mpz_out_str (stdout, -16, limb);
+      printf (")\n");
+      mpz_tdiv_q_2exp (mask, mask, limb_bits);
+      mpz_tdiv_r_2exp (limb, mask, limb_bits);
+      printf ("#define SIEVE_MASK2 CNST_LIMB(0x");
+      mpz_out_str (stdout, -16, limb);
+      printf (")\n");
+      mpz_tdiv_q_2exp (limb, mask, limb_bits);
+      printf ("#define SIEVE_MASKT CNST_LIMB(0x");
+      mpz_out_str (stdout, -16, limb);
+      printf (")\n");
+    }
+  printf ("\n");
+
+  mpz_clear (limb);
+  mpz_clear (mask);
+}
+
+/* 5*2 = 10
+   7*2 = 14
+   5*7*2 = 70 (2*35, 3*24, 4*18, 5*14...)
+   5*11*2 = 110 (2*55, 3*37, 4*28, 5*22...)
+   5*13*2 = 130 (2*65, 3*44, 4*33, 5*26...)
+   7*11*2 = 154 (2*77, 3*52, 4*39, 5*31...)
+   7*13*2 = 182 (2*91, 3*61, 4*46, 5*37...)
+*/
+
+/* Usage: gen-sieve <limbbits>
+
+   Emits the presieved table (64*28 bits, rounded up to a whole number
+   of limbs) and the small-prime masks for the given limb size.  */
+int
+main (int argc, char *argv[])
+{
+  int  limb_bits, limit;
+
+  if (argc != 2)
+    {
+      fprintf (stderr, "Usage: gen-sieve <limbbits>\n");
+      exit (1);
+    }
+
+  limb_bits = atoi (argv[1]);
+
+  limit = 64 * 28; /* bits in the presieved sieve */
+  if (limit % limb_bits != 0)
+    limit += limb_bits - limit % limb_bits;
+  generate (limb_bits, limit);
+  gen_sieve_masks (limb_bits);
+
+  return 0;
+}
diff --git a/gen-trialdivtab.c b/gen-trialdivtab.c
new file mode 100644
index 0000000..218c322
--- /dev/null
+++ b/gen-trialdivtab.c
@@ -0,0 +1,301 @@
+/* gen-trialdivtab.c
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 2009, 2012, 2013, 2016, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/*
+  Generate tables for fast, division-free trial division for GMP.
+
+  There is one main table, ptab.  It contains primes, multiplied together, and
+  several types of pre-computed inverses.  It refers to tables of the type
+  dtab, via the last two indices.  That table contains the individual primes in
+  the range, except that the primes are not actually included in the table (see
+  the P macro; it sneakingly excludes the primes themselves).  Instead, the
+  dtab tables contains tuples for each prime (modular-inverse, limit) used for
+  divisibility checks.
+
+  This interface is not intended for division of very many primes, since then
+  other algorithms apply.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "bootstrap.c"
+
+int sumspills (mpz_t, mpz_t *, int);
+void mpn_mod_1s_4p_cps (mpz_t [7], mpz_t);
+
+int limb_bits;
+
+mpz_t B;
+
+/* Usage: gen-trialdivtab bits [endprime]
+
+   Emits, guarded by WANT_dtab / WANT_ptab, the trial-division tables:
+   the dtab entries (one P(prime, inverse, limit) per prime) and the
+   ptab entries (one per group of primes whose product stays below B/2
+   and passes the mpn_mod_1s_4p_cps spill check), for odd primes up to
+   endprime (default 1290).  Both passes walk the primes with the same
+   grouping logic so the dtab start_idx/count pairs in ptab line up.  */
+int
+main (int argc, char *argv[])
+{
+  int t, p;
+  mpz_t ppp, acc, inv, gmp_numb_max, tmp, Bhalf;
+  mpz_t pre[7];
+  int i;
+  int start_p, end_p, interval_start, interval_end, omitted_p;
+  const char *endtok;
+  int stop;
+  int np, start_idx;
+
+  if (argc < 2)
+    {
+      fprintf (stderr, "usage: %s bits endprime\n", argv[0]);
+      exit (1);
+    }
+
+  limb_bits = atoi (argv[1]);
+
+  end_p = 1290;			/* default end prime */
+  if (argc == 3)
+    end_p = atoi (argv[2]);
+
+  printf ("#if GMP_LIMB_BITS != %d\n", limb_bits);
+  printf ("#error This table is for GMP_LIMB_BITS = %d\n", limb_bits);
+  printf ("#endif\n\n");
+
+  printf ("#if GMP_NAIL_BITS != 0\n");
+  printf ("#error This table does not support nails\n");
+  printf ("#endif\n\n");
+
+  for (i = 0; i < 7; i++)
+    mpz_init (pre[i]);
+
+  /* B = 2^limb_bits, the limb base */
+  mpz_init (B);
+  mpz_setbit (B, limb_bits);
+  mpz_init_set (gmp_numb_max, B);
+  mpz_sub_ui (gmp_numb_max, gmp_numb_max, 1);
+
+  mpz_init (tmp);
+  mpz_init (inv);
+
+  mpz_init (Bhalf);
+  mpz_setbit (Bhalf, limb_bits - 1);
+
+  start_p = 3;
+
+  mpz_init_set_ui (ppp, 1);
+  mpz_init (acc);
+  interval_start = start_p;
+  omitted_p = 3;
+  interval_end = 0;
+
+/*  printf ("static struct gmp_primes_dtab gmp_primes_dtab[] = {\n"); */
+
+  printf ("#ifdef WANT_dtab\n");
+
+  /* First pass: accumulate primes into ppp until adding the next one
+     would exceed B/2 or spill, then flush the completed group as dtab
+     P() entries.  The final (incomplete) group is never flushed; its
+     first prime becomes SMALLEST_OMITTED_PRIME.  */
+  for (t = start_p; t <= end_p; t += 2)
+    {
+      if (! isprime (t))
+	continue;
+
+      mpz_mul_ui (acc, ppp, t);
+      stop = mpz_cmp (acc, Bhalf) >= 0;
+      if (!stop)
+	{
+	  mpn_mod_1s_4p_cps (pre, acc);
+	  stop = sumspills (acc, pre + 2, 5);
+	}
+
+      if (stop)
+	{
+	  for (p = interval_start; p <= interval_end; p += 2)
+	    {
+	      if (! isprime (p))
+		continue;
+
+	      printf ("  P(%d,", (int) p);
+	      mpz_invert_ui_2exp (inv, p, limb_bits);
+	      printf ("CNST_LIMB(0x");  mpz_out_str (stdout, 16, inv);  printf ("),");
+
+	      /* limit = floor((B-1)/p): largest limb divisible check bound */
+	      mpz_tdiv_q_ui (tmp, gmp_numb_max, p);
+	      printf ("CNST_LIMB(0x");  mpz_out_str (stdout, 16, tmp);
+	      printf (")),\n");
+	    }
+	  mpz_set_ui (ppp, t);
+	  interval_start = t;
+	  omitted_p = t;
+	}
+      else
+	{
+	  mpz_set (ppp, acc);
+	}
+      interval_end = t;
+    }
+  printf ("#define SMALLEST_OMITTED_PRIME %d\n", (int) omitted_p);
+  printf ("#endif\n");
+
+  printf ("#ifdef WANT_ptab\n");
+
+/*  printf ("static struct gmp_primes_ptab gmp_primes_ptab[] = {\n"); */
+
+  endtok = "";
+
+  /* Second pass: same grouping, but emit one ptab entry per completed
+     group: the product ppp, its mpn_mod_1s_4p_cps precomputation, and
+     the (start_idx, count) slice of dtab it covers.  */
+  mpz_set_ui (ppp, 1);
+  interval_start = start_p;
+  interval_end = 0;
+  np = 0;
+  start_idx = 0;
+  for (t = start_p; t <= end_p; t += 2)
+    {
+      if (! isprime (t))
+	continue;
+
+      mpz_mul_ui (acc, ppp, t);
+
+      stop = mpz_cmp (acc, Bhalf) >= 0;
+      if (!stop)
+	{
+	  mpn_mod_1s_4p_cps (pre, acc);
+	  stop = sumspills (acc, pre + 2, 5);
+	}
+
+      if (stop)
+	{
+	  mpn_mod_1s_4p_cps (pre, ppp);
+	  printf ("%s", endtok);
+	  printf ("  {CNST_LIMB(0x");  mpz_out_str (stdout, 16, ppp);
+	  printf ("),{CNST_LIMB(0x");  mpz_out_str (stdout, 16, pre[0]);
+	  printf ("),%d", (int) PTR(pre[1])[0]); /* cnt as a single limb; PTR is gmp-impl's limb accessor */
+	  for (i = 0; i < 5; i++)
+	    {
+	      printf (",");
+	      printf ("CNST_LIMB(0x");  mpz_out_str (stdout, 16, pre[2 + i]);
+	      printf (")");
+	    }
+	  printf ("},");
+	  printf ("%d,", start_idx);
+	  printf ("%d}", np - start_idx);
+
+	  endtok = ",\n";
+	  mpz_set_ui (ppp, t);
+	  interval_start = t;
+	  start_idx = np;
+	}
+      else
+	{
+	  mpz_set (ppp, acc);
+	}
+      interval_end = t;
+      np++;
+    }
+
+  printf ("\n");
+  printf ("#endif\n");
+
+  return 0;
+}
+
+/* Despite the name: returns the number of significant bits of x,
+   i.e. floor(log2(x)) + 1 for x > 0, and 0 for x == 0.  */
+unsigned long
+mpz_log2 (mpz_t x)
+{
+  return mpz_sgn (x) ? mpz_sizeinbase (x, 2) : 0;
+}
+
+/* Bignum model of the limb-level mpn_mod_1s_4p_cps precomputation for
+   divisor b: cps[0] = the reciprocal bi ~ B^2/b with the msb implicit,
+   cps[1] = the normalization count cnt, cps[2..6] = B^1..B^5 mod b.
+   This is exactly the data a generated ptab entry must contain.  */
+void
+mpn_mod_1s_4p_cps (mpz_t cps[7], mpz_t bparm)
+{
+  mpz_t b, bi;
+  mpz_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  mpz_t t;
+  int cnt;
+
+  mpz_init_set (b, bparm);
+
+  cnt = limb_bits - mpz_log2 (b);
+
+  mpz_init (bi);
+  mpz_init (t);
+  mpz_init (B1modb);
+  mpz_init (B2modb);
+  mpz_init (B3modb);
+  mpz_init (B4modb);
+  mpz_init (B5modb);
+
+  mpz_set_ui (t, 1);
+  mpz_mul_2exp (t, t, limb_bits - cnt);
+  mpz_sub (t, t, b);
+  mpz_mul_2exp (t, t, limb_bits);
+  mpz_tdiv_q (bi, t, b);		/* bi = B^2/b, except msb */
+
+  mpz_set_ui (t, 1);
+  mpz_mul_2exp (t, t, limb_bits);	/* t = B */
+  mpz_tdiv_r (B1modb, t, b);
+
+  /* each further power: multiply the previous remainder by B, reduce */
+  mpz_mul_2exp (t, B1modb, limb_bits);
+  mpz_tdiv_r (B2modb, t, b);
+
+  mpz_mul_2exp (t, B2modb, limb_bits);
+  mpz_tdiv_r (B3modb, t, b);
+
+  mpz_mul_2exp (t, B3modb, limb_bits);
+  mpz_tdiv_r (B4modb, t, b);
+
+  mpz_mul_2exp (t, B4modb, limb_bits);
+  mpz_tdiv_r (B5modb, t, b);
+
+  mpz_set (cps[0], bi);
+  mpz_set_ui (cps[1], cnt);
+  /* Shift by 0, i.e. plain copies -- the limb-level code stores these
+     pre-shifted by cnt; NOTE(review): confirm this unnormalized form
+     matches what mpn/generic/mod_1_4.c expects of the tables.  */
+  mpz_tdiv_q_2exp (cps[2], B1modb, 0);
+  mpz_tdiv_q_2exp (cps[3], B2modb, 0);
+  mpz_tdiv_q_2exp (cps[4], B3modb, 0);
+  mpz_tdiv_q_2exp (cps[5], B4modb, 0);
+  mpz_tdiv_q_2exp (cps[6], B5modb, 0);
+
+  mpz_clear (b);
+  mpz_clear (bi);
+  mpz_clear (t);
+  mpz_clear (B1modb);
+  mpz_clear (B2modb);
+  mpz_clear (B3modb);
+  mpz_clear (B4modb);
+  mpz_clear (B5modb);
+}
+
+/* Return non-zero when a[0] + ... + a[n-1] >= B, i.e. when the sum of
+   the precomputed B^k mod b values would not fit in a single limb --
+   the "spill" condition that disqualifies a candidate prime group.
+   ppp is unused here; kept for signature symmetry with the callers.  */
+int
+sumspills (mpz_t ppp, mpz_t *a, int n)
+{
+  mpz_t s;
+  int i, ret;
+
+  mpz_init_set (s, a[0]);
+
+  for (i = 1; i < n; i++)
+    {
+      mpz_add (s, s, a[i]);
+    }
+  ret = mpz_cmp (s, B) >= 0;
+  mpz_clear (s);
+
+  return ret;
+}
diff --git a/gmp-h.in b/gmp-h.in
new file mode 100644
index 0000000..8598e97
--- /dev/null
+++ b/gmp-h.in
@@ -0,0 +1,2344 @@
+/* Definitions for GNU multiple precision functions.   -*- mode: c -*-
+
+Copyright 1991, 1993-1997, 1999-2016, 2020, 2021 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#ifndef __GMP_H__
+
+#if defined (__cplusplus)
+#include <iosfwd>   /* for std::istream, std::ostream, std::string */
+#include <cstdio>
+#endif
+
+
+/* Instantiated by configure. */
+#if ! defined (__GMP_WITHIN_CONFIGURE)
+#define __GMP_HAVE_HOST_CPU_FAMILY_power   @HAVE_HOST_CPU_FAMILY_power@
+#define __GMP_HAVE_HOST_CPU_FAMILY_powerpc @HAVE_HOST_CPU_FAMILY_powerpc@
+#define GMP_LIMB_BITS                      @GMP_LIMB_BITS@
+#define GMP_NAIL_BITS                      @GMP_NAIL_BITS@
+#endif
+#define GMP_NUMB_BITS     (GMP_LIMB_BITS - GMP_NAIL_BITS)
+#define GMP_NUMB_MASK     ((~ __GMP_CAST (mp_limb_t, 0)) >> GMP_NAIL_BITS)
+#define GMP_NUMB_MAX      GMP_NUMB_MASK
+#define GMP_NAIL_MASK     (~ GMP_NUMB_MASK)
+
+
+#ifndef __GNU_MP__
+#define __GNU_MP__ 6
+
+#include <stddef.h>    /* for size_t */
+#include <limits.h>
+
+/* Instantiated by configure. */
+#if ! defined (__GMP_WITHIN_CONFIGURE)
+@DEFN_LONG_LONG_LIMB@
+#define __GMP_LIBGMP_DLL  @LIBGMP_DLL@
+#endif
+
+
+/* __GMP_DECLSPEC supports Windows DLL versions of libgmp, and is empty in
+   all other circumstances.
+
+   When compiling objects for libgmp, __GMP_DECLSPEC is an export directive,
+   or when compiling for an application it's an import directive.  The two
+   cases are differentiated by __GMP_WITHIN_GMP defined by the GMP Makefiles
+   (and not defined from an application).
+
+   __GMP_DECLSPEC_XX is similarly used for libgmpxx.  __GMP_WITHIN_GMPXX
+   indicates when building libgmpxx, and in that case libgmpxx functions are
+   exports, but libgmp functions which might get called are imports.
+
+   Libtool DLL_EXPORT define is not used.
+
+   There's no attempt to support GMP built both static and DLL.  Doing so
+   would mean applications would have to tell us which of the two is going
+   to be used when linking, and that seems very tedious and error prone if
+   using GMP by hand, and equally tedious from a package since autoconf and
+   automake don't give much help.
+
+   __GMP_DECLSPEC is required on all documented global functions and
+   variables, the various internals in gmp-impl.h etc can be left unadorned.
+   But internals used by the test programs or speed measuring programs
+   should have __GMP_DECLSPEC, and certainly constants or variables must
+   have it or the wrong address will be resolved.
+
+   In gcc __declspec can go at either the start or end of a prototype.
+
+   In Microsoft C __declspec must go at the start, or after the type like
+   void __declspec(...) *foo()".  There's no __dllexport or anything to
+   guard against someone foolish #defining dllexport.  _export used to be
+   available, but no longer.
+
+   In Borland C _export still exists, but needs to go after the type, like
+   "void _export foo();".  Would have to change the __GMP_DECLSPEC syntax to
+   make use of that.  Probably more trouble than it's worth.  */
+
+#if defined (__GNUC__)
+#define __GMP_DECLSPEC_EXPORT  __declspec(__dllexport__)
+#define __GMP_DECLSPEC_IMPORT  __declspec(__dllimport__)
+#endif
+#if defined (_MSC_VER) || defined (__BORLANDC__)
+#define __GMP_DECLSPEC_EXPORT  __declspec(dllexport)
+#define __GMP_DECLSPEC_IMPORT  __declspec(dllimport)
+#endif
+#ifdef __WATCOMC__
+#define __GMP_DECLSPEC_EXPORT  __export
+#define __GMP_DECLSPEC_IMPORT  __import
+#endif
+#ifdef __IBMC__
+#define __GMP_DECLSPEC_EXPORT  _Export
+#define __GMP_DECLSPEC_IMPORT  _Import
+#endif
+
+#if __GMP_LIBGMP_DLL
+#ifdef __GMP_WITHIN_GMP
+/* compiling to go into a DLL libgmp */
+#define __GMP_DECLSPEC  __GMP_DECLSPEC_EXPORT
+#else
+/* compiling to go into an application which will link to a DLL libgmp */
+#define __GMP_DECLSPEC  __GMP_DECLSPEC_IMPORT
+#endif
+#else
+/* all other cases */
+#define __GMP_DECLSPEC
+#endif
+
+
+#ifdef __GMP_SHORT_LIMB
+typedef unsigned int		mp_limb_t;
+typedef int			mp_limb_signed_t;
+#else
+#ifdef _LONG_LONG_LIMB
+typedef unsigned long long int	mp_limb_t;
+typedef long long int		mp_limb_signed_t;
+#else
+typedef unsigned long int	mp_limb_t;
+typedef long int		mp_limb_signed_t;
+#endif
+#endif
+typedef unsigned long int	mp_bitcnt_t;
+
+/* For reference, note that the name __mpz_struct gets into C++ mangled
+   function names, which means although the "__" suggests an internal, we
+   must leave this name for binary compatibility.  */
+typedef struct
+{
+  int _mp_alloc;		/* Number of *limbs* allocated and pointed
+				   to by the _mp_d field.  */
+  int _mp_size;			/* abs(_mp_size) is the number of limbs the
+				   last field points to.  If _mp_size is
+				   negative this is a negative number.  */
+  mp_limb_t *_mp_d;		/* Pointer to the limbs.  */
+} __mpz_struct;
+
+#endif /* __GNU_MP__ */
+
+
+typedef __mpz_struct MP_INT;    /* gmp 1 source compatibility */
+typedef __mpz_struct mpz_t[1];
+
+typedef mp_limb_t *		mp_ptr;
+typedef const mp_limb_t *	mp_srcptr;
+#if defined (_CRAY) && ! defined (_CRAYMPP)
+/* plain `int' is much faster (48 bits) */
+#define __GMP_MP_SIZE_T_INT     1
+typedef int			mp_size_t;
+typedef int			mp_exp_t;
+#else
+#define __GMP_MP_SIZE_T_INT     0
+typedef long int		mp_size_t;
+typedef long int		mp_exp_t;
+#endif
+
+/* A rational number, stored as a pair of integers.  As with __mpz_struct
+   above, the "__" name appears in C++ mangled symbols and must be kept
+   for binary compatibility.  */
+typedef struct
+{
+  __mpz_struct _mp_num;		/* Numerator.  */
+  __mpz_struct _mp_den;		/* Denominator.  */
+} __mpq_struct;
+
+typedef __mpq_struct MP_RAT;    /* gmp 1 source compatibility */
+typedef __mpq_struct mpq_t[1];
+
+typedef struct
+{
+  int _mp_prec;			/* Max precision, in number of `mp_limb_t's.
+				   Set by mpf_init and modified by
+				   mpf_set_prec.  The area pointed to by the
+				   _mp_d field contains `prec' + 1 limbs.  */
+  int _mp_size;			/* abs(_mp_size) is the number of limbs the
+				   last field points to.  If _mp_size is
+				   negative this is a negative number.  */
+  mp_exp_t _mp_exp;		/* Exponent, in the base of `mp_limb_t'.  */
+  mp_limb_t *_mp_d;		/* Pointer to the limbs.  */
+} __mpf_struct;
+
+/* typedef __mpf_struct MP_FLOAT; */
+typedef __mpf_struct mpf_t[1];
+
+/* Available random number generation algorithms.  */
+typedef enum
+{
+  GMP_RAND_ALG_DEFAULT = 0,
+  GMP_RAND_ALG_LC = GMP_RAND_ALG_DEFAULT /* Linear congruential.  */
+} gmp_randalg_t;
+
+/* Random state struct.  */
+typedef struct
+{
+  mpz_t _mp_seed;	  /* _mp_d member points to state of the generator. */
+  gmp_randalg_t _mp_alg;  /* Currently unused. */
+  union {
+    void *_mp_lc;         /* Pointer to function pointers structure.  */
+  } _mp_algdata;
+} __gmp_randstate_struct;
+typedef __gmp_randstate_struct gmp_randstate_t[1];
+
+/* Types for function declarations in gmp files.  */
+/* ??? Should not pollute user name space with these ??? */
+typedef const __mpz_struct *mpz_srcptr;
+typedef __mpz_struct *mpz_ptr;
+typedef const __mpf_struct *mpf_srcptr;
+typedef __mpf_struct *mpf_ptr;
+typedef const __mpq_struct *mpq_srcptr;
+typedef __mpq_struct *mpq_ptr;
+typedef __gmp_randstate_struct *gmp_randstate_ptr;
+typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
+
+
+#if __GMP_LIBGMP_DLL
+#ifdef __GMP_WITHIN_GMPXX
+/* compiling to go into a DLL libgmpxx */
+#define __GMP_DECLSPEC_XX  __GMP_DECLSPEC_EXPORT
+#else
+/* compiling to go into an application which will link to a DLL libgmpxx */
+#define __GMP_DECLSPEC_XX  __GMP_DECLSPEC_IMPORT
+#endif
+#else
+/* all other cases */
+#define __GMP_DECLSPEC_XX
+#endif
+
+
+#ifndef __MPN
+#define __MPN(x) __gmpn_##x
+#endif
+
+/* For reference, "defined(EOF)" cannot be used here.  In g++ 2.95.4,
+   <iostream> defines EOF but not FILE.  */
+#if defined (FILE)                                              \
+  || defined (H_STDIO)                                          \
+  || defined (_H_STDIO)               /* AIX */                 \
+  || defined (_STDIO_H)               /* glibc, Sun, SCO */     \
+  || defined (_STDIO_H_)              /* BSD, OSF */            \
+  || defined (__STDIO_H)              /* Borland */             \
+  || defined (__STDIO_H__)            /* IRIX */                \
+  || defined (_STDIO_INCLUDED)        /* HPUX */                \
+  || defined (__dj_include_stdio_h_)  /* DJGPP */               \
+  || defined (_FILE_DEFINED)          /* Microsoft */           \
+  || defined (__STDIO__)              /* Apple MPW MrC */       \
+  || defined (_MSL_STDIO_H)           /* Metrowerks */          \
+  || defined (_STDIO_H_INCLUDED)      /* QNX4 */		\
+  || defined (_ISO_STDIO_ISO_H)       /* Sun C++ */		\
+  || defined (__STDIO_LOADED)         /* VMS */			\
+  || defined (_STDIO)                 /* HPE NonStop */         \
+  || defined (__DEFINED_FILE)         /* musl */
+#define _GMP_H_HAVE_FILE 1
+#endif
+
+/* In ISO C, if a prototype involving "struct obstack *" is given without
+   that structure defined, then the struct is scoped down to just the
+   prototype, causing a conflict if it's subsequently defined for real.  So
+   only give prototypes if we've got obstack.h.  */
+#if defined (_OBSTACK_H)   /* glibc <obstack.h> */
+#define _GMP_H_HAVE_OBSTACK 1
+#endif
+
+/* The prototypes for gmp_vprintf etc are provided only if va_list is defined,
+   via an application having included <stdarg.h>.  Usually va_list is a typedef
+   so can't be tested directly, but C99 specifies that va_start is a macro.
+
+   <stdio.h> will define some sort of va_list for vprintf and vfprintf, but
+   let's not bother trying to use that since it's not standard and since
+   application uses for gmp_vprintf etc will almost certainly require the
+   whole <stdarg.h> anyway.  */
+
+#ifdef va_start
+#define _GMP_H_HAVE_VA_LIST 1
+#endif
+
+/* Test for gcc >= maj.min, as per __GNUC_PREREQ in glibc */
+#if defined (__GNUC__) && defined (__GNUC_MINOR__)
+#define __GMP_GNUC_PREREQ(maj, min) \
+  ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+#define __GMP_GNUC_PREREQ(maj, min)  0
+#endif
+
+/* "pure" is in gcc 2.96 and up, see "(gcc)Function Attributes".  Basically
+   it means a function does nothing but examine its arguments and memory
+   (global or via arguments) to generate a return value, but changes nothing
+   and has no side-effects.  __GMP_NO_ATTRIBUTE_CONST_PURE lets
+   tune/common.c etc turn this off when trying to write timing loops.  */
+#if __GMP_GNUC_PREREQ (2,96) && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
+#define __GMP_ATTRIBUTE_PURE   __attribute__ ((__pure__))
+#else
+#define __GMP_ATTRIBUTE_PURE
+#endif
+
+
+/* __GMP_CAST allows us to use static_cast in C++, so our macros are clean
+   to "g++ -Wold-style-cast".
+
+   Casts in "extern inline" code within an extern "C" block don't induce
+   these warnings, so __GMP_CAST only needs to be used on documented
+   macros.  */
+
+#ifdef __cplusplus
+#define __GMP_CAST(type, expr)  (static_cast<type> (expr))
+#else
+#define __GMP_CAST(type, expr)  ((type) (expr))
+#endif
+
+
+/* An empty "throw ()" means the function doesn't throw any C++ exceptions,
+   this can save some stack frame info in applications.
+
+   Currently it's given only on functions which never divide-by-zero etc,
+   don't allocate memory, and are expected to never need to allocate memory.
+   This leaves open the possibility of a C++ throw from a future GMP
+   exceptions scheme.
+
+   mpz_set_ui etc are omitted to leave open the lazy allocation scheme
+   described in doc/tasks.html.  mpz_get_d etc are omitted to leave open
+   exceptions for float overflows.
+
+   Note that __GMP_NOTHROW must be given on any inlines the same as on their
+   prototypes (for g++ at least, where they're used together).  Note also
+   that g++ 3.0 demands that __GMP_NOTHROW is before other attributes like
+   __GMP_ATTRIBUTE_PURE.  */
+
+#if defined (__cplusplus)
+#if __cplusplus >= 201103L
+#define __GMP_NOTHROW  noexcept
+#else
+#define __GMP_NOTHROW  throw ()
+#endif
+#else
+#define __GMP_NOTHROW
+#endif
+
+
+/* PORTME: What other compilers have a useful "extern inline"?  "static
+   inline" would be an acceptable substitute if the compiler (or linker)
+   discards unused statics.  */
+
+ /* gcc has __inline__ in all modes, including strict ansi.  Give a prototype
+    for an inline too, so as to correctly specify "dllimport" on windows, in
+    case the function is called rather than inlined.
+    GCC 4.3 and above with -std=c99 or -std=gnu99 implements ISO C99
+    inline semantics, unless -fgnu89-inline is used.  */
+#ifdef __GNUC__
+#if (defined __GNUC_STDC_INLINE__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 2) \
+  || (defined __GNUC_GNU_INLINE__ && defined __cplusplus)
+#define __GMP_EXTERN_INLINE extern __inline__ __attribute__ ((__gnu_inline__))
+#else
+#define __GMP_EXTERN_INLINE      extern __inline__
+#endif
+#define __GMP_INLINE_PROTOTYPES  1
+#endif
+
+/* DEC C (eg. version 5.9) supports "static __inline foo()", even in -std1
+   strict ANSI mode.  Inlining is done even when not optimizing (ie. -O0
+   mode, which is the default), but an unnecessary local copy of foo is
+   emitted unless -O is used.  "extern __inline" is accepted, but the
+   "extern" appears to be ignored, ie. it becomes a plain global function
+   but which is inlined within its file.  Don't know if all old versions of
+   DEC C supported __inline, but as a start let's do the right thing for
+   current versions.  */
+#ifdef __DECC
+#define __GMP_EXTERN_INLINE  static __inline
+#endif
+
+/* SCO OpenUNIX 8 cc supports "static inline foo()" but not in -Xc strict
+   ANSI mode (__STDC__ is 1 in that mode).  Inlining only actually takes
+   place under -O.  Without -O "foo" seems to be emitted whether it's used
+   or not, which is wasteful.  "extern inline foo()" isn't useful, the
+   "extern" is apparently ignored, so foo is inlined if possible but also
+   emitted as a global, which causes multiple definition errors when
+   building a shared libgmp.  */
+#ifdef __SCO_VERSION__
+#if __SCO_VERSION__ > 400000000 && __STDC__ != 1 \
+  && ! defined (__GMP_EXTERN_INLINE)
+#define __GMP_EXTERN_INLINE  static inline
+#endif
+#endif
+
+/* Microsoft's C compiler accepts __inline */
+#ifdef _MSC_VER
+#define __GMP_EXTERN_INLINE  __inline
+#endif
+
+/* Recent enough Sun C compilers want "inline" */
+#if defined (__SUNPRO_C) && __SUNPRO_C >= 0x560 \
+  && ! defined (__GMP_EXTERN_INLINE)
+#define __GMP_EXTERN_INLINE  inline
+#endif
+
+/* Somewhat older Sun C compilers want "static inline" */
+#if defined (__SUNPRO_C) && __SUNPRO_C >= 0x540 \
+  && ! defined (__GMP_EXTERN_INLINE)
+#define __GMP_EXTERN_INLINE  static inline
+#endif
+
+
+/* C++ always has "inline" and since it's a normal feature the linker should
+   discard duplicate non-inlined copies, or if it doesn't then that's a
+   problem for everyone, not just GMP.  */
+#if defined (__cplusplus) && ! defined (__GMP_EXTERN_INLINE)
+#define __GMP_EXTERN_INLINE  inline
+#endif
+
+/* Don't do any inlining within a configure run, since if the compiler ends
+   up emitting copies of the code into the object file it can end up
+   demanding the various support routines (like mpn_popcount) for linking,
+   making the "alloca" test and perhaps others fail.  And on hppa ia64 a
+   pre-release gcc 3.2 was seen not respecting the "extern" in "extern
+   __inline__", triggering this problem too.  */
+#if defined (__GMP_WITHIN_CONFIGURE) && ! __GMP_WITHIN_CONFIGURE_INLINE
+#undef __GMP_EXTERN_INLINE
+#endif
+
+/* By default, don't give a prototype when there's going to be an inline
+   version.  Note in particular that Cray C++ objects to the combination of
+   prototype and inline.  */
+#ifdef __GMP_EXTERN_INLINE
+#ifndef __GMP_INLINE_PROTOTYPES
+#define __GMP_INLINE_PROTOTYPES  0
+#endif
+#else
+#define __GMP_INLINE_PROTOTYPES  1
+#endif
+
+
+#define __GMP_ABS(x)   ((x) >= 0 ? (x) : -(x))
+#define __GMP_MAX(h,i) ((h) > (i) ? (h) : (i))
+
+
+/* __builtin_expect is in gcc 3.0, and not in 2.95. */
+#if __GMP_GNUC_PREREQ (3,0)
+#define __GMP_LIKELY(cond)    __builtin_expect ((cond) != 0, 1)
+#define __GMP_UNLIKELY(cond)  __builtin_expect ((cond) != 0, 0)
+#else
+#define __GMP_LIKELY(cond)    (cond)
+#define __GMP_UNLIKELY(cond)  (cond)
+#endif
+
+#ifdef _CRAY
+#define __GMP_CRAY_Pragma(str)  _Pragma (str)
+#else
+#define __GMP_CRAY_Pragma(str)
+#endif
+
+
+/* Allow direct user access to numerator and denominator of an mpq_t object.  */
+#define mpq_numref(Q) (&((Q)->_mp_num))
+#define mpq_denref(Q) (&((Q)->_mp_den))
+
+
+#if defined (__cplusplus)
+extern "C" {
+using std::FILE;
+#endif
+
+#define mp_set_memory_functions __gmp_set_memory_functions
+__GMP_DECLSPEC void mp_set_memory_functions (void *(*) (size_t),
+				      void *(*) (void *, size_t, size_t),
+				      void (*) (void *, size_t)) __GMP_NOTHROW;
+
+#define mp_get_memory_functions __gmp_get_memory_functions
+__GMP_DECLSPEC void mp_get_memory_functions (void *(**) (size_t),
+				      void *(**) (void *, size_t, size_t),
+				      void (**) (void *, size_t)) __GMP_NOTHROW;
+
+#define mp_bits_per_limb __gmp_bits_per_limb
+__GMP_DECLSPEC extern const int mp_bits_per_limb;
+
+#define gmp_errno __gmp_errno
+__GMP_DECLSPEC extern int gmp_errno;
+
+#define gmp_version __gmp_version
+__GMP_DECLSPEC extern const char * const gmp_version;
+
+
+/**************** Random number routines.  ****************/
+
+/* obsolete */
+#define gmp_randinit __gmp_randinit
+__GMP_DECLSPEC void gmp_randinit (gmp_randstate_ptr, gmp_randalg_t, ...);
+
+#define gmp_randinit_default __gmp_randinit_default
+__GMP_DECLSPEC void gmp_randinit_default (gmp_randstate_ptr);
+
+#define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp
+__GMP_DECLSPEC void gmp_randinit_lc_2exp (gmp_randstate_ptr, mpz_srcptr, unsigned long int, mp_bitcnt_t);
+
+#define gmp_randinit_lc_2exp_size __gmp_randinit_lc_2exp_size
+__GMP_DECLSPEC int gmp_randinit_lc_2exp_size (gmp_randstate_ptr, mp_bitcnt_t);
+
+#define gmp_randinit_mt __gmp_randinit_mt
+__GMP_DECLSPEC void gmp_randinit_mt (gmp_randstate_ptr);
+
+#define gmp_randinit_set __gmp_randinit_set
+__GMP_DECLSPEC void gmp_randinit_set (gmp_randstate_ptr, gmp_randstate_srcptr);
+
+#define gmp_randseed __gmp_randseed
+__GMP_DECLSPEC void gmp_randseed (gmp_randstate_ptr, mpz_srcptr);
+
+#define gmp_randseed_ui __gmp_randseed_ui
+__GMP_DECLSPEC void gmp_randseed_ui (gmp_randstate_ptr, unsigned long int);
+
+#define gmp_randclear __gmp_randclear
+__GMP_DECLSPEC void gmp_randclear (gmp_randstate_ptr);
+
+#define gmp_urandomb_ui __gmp_urandomb_ui
+__GMP_DECLSPEC unsigned long gmp_urandomb_ui (gmp_randstate_ptr, unsigned long);
+
+#define gmp_urandomm_ui __gmp_urandomm_ui
+__GMP_DECLSPEC unsigned long gmp_urandomm_ui (gmp_randstate_ptr, unsigned long);
+
+
+/**************** Formatted output routines.  ****************/
+
+#define gmp_asprintf __gmp_asprintf
+__GMP_DECLSPEC int gmp_asprintf (char **, const char *, ...);
+
+#define gmp_fprintf __gmp_fprintf
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC int gmp_fprintf (FILE *, const char *, ...);
+#endif
+
+#define gmp_obstack_printf __gmp_obstack_printf
+#if defined (_GMP_H_HAVE_OBSTACK)
+__GMP_DECLSPEC int gmp_obstack_printf (struct obstack *, const char *, ...);
+#endif
+
+#define gmp_obstack_vprintf __gmp_obstack_vprintf
+#if defined (_GMP_H_HAVE_OBSTACK) && defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_obstack_vprintf (struct obstack *, const char *, va_list);
+#endif
+
+#define gmp_printf __gmp_printf
+__GMP_DECLSPEC int gmp_printf (const char *, ...);
+
+#define gmp_snprintf __gmp_snprintf
+__GMP_DECLSPEC int gmp_snprintf (char *, size_t, const char *, ...);
+
+#define gmp_sprintf __gmp_sprintf
+__GMP_DECLSPEC int gmp_sprintf (char *, const char *, ...);
+
+#define gmp_vasprintf __gmp_vasprintf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vasprintf (char **, const char *, va_list);
+#endif
+
+#define gmp_vfprintf __gmp_vfprintf
+#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vfprintf (FILE *, const char *, va_list);
+#endif
+
+#define gmp_vprintf __gmp_vprintf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vprintf (const char *, va_list);
+#endif
+
+#define gmp_vsnprintf __gmp_vsnprintf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vsnprintf (char *, size_t, const char *, va_list);
+#endif
+
+#define gmp_vsprintf __gmp_vsprintf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vsprintf (char *, const char *, va_list);
+#endif
+
+
+/**************** Formatted input routines.  ****************/
+
+#define gmp_fscanf __gmp_fscanf
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC int gmp_fscanf (FILE *, const char *, ...);
+#endif
+
+#define gmp_scanf __gmp_scanf
+__GMP_DECLSPEC int gmp_scanf (const char *, ...);
+
+#define gmp_sscanf __gmp_sscanf
+__GMP_DECLSPEC int gmp_sscanf (const char *, const char *, ...);
+
+#define gmp_vfscanf __gmp_vfscanf
+#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vfscanf (FILE *, const char *, va_list);
+#endif
+
+#define gmp_vscanf __gmp_vscanf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vscanf (const char *, va_list);
+#endif
+
+#define gmp_vsscanf __gmp_vsscanf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vsscanf (const char *, const char *, va_list);
+#endif
+
+
+/**************** Integer (i.e. Z) routines.  ****************/
+
+#define _mpz_realloc __gmpz_realloc
+#define mpz_realloc __gmpz_realloc
+__GMP_DECLSPEC void *_mpz_realloc (mpz_ptr, mp_size_t);
+
+#define mpz_abs __gmpz_abs
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_abs)
+__GMP_DECLSPEC void mpz_abs (mpz_ptr, mpz_srcptr);
+#endif
+
+#define mpz_add __gmpz_add
+__GMP_DECLSPEC void mpz_add (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_add_ui __gmpz_add_ui
+__GMP_DECLSPEC void mpz_add_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_addmul __gmpz_addmul
+__GMP_DECLSPEC void mpz_addmul (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_addmul_ui __gmpz_addmul_ui
+__GMP_DECLSPEC void mpz_addmul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_and __gmpz_and
+__GMP_DECLSPEC void mpz_and (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_array_init __gmpz_array_init
+__GMP_DECLSPEC void mpz_array_init (mpz_ptr, mp_size_t, mp_size_t);
+
+#define mpz_bin_ui __gmpz_bin_ui
+__GMP_DECLSPEC void mpz_bin_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_bin_uiui __gmpz_bin_uiui
+__GMP_DECLSPEC void mpz_bin_uiui (mpz_ptr, unsigned long int, unsigned long int);
+
+#define mpz_cdiv_q __gmpz_cdiv_q
+__GMP_DECLSPEC void mpz_cdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_cdiv_q_2exp __gmpz_cdiv_q_2exp
+__GMP_DECLSPEC void mpz_cdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
+
+#define mpz_cdiv_q_ui __gmpz_cdiv_q_ui
+__GMP_DECLSPEC unsigned long int mpz_cdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_cdiv_qr __gmpz_cdiv_qr
+__GMP_DECLSPEC void mpz_cdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui
+__GMP_DECLSPEC unsigned long int mpz_cdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_cdiv_r __gmpz_cdiv_r
+__GMP_DECLSPEC void mpz_cdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_cdiv_r_2exp __gmpz_cdiv_r_2exp
+__GMP_DECLSPEC void mpz_cdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
+
+#define mpz_cdiv_r_ui __gmpz_cdiv_r_ui
+__GMP_DECLSPEC unsigned long int mpz_cdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_cdiv_ui __gmpz_cdiv_ui
+__GMP_DECLSPEC unsigned long int mpz_cdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_clear __gmpz_clear
+__GMP_DECLSPEC void mpz_clear (mpz_ptr);
+
+#define mpz_clears __gmpz_clears
+__GMP_DECLSPEC void mpz_clears (mpz_ptr, ...);
+
+#define mpz_clrbit __gmpz_clrbit
+__GMP_DECLSPEC void mpz_clrbit (mpz_ptr, mp_bitcnt_t);
+
+#define mpz_cmp __gmpz_cmp
+__GMP_DECLSPEC int mpz_cmp (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_cmp_d __gmpz_cmp_d
+__GMP_DECLSPEC int mpz_cmp_d (mpz_srcptr, double) __GMP_ATTRIBUTE_PURE;
+
+#define _mpz_cmp_si __gmpz_cmp_si
+__GMP_DECLSPEC int _mpz_cmp_si (mpz_srcptr, signed long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define _mpz_cmp_ui __gmpz_cmp_ui
+__GMP_DECLSPEC int _mpz_cmp_ui (mpz_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_cmpabs __gmpz_cmpabs
+__GMP_DECLSPEC int mpz_cmpabs (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_cmpabs_d __gmpz_cmpabs_d
+__GMP_DECLSPEC int mpz_cmpabs_d (mpz_srcptr, double) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_cmpabs_ui __gmpz_cmpabs_ui
+__GMP_DECLSPEC int mpz_cmpabs_ui (mpz_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_com __gmpz_com
+__GMP_DECLSPEC void mpz_com (mpz_ptr, mpz_srcptr);
+
+#define mpz_combit __gmpz_combit
+__GMP_DECLSPEC void mpz_combit (mpz_ptr, mp_bitcnt_t);
+
+#define mpz_congruent_p __gmpz_congruent_p
+__GMP_DECLSPEC int mpz_congruent_p (mpz_srcptr, mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_congruent_2exp_p __gmpz_congruent_2exp_p
+__GMP_DECLSPEC int mpz_congruent_2exp_p (mpz_srcptr, mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_congruent_ui_p __gmpz_congruent_ui_p
+__GMP_DECLSPEC int mpz_congruent_ui_p (mpz_srcptr, unsigned long, unsigned long) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_divexact __gmpz_divexact
+__GMP_DECLSPEC void mpz_divexact (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_divexact_ui __gmpz_divexact_ui
+__GMP_DECLSPEC void mpz_divexact_ui (mpz_ptr, mpz_srcptr, unsigned long);
+
+#define mpz_divisible_p __gmpz_divisible_p
+__GMP_DECLSPEC int mpz_divisible_p (mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_divisible_ui_p __gmpz_divisible_ui_p
+__GMP_DECLSPEC int mpz_divisible_ui_p (mpz_srcptr, unsigned long) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_divisible_2exp_p __gmpz_divisible_2exp_p
+__GMP_DECLSPEC int mpz_divisible_2exp_p (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_dump __gmpz_dump
+__GMP_DECLSPEC void mpz_dump (mpz_srcptr);
+
+#define mpz_export __gmpz_export
+__GMP_DECLSPEC void *mpz_export (void *, size_t *, int, size_t, int, size_t, mpz_srcptr);
+
+#define mpz_fac_ui __gmpz_fac_ui
+__GMP_DECLSPEC void mpz_fac_ui (mpz_ptr, unsigned long int);
+
+#define mpz_2fac_ui __gmpz_2fac_ui
+__GMP_DECLSPEC void mpz_2fac_ui (mpz_ptr, unsigned long int);
+
+#define mpz_mfac_uiui __gmpz_mfac_uiui
+__GMP_DECLSPEC void mpz_mfac_uiui (mpz_ptr, unsigned long int, unsigned long int);
+
+#define mpz_primorial_ui __gmpz_primorial_ui
+__GMP_DECLSPEC void mpz_primorial_ui (mpz_ptr, unsigned long int);
+
+#define mpz_fdiv_q __gmpz_fdiv_q
+__GMP_DECLSPEC void mpz_fdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp
+__GMP_DECLSPEC void mpz_fdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
+
+#define mpz_fdiv_q_ui __gmpz_fdiv_q_ui
+__GMP_DECLSPEC unsigned long int mpz_fdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_fdiv_qr __gmpz_fdiv_qr
+__GMP_DECLSPEC void mpz_fdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui
+__GMP_DECLSPEC unsigned long int mpz_fdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_fdiv_r __gmpz_fdiv_r
+__GMP_DECLSPEC void mpz_fdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp
+__GMP_DECLSPEC void mpz_fdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
+
+#define mpz_fdiv_r_ui __gmpz_fdiv_r_ui
+__GMP_DECLSPEC unsigned long int mpz_fdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_fdiv_ui __gmpz_fdiv_ui
+__GMP_DECLSPEC unsigned long int mpz_fdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_fib_ui __gmpz_fib_ui
+__GMP_DECLSPEC void mpz_fib_ui (mpz_ptr, unsigned long int);
+
+#define mpz_fib2_ui __gmpz_fib2_ui
+__GMP_DECLSPEC void mpz_fib2_ui (mpz_ptr, mpz_ptr, unsigned long int);
+
+#define mpz_fits_sint_p __gmpz_fits_sint_p
+__GMP_DECLSPEC int mpz_fits_sint_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_fits_slong_p __gmpz_fits_slong_p
+__GMP_DECLSPEC int mpz_fits_slong_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_fits_sshort_p __gmpz_fits_sshort_p
+__GMP_DECLSPEC int mpz_fits_sshort_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_fits_uint_p __gmpz_fits_uint_p
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_uint_p)
+__GMP_DECLSPEC int mpz_fits_uint_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_fits_ulong_p __gmpz_fits_ulong_p
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ulong_p)
+__GMP_DECLSPEC int mpz_fits_ulong_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_fits_ushort_p __gmpz_fits_ushort_p
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ushort_p)
+__GMP_DECLSPEC int mpz_fits_ushort_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_gcd __gmpz_gcd
+__GMP_DECLSPEC void mpz_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_gcd_ui __gmpz_gcd_ui
+__GMP_DECLSPEC unsigned long int mpz_gcd_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_gcdext __gmpz_gcdext
+__GMP_DECLSPEC void mpz_gcdext (mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_get_d __gmpz_get_d
+__GMP_DECLSPEC double mpz_get_d (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_get_d_2exp __gmpz_get_d_2exp
+__GMP_DECLSPEC double mpz_get_d_2exp (signed long int *, mpz_srcptr);
+
+#define mpz_get_si __gmpz_get_si
+__GMP_DECLSPEC /* signed */ long int mpz_get_si (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_get_str __gmpz_get_str
+__GMP_DECLSPEC char *mpz_get_str (char *, int, mpz_srcptr);
+
+#define mpz_get_ui __gmpz_get_ui
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_get_ui)
+__GMP_DECLSPEC unsigned long int mpz_get_ui (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_getlimbn __gmpz_getlimbn
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_getlimbn)
+__GMP_DECLSPEC mp_limb_t mpz_getlimbn (mpz_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_hamdist __gmpz_hamdist
+__GMP_DECLSPEC mp_bitcnt_t mpz_hamdist (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_import __gmpz_import
+__GMP_DECLSPEC void mpz_import (mpz_ptr, size_t, int, size_t, int, size_t, const void *);
+
+#define mpz_init __gmpz_init
+__GMP_DECLSPEC void mpz_init (mpz_ptr) __GMP_NOTHROW;
+
+#define mpz_init2 __gmpz_init2
+__GMP_DECLSPEC void mpz_init2 (mpz_ptr, mp_bitcnt_t);
+
+#define mpz_inits __gmpz_inits
+__GMP_DECLSPEC void mpz_inits (mpz_ptr, ...) __GMP_NOTHROW;
+
+#define mpz_init_set __gmpz_init_set
+__GMP_DECLSPEC void mpz_init_set (mpz_ptr, mpz_srcptr);
+
+#define mpz_init_set_d __gmpz_init_set_d
+__GMP_DECLSPEC void mpz_init_set_d (mpz_ptr, double);
+
+#define mpz_init_set_si __gmpz_init_set_si
+__GMP_DECLSPEC void mpz_init_set_si (mpz_ptr, signed long int);
+
+#define mpz_init_set_str __gmpz_init_set_str
+__GMP_DECLSPEC int mpz_init_set_str (mpz_ptr, const char *, int);
+
+#define mpz_init_set_ui __gmpz_init_set_ui
+__GMP_DECLSPEC void mpz_init_set_ui (mpz_ptr, unsigned long int);
+
+#define mpz_inp_raw __gmpz_inp_raw
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpz_inp_raw (mpz_ptr, FILE *);
+#endif
+
+#define mpz_inp_str __gmpz_inp_str
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpz_inp_str (mpz_ptr, FILE *, int);
+#endif
+
+#define mpz_invert __gmpz_invert
+__GMP_DECLSPEC int mpz_invert (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_ior __gmpz_ior
+__GMP_DECLSPEC void mpz_ior (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_jacobi __gmpz_jacobi
+__GMP_DECLSPEC int mpz_jacobi (mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_kronecker mpz_jacobi  /* alias */
+
+#define mpz_kronecker_si __gmpz_kronecker_si
+__GMP_DECLSPEC int mpz_kronecker_si (mpz_srcptr, long) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_kronecker_ui __gmpz_kronecker_ui
+__GMP_DECLSPEC int mpz_kronecker_ui (mpz_srcptr, unsigned long) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_si_kronecker __gmpz_si_kronecker
+__GMP_DECLSPEC int mpz_si_kronecker (long, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_ui_kronecker __gmpz_ui_kronecker
+__GMP_DECLSPEC int mpz_ui_kronecker (unsigned long, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_lcm __gmpz_lcm
+__GMP_DECLSPEC void mpz_lcm (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_lcm_ui __gmpz_lcm_ui
+__GMP_DECLSPEC void mpz_lcm_ui (mpz_ptr, mpz_srcptr, unsigned long);
+
+#define mpz_legendre mpz_jacobi  /* alias */
+
+#define mpz_lucnum_ui __gmpz_lucnum_ui
+__GMP_DECLSPEC void mpz_lucnum_ui (mpz_ptr, unsigned long int);
+
+#define mpz_lucnum2_ui __gmpz_lucnum2_ui
+__GMP_DECLSPEC void mpz_lucnum2_ui (mpz_ptr, mpz_ptr, unsigned long int);
+
+#define mpz_millerrabin __gmpz_millerrabin
+__GMP_DECLSPEC int mpz_millerrabin (mpz_srcptr, int) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_mod __gmpz_mod
+__GMP_DECLSPEC void mpz_mod (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_mod_ui mpz_fdiv_r_ui /* same as fdiv_r because divisor unsigned */
+
+#define mpz_mul __gmpz_mul
+__GMP_DECLSPEC void mpz_mul (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_mul_2exp __gmpz_mul_2exp
+__GMP_DECLSPEC void mpz_mul_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
+
+#define mpz_mul_si __gmpz_mul_si
+__GMP_DECLSPEC void mpz_mul_si (mpz_ptr, mpz_srcptr, long int);
+
+#define mpz_mul_ui __gmpz_mul_ui
+__GMP_DECLSPEC void mpz_mul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_neg __gmpz_neg
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_neg)
+__GMP_DECLSPEC void mpz_neg (mpz_ptr, mpz_srcptr);
+#endif
+
+#define mpz_nextprime __gmpz_nextprime
+__GMP_DECLSPEC void mpz_nextprime (mpz_ptr, mpz_srcptr);
+
+#define mpz_prevprime __gmpz_prevprime
+__GMP_DECLSPEC int mpz_prevprime (mpz_ptr, mpz_srcptr);
+
+#define mpz_out_raw __gmpz_out_raw
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpz_out_raw (FILE *, mpz_srcptr);
+#endif
+
+#define mpz_out_str __gmpz_out_str
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpz_out_str (FILE *, int, mpz_srcptr);
+#endif
+
+#define mpz_perfect_power_p __gmpz_perfect_power_p
+__GMP_DECLSPEC int mpz_perfect_power_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_perfect_square_p __gmpz_perfect_square_p
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_perfect_square_p)
+__GMP_DECLSPEC int mpz_perfect_square_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_popcount __gmpz_popcount
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_popcount)
+__GMP_DECLSPEC mp_bitcnt_t mpz_popcount (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_pow_ui __gmpz_pow_ui
+__GMP_DECLSPEC void mpz_pow_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_powm __gmpz_powm
+__GMP_DECLSPEC void mpz_powm (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_powm_sec __gmpz_powm_sec
+__GMP_DECLSPEC void mpz_powm_sec (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_powm_ui __gmpz_powm_ui
+__GMP_DECLSPEC void mpz_powm_ui (mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr);
+
+#define mpz_probab_prime_p __gmpz_probab_prime_p
+__GMP_DECLSPEC int mpz_probab_prime_p (mpz_srcptr, int) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_random __gmpz_random
+__GMP_DECLSPEC void mpz_random (mpz_ptr, mp_size_t);
+
+#define mpz_random2 __gmpz_random2
+__GMP_DECLSPEC void mpz_random2 (mpz_ptr, mp_size_t);
+
+#define mpz_realloc2 __gmpz_realloc2
+__GMP_DECLSPEC void mpz_realloc2 (mpz_ptr, mp_bitcnt_t);
+
+#define mpz_remove __gmpz_remove
+__GMP_DECLSPEC mp_bitcnt_t mpz_remove (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_root __gmpz_root
+__GMP_DECLSPEC int mpz_root (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_rootrem __gmpz_rootrem
+__GMP_DECLSPEC void mpz_rootrem (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_rrandomb __gmpz_rrandomb
+__GMP_DECLSPEC void mpz_rrandomb (mpz_ptr, gmp_randstate_ptr, mp_bitcnt_t);
+
+#define mpz_scan0 __gmpz_scan0
+__GMP_DECLSPEC mp_bitcnt_t mpz_scan0 (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_scan1 __gmpz_scan1
+__GMP_DECLSPEC mp_bitcnt_t mpz_scan1 (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_set __gmpz_set
+__GMP_DECLSPEC void mpz_set (mpz_ptr, mpz_srcptr);
+
+#define mpz_set_d __gmpz_set_d
+__GMP_DECLSPEC void mpz_set_d (mpz_ptr, double);
+
+#define mpz_set_f __gmpz_set_f
+__GMP_DECLSPEC void mpz_set_f (mpz_ptr, mpf_srcptr);
+
+#define mpz_set_q __gmpz_set_q
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_set_q)
+__GMP_DECLSPEC void mpz_set_q (mpz_ptr, mpq_srcptr);
+#endif
+
+#define mpz_set_si __gmpz_set_si
+__GMP_DECLSPEC void mpz_set_si (mpz_ptr, signed long int);
+
+#define mpz_set_str __gmpz_set_str
+__GMP_DECLSPEC int mpz_set_str (mpz_ptr, const char *, int);
+
+#define mpz_set_ui __gmpz_set_ui
+__GMP_DECLSPEC void mpz_set_ui (mpz_ptr, unsigned long int);
+
+#define mpz_setbit __gmpz_setbit
+__GMP_DECLSPEC void mpz_setbit (mpz_ptr, mp_bitcnt_t);
+
+#define mpz_size __gmpz_size
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_size)
+__GMP_DECLSPEC size_t mpz_size (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_sizeinbase __gmpz_sizeinbase
+__GMP_DECLSPEC size_t mpz_sizeinbase (mpz_srcptr, int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_sqrt __gmpz_sqrt
+__GMP_DECLSPEC void mpz_sqrt (mpz_ptr, mpz_srcptr);
+
+#define mpz_sqrtrem __gmpz_sqrtrem
+__GMP_DECLSPEC void mpz_sqrtrem (mpz_ptr, mpz_ptr, mpz_srcptr);
+
+#define mpz_sub __gmpz_sub
+__GMP_DECLSPEC void mpz_sub (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_sub_ui __gmpz_sub_ui
+__GMP_DECLSPEC void mpz_sub_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_ui_sub __gmpz_ui_sub
+__GMP_DECLSPEC void mpz_ui_sub (mpz_ptr, unsigned long int, mpz_srcptr);
+
+#define mpz_submul __gmpz_submul
+__GMP_DECLSPEC void mpz_submul (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_submul_ui __gmpz_submul_ui
+__GMP_DECLSPEC void mpz_submul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_swap __gmpz_swap
+__GMP_DECLSPEC void mpz_swap (mpz_ptr, mpz_ptr) __GMP_NOTHROW;
+
+#define mpz_tdiv_ui __gmpz_tdiv_ui
+__GMP_DECLSPEC unsigned long int mpz_tdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_tdiv_q __gmpz_tdiv_q
+__GMP_DECLSPEC void mpz_tdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_tdiv_q_2exp __gmpz_tdiv_q_2exp
+__GMP_DECLSPEC void mpz_tdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
+
+#define mpz_tdiv_q_ui __gmpz_tdiv_q_ui
+__GMP_DECLSPEC unsigned long int mpz_tdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_tdiv_qr __gmpz_tdiv_qr
+__GMP_DECLSPEC void mpz_tdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_tdiv_qr_ui __gmpz_tdiv_qr_ui
+__GMP_DECLSPEC unsigned long int mpz_tdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_tdiv_r __gmpz_tdiv_r
+__GMP_DECLSPEC void mpz_tdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp
+__GMP_DECLSPEC void mpz_tdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
+
+#define mpz_tdiv_r_ui __gmpz_tdiv_r_ui
+__GMP_DECLSPEC unsigned long int mpz_tdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
+
+#define mpz_tstbit __gmpz_tstbit
+__GMP_DECLSPEC int mpz_tstbit (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_ui_pow_ui __gmpz_ui_pow_ui
+__GMP_DECLSPEC void mpz_ui_pow_ui (mpz_ptr, unsigned long int, unsigned long int);
+
+#define mpz_urandomb __gmpz_urandomb
+__GMP_DECLSPEC void mpz_urandomb (mpz_ptr, gmp_randstate_ptr, mp_bitcnt_t);
+
+#define mpz_urandomm __gmpz_urandomm
+__GMP_DECLSPEC void mpz_urandomm (mpz_ptr, gmp_randstate_ptr, mpz_srcptr);
+
+#define mpz_xor __gmpz_xor
+#define mpz_eor __gmpz_xor
+__GMP_DECLSPEC void mpz_xor (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_limbs_read __gmpz_limbs_read
+__GMP_DECLSPEC mp_srcptr mpz_limbs_read (mpz_srcptr);
+
+#define mpz_limbs_write __gmpz_limbs_write
+__GMP_DECLSPEC mp_ptr mpz_limbs_write (mpz_ptr, mp_size_t);
+
+#define mpz_limbs_modify __gmpz_limbs_modify
+__GMP_DECLSPEC mp_ptr mpz_limbs_modify (mpz_ptr, mp_size_t);
+
+#define mpz_limbs_finish __gmpz_limbs_finish
+__GMP_DECLSPEC void mpz_limbs_finish (mpz_ptr, mp_size_t);
+
+#define mpz_roinit_n __gmpz_roinit_n
+__GMP_DECLSPEC mpz_srcptr mpz_roinit_n (mpz_ptr, mp_srcptr, mp_size_t);
+
+#define MPZ_ROINIT_N(xp, xs) {{0, (xs),(xp) }}
+
+/**************** Rational (i.e. Q) routines.  ****************/
+
+#define mpq_abs __gmpq_abs
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_abs)
+__GMP_DECLSPEC void mpq_abs (mpq_ptr, mpq_srcptr);
+#endif
+
+#define mpq_add __gmpq_add
+__GMP_DECLSPEC void mpq_add (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+#define mpq_canonicalize __gmpq_canonicalize
+__GMP_DECLSPEC void mpq_canonicalize (mpq_ptr);
+
+#define mpq_clear __gmpq_clear
+__GMP_DECLSPEC void mpq_clear (mpq_ptr);
+
+#define mpq_clears __gmpq_clears
+__GMP_DECLSPEC void mpq_clears (mpq_ptr, ...);
+
+#define mpq_cmp __gmpq_cmp
+__GMP_DECLSPEC int mpq_cmp (mpq_srcptr, mpq_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define _mpq_cmp_si __gmpq_cmp_si
+__GMP_DECLSPEC int _mpq_cmp_si (mpq_srcptr, long, unsigned long) __GMP_ATTRIBUTE_PURE;
+
+#define _mpq_cmp_ui __gmpq_cmp_ui
+__GMP_DECLSPEC int _mpq_cmp_ui (mpq_srcptr, unsigned long int, unsigned long int) __GMP_ATTRIBUTE_PURE;
+
+#define mpq_cmp_z __gmpq_cmp_z
+__GMP_DECLSPEC int mpq_cmp_z (mpq_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpq_div __gmpq_div
+__GMP_DECLSPEC void mpq_div (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+#define mpq_div_2exp __gmpq_div_2exp
+__GMP_DECLSPEC void mpq_div_2exp (mpq_ptr, mpq_srcptr, mp_bitcnt_t);
+
+#define mpq_equal __gmpq_equal
+__GMP_DECLSPEC int mpq_equal (mpq_srcptr, mpq_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpq_get_num __gmpq_get_num
+__GMP_DECLSPEC void mpq_get_num (mpz_ptr, mpq_srcptr);
+
+#define mpq_get_den __gmpq_get_den
+__GMP_DECLSPEC void mpq_get_den (mpz_ptr, mpq_srcptr);
+
+#define mpq_get_d __gmpq_get_d
+__GMP_DECLSPEC double mpq_get_d (mpq_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpq_get_str __gmpq_get_str
+__GMP_DECLSPEC char *mpq_get_str (char *, int, mpq_srcptr);
+
+#define mpq_init __gmpq_init
+__GMP_DECLSPEC void mpq_init (mpq_ptr);
+
+#define mpq_inits __gmpq_inits
+__GMP_DECLSPEC void mpq_inits (mpq_ptr, ...);
+
+#define mpq_inp_str __gmpq_inp_str
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpq_inp_str (mpq_ptr, FILE *, int);
+#endif
+
+#define mpq_inv __gmpq_inv
+__GMP_DECLSPEC void mpq_inv (mpq_ptr, mpq_srcptr);
+
+#define mpq_mul __gmpq_mul
+__GMP_DECLSPEC void mpq_mul (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+#define mpq_mul_2exp __gmpq_mul_2exp
+__GMP_DECLSPEC void mpq_mul_2exp (mpq_ptr, mpq_srcptr, mp_bitcnt_t);
+
+#define mpq_neg __gmpq_neg
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_neg)
+__GMP_DECLSPEC void mpq_neg (mpq_ptr, mpq_srcptr);
+#endif
+
+#define mpq_out_str __gmpq_out_str
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpq_out_str (FILE *, int, mpq_srcptr);
+#endif
+
+#define mpq_set __gmpq_set
+__GMP_DECLSPEC void mpq_set (mpq_ptr, mpq_srcptr);
+
+#define mpq_set_d __gmpq_set_d
+__GMP_DECLSPEC void mpq_set_d (mpq_ptr, double);
+
+#define mpq_set_den __gmpq_set_den
+__GMP_DECLSPEC void mpq_set_den (mpq_ptr, mpz_srcptr);
+
+#define mpq_set_f __gmpq_set_f
+__GMP_DECLSPEC void mpq_set_f (mpq_ptr, mpf_srcptr);
+
+#define mpq_set_num __gmpq_set_num
+__GMP_DECLSPEC void mpq_set_num (mpq_ptr, mpz_srcptr);
+
+#define mpq_set_si __gmpq_set_si
+__GMP_DECLSPEC void mpq_set_si (mpq_ptr, signed long int, unsigned long int);
+
+#define mpq_set_str __gmpq_set_str
+__GMP_DECLSPEC int mpq_set_str (mpq_ptr, const char *, int);
+
+#define mpq_set_ui __gmpq_set_ui
+__GMP_DECLSPEC void mpq_set_ui (mpq_ptr, unsigned long int, unsigned long int);
+
+#define mpq_set_z __gmpq_set_z
+__GMP_DECLSPEC void mpq_set_z (mpq_ptr, mpz_srcptr);
+
+#define mpq_sub __gmpq_sub
+__GMP_DECLSPEC void mpq_sub (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+#define mpq_swap __gmpq_swap
+__GMP_DECLSPEC void mpq_swap (mpq_ptr, mpq_ptr) __GMP_NOTHROW;
+
+
+/**************** Float (i.e. F) routines.  ****************/
+
+#define mpf_abs __gmpf_abs
+__GMP_DECLSPEC void mpf_abs (mpf_ptr, mpf_srcptr);
+
+#define mpf_add __gmpf_add
+__GMP_DECLSPEC void mpf_add (mpf_ptr, mpf_srcptr, mpf_srcptr);
+
+#define mpf_add_ui __gmpf_add_ui
+__GMP_DECLSPEC void mpf_add_ui (mpf_ptr, mpf_srcptr, unsigned long int);
+#define mpf_ceil __gmpf_ceil
+__GMP_DECLSPEC void mpf_ceil (mpf_ptr, mpf_srcptr);
+
+#define mpf_clear __gmpf_clear
+__GMP_DECLSPEC void mpf_clear (mpf_ptr);
+
+#define mpf_clears __gmpf_clears
+__GMP_DECLSPEC void mpf_clears (mpf_ptr, ...);
+
+#define mpf_cmp __gmpf_cmp
+__GMP_DECLSPEC int mpf_cmp (mpf_srcptr, mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_cmp_z __gmpf_cmp_z
+__GMP_DECLSPEC int mpf_cmp_z (mpf_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_cmp_d __gmpf_cmp_d
+__GMP_DECLSPEC int mpf_cmp_d (mpf_srcptr, double) __GMP_ATTRIBUTE_PURE;
+
+#define mpf_cmp_si __gmpf_cmp_si
+__GMP_DECLSPEC int mpf_cmp_si (mpf_srcptr, signed long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_cmp_ui __gmpf_cmp_ui
+__GMP_DECLSPEC int mpf_cmp_ui (mpf_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_div __gmpf_div
+__GMP_DECLSPEC void mpf_div (mpf_ptr, mpf_srcptr, mpf_srcptr);
+
+#define mpf_div_2exp __gmpf_div_2exp
+__GMP_DECLSPEC void mpf_div_2exp (mpf_ptr, mpf_srcptr, mp_bitcnt_t);
+
+#define mpf_div_ui __gmpf_div_ui
+__GMP_DECLSPEC void mpf_div_ui (mpf_ptr, mpf_srcptr, unsigned long int);
+
+#define mpf_dump __gmpf_dump
+__GMP_DECLSPEC void mpf_dump (mpf_srcptr);
+
+#define mpf_eq __gmpf_eq
+__GMP_DECLSPEC int mpf_eq (mpf_srcptr, mpf_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_sint_p __gmpf_fits_sint_p
+__GMP_DECLSPEC int mpf_fits_sint_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_slong_p __gmpf_fits_slong_p
+__GMP_DECLSPEC int mpf_fits_slong_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_sshort_p __gmpf_fits_sshort_p
+__GMP_DECLSPEC int mpf_fits_sshort_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_uint_p __gmpf_fits_uint_p
+__GMP_DECLSPEC int mpf_fits_uint_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_ulong_p __gmpf_fits_ulong_p
+__GMP_DECLSPEC int mpf_fits_ulong_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_ushort_p __gmpf_fits_ushort_p
+__GMP_DECLSPEC int mpf_fits_ushort_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_floor __gmpf_floor
+__GMP_DECLSPEC void mpf_floor (mpf_ptr, mpf_srcptr);
+
+#define mpf_get_d __gmpf_get_d
+__GMP_DECLSPEC double mpf_get_d (mpf_srcptr) __GMP_ATTRIBUTE_PURE;
+
+#define mpf_get_d_2exp __gmpf_get_d_2exp
+__GMP_DECLSPEC double mpf_get_d_2exp (signed long int *, mpf_srcptr);
+
+#define mpf_get_default_prec __gmpf_get_default_prec
+__GMP_DECLSPEC mp_bitcnt_t mpf_get_default_prec (void) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_get_prec __gmpf_get_prec
+__GMP_DECLSPEC mp_bitcnt_t mpf_get_prec (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_get_si __gmpf_get_si
+__GMP_DECLSPEC long mpf_get_si (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_get_str __gmpf_get_str
+__GMP_DECLSPEC char *mpf_get_str (char *, mp_exp_t *, int, size_t, mpf_srcptr);
+
+#define mpf_get_ui __gmpf_get_ui
+__GMP_DECLSPEC unsigned long mpf_get_ui (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_init __gmpf_init
+__GMP_DECLSPEC void mpf_init (mpf_ptr);
+
+#define mpf_init2 __gmpf_init2
+__GMP_DECLSPEC void mpf_init2 (mpf_ptr, mp_bitcnt_t);
+
+#define mpf_inits __gmpf_inits
+__GMP_DECLSPEC void mpf_inits (mpf_ptr, ...);
+
+#define mpf_init_set __gmpf_init_set
+__GMP_DECLSPEC void mpf_init_set (mpf_ptr, mpf_srcptr);
+
+#define mpf_init_set_d __gmpf_init_set_d
+__GMP_DECLSPEC void mpf_init_set_d (mpf_ptr, double);
+
+#define mpf_init_set_si __gmpf_init_set_si
+__GMP_DECLSPEC void mpf_init_set_si (mpf_ptr, signed long int);
+
+#define mpf_init_set_str __gmpf_init_set_str
+__GMP_DECLSPEC int mpf_init_set_str (mpf_ptr, const char *, int);
+
+#define mpf_init_set_ui __gmpf_init_set_ui
+__GMP_DECLSPEC void mpf_init_set_ui (mpf_ptr, unsigned long int);
+
+#define mpf_inp_str __gmpf_inp_str
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpf_inp_str (mpf_ptr, FILE *, int);
+#endif
+
+#define mpf_integer_p __gmpf_integer_p
+__GMP_DECLSPEC int mpf_integer_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_mul __gmpf_mul
+__GMP_DECLSPEC void mpf_mul (mpf_ptr, mpf_srcptr, mpf_srcptr);
+
+#define mpf_mul_2exp __gmpf_mul_2exp
+__GMP_DECLSPEC void mpf_mul_2exp (mpf_ptr, mpf_srcptr, mp_bitcnt_t);
+
+#define mpf_mul_ui __gmpf_mul_ui
+__GMP_DECLSPEC void mpf_mul_ui (mpf_ptr, mpf_srcptr, unsigned long int);
+
+#define mpf_neg __gmpf_neg
+__GMP_DECLSPEC void mpf_neg (mpf_ptr, mpf_srcptr);
+
+#define mpf_out_str __gmpf_out_str
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpf_out_str (FILE *, int, size_t, mpf_srcptr);
+#endif
+
+#define mpf_pow_ui __gmpf_pow_ui
+__GMP_DECLSPEC void mpf_pow_ui (mpf_ptr, mpf_srcptr, unsigned long int);
+
+#define mpf_random2 __gmpf_random2
+__GMP_DECLSPEC void mpf_random2 (mpf_ptr, mp_size_t, mp_exp_t);
+
+#define mpf_reldiff __gmpf_reldiff
+__GMP_DECLSPEC void mpf_reldiff (mpf_ptr, mpf_srcptr, mpf_srcptr);
+
+#define mpf_set __gmpf_set
+__GMP_DECLSPEC void mpf_set (mpf_ptr, mpf_srcptr);
+
+#define mpf_set_d __gmpf_set_d
+__GMP_DECLSPEC void mpf_set_d (mpf_ptr, double);
+
+#define mpf_set_default_prec __gmpf_set_default_prec
+__GMP_DECLSPEC void mpf_set_default_prec (mp_bitcnt_t) __GMP_NOTHROW;
+
+#define mpf_set_prec __gmpf_set_prec
+__GMP_DECLSPEC void mpf_set_prec (mpf_ptr, mp_bitcnt_t);
+
+#define mpf_set_prec_raw __gmpf_set_prec_raw
+__GMP_DECLSPEC void mpf_set_prec_raw (mpf_ptr, mp_bitcnt_t) __GMP_NOTHROW;
+
+#define mpf_set_q __gmpf_set_q
+__GMP_DECLSPEC void mpf_set_q (mpf_ptr, mpq_srcptr);
+
+#define mpf_set_si __gmpf_set_si
+__GMP_DECLSPEC void mpf_set_si (mpf_ptr, signed long int);
+
+#define mpf_set_str __gmpf_set_str
+__GMP_DECLSPEC int mpf_set_str (mpf_ptr, const char *, int);
+
+#define mpf_set_ui __gmpf_set_ui
+__GMP_DECLSPEC void mpf_set_ui (mpf_ptr, unsigned long int);
+
+#define mpf_set_z __gmpf_set_z
+__GMP_DECLSPEC void mpf_set_z (mpf_ptr, mpz_srcptr);
+
+#define mpf_size __gmpf_size
+__GMP_DECLSPEC size_t mpf_size (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_sqrt __gmpf_sqrt
+__GMP_DECLSPEC void mpf_sqrt (mpf_ptr, mpf_srcptr);
+
+#define mpf_sqrt_ui __gmpf_sqrt_ui
+__GMP_DECLSPEC void mpf_sqrt_ui (mpf_ptr, unsigned long int);
+
+#define mpf_sub __gmpf_sub
+__GMP_DECLSPEC void mpf_sub (mpf_ptr, mpf_srcptr, mpf_srcptr);
+
+#define mpf_sub_ui __gmpf_sub_ui
+__GMP_DECLSPEC void mpf_sub_ui (mpf_ptr, mpf_srcptr, unsigned long int);
+
+#define mpf_swap __gmpf_swap
+__GMP_DECLSPEC void mpf_swap (mpf_ptr, mpf_ptr) __GMP_NOTHROW;
+
+#define mpf_trunc __gmpf_trunc
+__GMP_DECLSPEC void mpf_trunc (mpf_ptr, mpf_srcptr);
+
+#define mpf_ui_div __gmpf_ui_div
+__GMP_DECLSPEC void mpf_ui_div (mpf_ptr, unsigned long int, mpf_srcptr);
+
+#define mpf_ui_sub __gmpf_ui_sub
+__GMP_DECLSPEC void mpf_ui_sub (mpf_ptr, unsigned long int, mpf_srcptr);
+
+#define mpf_urandomb __gmpf_urandomb
+__GMP_DECLSPEC void mpf_urandomb (mpf_ptr, gmp_randstate_ptr, mp_bitcnt_t);
+
+
+/************ Low level positive-integer (i.e. N) routines.  ************/
+
+/* This is ugly, but we need to make user calls reach the prefixed function. */
+
+#define mpn_add __MPN(add)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add)
+__GMP_DECLSPEC mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+#endif
+
+#define mpn_add_1 __MPN(add_1)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add_1)
+__GMP_DECLSPEC mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t) __GMP_NOTHROW;
+#endif
+
+#define mpn_add_n __MPN(add_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_addmul_1 __MPN(addmul_1)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_cmp __MPN(cmp)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_cmp)
+__GMP_DECLSPEC int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpn_zero_p __MPN(zero_p)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_zero_p)
+__GMP_DECLSPEC int mpn_zero_p (mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpn_divexact_1 __MPN(divexact_1)
+__GMP_DECLSPEC void mpn_divexact_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_divexact_by3(dst,src,size) \
+  mpn_divexact_by3c (dst, src, size, __GMP_CAST (mp_limb_t, 0))
+
+#define mpn_divexact_by3c __MPN(divexact_by3c)
+__GMP_DECLSPEC mp_limb_t mpn_divexact_by3c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_divmod_1(qp,np,nsize,dlimb) \
+  mpn_divrem_1 (qp, __GMP_CAST (mp_size_t, 0), np, nsize, dlimb)
+
+#define mpn_divrem __MPN(divrem)
+__GMP_DECLSPEC mp_limb_t mpn_divrem (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define mpn_divrem_1 __MPN(divrem_1)
+__GMP_DECLSPEC mp_limb_t mpn_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_divrem_2 __MPN(divrem_2)
+__GMP_DECLSPEC mp_limb_t mpn_divrem_2 (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+
+#define mpn_div_qr_1 __MPN(div_qr_1)
+__GMP_DECLSPEC mp_limb_t mpn_div_qr_1 (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_div_qr_2 __MPN(div_qr_2)
+__GMP_DECLSPEC mp_limb_t mpn_div_qr_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_gcd __MPN(gcd)
+__GMP_DECLSPEC mp_size_t mpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+
+#define mpn_gcd_11 __MPN(gcd_11)
+__GMP_DECLSPEC mp_limb_t mpn_gcd_11 (mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_gcd_1 __MPN(gcd_1)
+__GMP_DECLSPEC mp_limb_t mpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_gcdext_1 __MPN(gcdext_1)
+__GMP_DECLSPEC mp_limb_t mpn_gcdext_1 (mp_limb_signed_t *, mp_limb_signed_t *, mp_limb_t, mp_limb_t);
+
+#define mpn_gcdext __MPN(gcdext)
+__GMP_DECLSPEC mp_size_t mpn_gcdext (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+
+#define mpn_get_str __MPN(get_str)
+__GMP_DECLSPEC size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
+
+#define mpn_hamdist __MPN(hamdist)
+__GMP_DECLSPEC mp_bitcnt_t mpn_hamdist (mp_srcptr, mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpn_lshift __MPN(lshift)
+__GMP_DECLSPEC mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+
+#define mpn_mod_1 __MPN(mod_1)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_mul __MPN(mul)
+__GMP_DECLSPEC mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define mpn_mul_1 __MPN(mul_1)
+__GMP_DECLSPEC mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_mul_n __MPN(mul_n)
+__GMP_DECLSPEC void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_sqr __MPN(sqr)
+__GMP_DECLSPEC void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
+
+#define mpn_neg __MPN(neg)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_neg)
+__GMP_DECLSPEC mp_limb_t mpn_neg (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+
+#define mpn_com __MPN(com)
+__GMP_DECLSPEC void mpn_com (mp_ptr, mp_srcptr, mp_size_t);
+
+#define mpn_perfect_square_p __MPN(perfect_square_p)
+__GMP_DECLSPEC int mpn_perfect_square_p (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_perfect_power_p __MPN(perfect_power_p)
+__GMP_DECLSPEC int mpn_perfect_power_p (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_popcount __MPN(popcount)
+__GMP_DECLSPEC mp_bitcnt_t mpn_popcount (mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpn_pow_1 __MPN(pow_1)
+__GMP_DECLSPEC mp_size_t mpn_pow_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+
+/* undocumented now, but retained here for upward compatibility */
+#define mpn_preinv_mod_1 __MPN(preinv_mod_1)
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mod_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_random __MPN(random)
+__GMP_DECLSPEC void mpn_random (mp_ptr, mp_size_t);
+
+#define mpn_random2 __MPN(random2)
+__GMP_DECLSPEC void mpn_random2 (mp_ptr, mp_size_t);
+
+#define mpn_rshift __MPN(rshift)
+__GMP_DECLSPEC mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+
+#define mpn_scan0 __MPN(scan0)
+__GMP_DECLSPEC mp_bitcnt_t mpn_scan0 (mp_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_scan1 __MPN(scan1)
+__GMP_DECLSPEC mp_bitcnt_t mpn_scan1 (mp_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_set_str __MPN(set_str)
+__GMP_DECLSPEC mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int);
+
+#define mpn_sizeinbase __MPN(sizeinbase)
+__GMP_DECLSPEC size_t mpn_sizeinbase (mp_srcptr, mp_size_t, int);
+
+#define mpn_sqrtrem __MPN(sqrtrem)
+__GMP_DECLSPEC mp_size_t mpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
+
+#define mpn_sub __MPN(sub)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub)
+__GMP_DECLSPEC mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+#endif
+
+#define mpn_sub_1 __MPN(sub_1)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub_1)
+__GMP_DECLSPEC mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t) __GMP_NOTHROW;
+#endif
+
+#define mpn_sub_n __MPN(sub_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_submul_1 __MPN(submul_1)
+__GMP_DECLSPEC mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_tdiv_qr __MPN(tdiv_qr)
+__GMP_DECLSPEC void mpn_tdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define mpn_and_n __MPN(and_n)
+__GMP_DECLSPEC void mpn_and_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_andn_n __MPN(andn_n)
+__GMP_DECLSPEC void mpn_andn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_nand_n __MPN(nand_n)
+__GMP_DECLSPEC void mpn_nand_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_ior_n __MPN(ior_n)
+__GMP_DECLSPEC void mpn_ior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_iorn_n __MPN(iorn_n)
+__GMP_DECLSPEC void mpn_iorn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_nior_n __MPN(nior_n)
+__GMP_DECLSPEC void mpn_nior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_xor_n __MPN(xor_n)
+__GMP_DECLSPEC void mpn_xor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_xnor_n __MPN(xnor_n)
+__GMP_DECLSPEC void mpn_xnor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_copyi __MPN(copyi)
+__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+#define mpn_copyd __MPN(copyd)
+__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+#define mpn_zero __MPN(zero)
+__GMP_DECLSPEC void mpn_zero (mp_ptr, mp_size_t);
+
+#define mpn_cnd_add_n __MPN(cnd_add_n)
+__GMP_DECLSPEC mp_limb_t mpn_cnd_add_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_cnd_sub_n __MPN(cnd_sub_n)
+__GMP_DECLSPEC mp_limb_t mpn_cnd_sub_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_sec_add_1 __MPN(sec_add_1)
+__GMP_DECLSPEC mp_limb_t mpn_sec_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+#define mpn_sec_add_1_itch __MPN(sec_add_1_itch)
+__GMP_DECLSPEC mp_size_t mpn_sec_add_1_itch (mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_sec_sub_1 __MPN(sec_sub_1)
+__GMP_DECLSPEC mp_limb_t mpn_sec_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+#define mpn_sec_sub_1_itch __MPN(sec_sub_1_itch)
+__GMP_DECLSPEC mp_size_t mpn_sec_sub_1_itch (mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_cnd_swap  __MPN(cnd_swap)
+__GMP_DECLSPEC void mpn_cnd_swap (mp_limb_t, volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t);
+
+#define mpn_sec_mul __MPN(sec_mul)
+__GMP_DECLSPEC void mpn_sec_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sec_mul_itch __MPN(sec_mul_itch)
+__GMP_DECLSPEC mp_size_t mpn_sec_mul_itch (mp_size_t, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_sec_sqr __MPN(sec_sqr)
+__GMP_DECLSPEC void mpn_sec_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sec_sqr_itch __MPN(sec_sqr_itch)
+__GMP_DECLSPEC mp_size_t mpn_sec_sqr_itch (mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_sec_powm __MPN(sec_powm)
+__GMP_DECLSPEC void mpn_sec_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_bitcnt_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sec_powm_itch __MPN(sec_powm_itch)
+__GMP_DECLSPEC mp_size_t mpn_sec_powm_itch (mp_size_t, mp_bitcnt_t, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_sec_tabselect __MPN(sec_tabselect)
+__GMP_DECLSPEC void mpn_sec_tabselect (volatile mp_limb_t *, volatile const mp_limb_t *, mp_size_t, mp_size_t, mp_size_t);
+
+#define mpn_sec_div_qr __MPN(sec_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_sec_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sec_div_qr_itch __MPN(sec_div_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_sec_div_qr_itch (mp_size_t, mp_size_t) __GMP_ATTRIBUTE_PURE;
+#define mpn_sec_div_r __MPN(sec_div_r)
+__GMP_DECLSPEC void mpn_sec_div_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sec_div_r_itch __MPN(sec_div_r_itch)
+__GMP_DECLSPEC mp_size_t mpn_sec_div_r_itch (mp_size_t, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_sec_invert __MPN(sec_invert)
+__GMP_DECLSPEC int mpn_sec_invert (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_bitcnt_t, mp_ptr);
+#define mpn_sec_invert_itch __MPN(sec_invert_itch)
+__GMP_DECLSPEC mp_size_t mpn_sec_invert_itch (mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+
+/**************** mpz inlines ****************/
+
+/* The following are provided as inlines where possible, but always exist as
+   library functions too, for binary compatibility.
+
+   Within gmp itself this inlining generally isn't relied on, since it
+   doesn't get done for all compilers, whereas if something is worth
+   inlining then it's worth arranging always.
+
+   There are two styles of inlining here.  When the same bit of code is
+   wanted for the inline as for the library version, then __GMP_FORCE_foo
+   arranges for that code to be emitted and the __GMP_EXTERN_INLINE
+   directive suppressed, eg. mpz_fits_uint_p.  When a different bit of code
+   is wanted for the inline than for the library version, then
+   __GMP_FORCE_foo arranges the inline to be suppressed, eg. mpz_abs.  */
+
+/* mpz_abs (inline variant): set __gmp_w to the absolute value of __gmp_u.
+   This inline is suppressed when __GMP_FORCE_mpz_abs is defined, because
+   the library copy of mpz_abs uses different code (see the inlining note
+   above).  */
+#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpz_abs)
+__GMP_EXTERN_INLINE void
+mpz_abs (mpz_ptr __gmp_w, mpz_srcptr __gmp_u)
+{
+  if (__gmp_w != __gmp_u)
+    mpz_set (__gmp_w, __gmp_u);
+  /* The sign of an mpz is carried by the sign of _mp_size, so forcing the
+     size non-negative yields |u| without touching the limb data.  */
+  __gmp_w->_mp_size = __GMP_ABS (__gmp_w->_mp_size);
+}
+#endif
+
+#if GMP_NAIL_BITS == 0
+#define __GMPZ_FITS_UTYPE_P(z,maxval)					\
+  mp_size_t  __gmp_n = z->_mp_size;					\
+  mp_ptr  __gmp_p = z->_mp_d;						\
+  return (__gmp_n == 0 || (__gmp_n == 1 && __gmp_p[0] <= maxval));
+#else
+#define __GMPZ_FITS_UTYPE_P(z,maxval)					\
+  mp_size_t  __gmp_n = z->_mp_size;					\
+  mp_ptr  __gmp_p = z->_mp_d;						\
+  return (__gmp_n == 0 || (__gmp_n == 1 && __gmp_p[0] <= maxval)	\
+	  || (__gmp_n == 2 && __gmp_p[1] <= ((mp_limb_t) maxval >> GMP_NUMB_BITS)));
+#endif
+
+/* mpz_fits_uint_p / mpz_fits_ulong_p / mpz_fits_ushort_p: nonzero iff z
+   fits in the respective unsigned C type.  All three share
+   __GMPZ_FITS_UTYPE_P; the __GMP_FORCE_* macro for each one makes the
+   same body compile as the out-of-line library function instead of an
+   extern inline.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_uint_p)
+#if ! defined (__GMP_FORCE_mpz_fits_uint_p)
+__GMP_EXTERN_INLINE
+#endif
+int
+mpz_fits_uint_p (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  __GMPZ_FITS_UTYPE_P (__gmp_z, UINT_MAX);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ulong_p)
+#if ! defined (__GMP_FORCE_mpz_fits_ulong_p)
+__GMP_EXTERN_INLINE
+#endif
+int
+mpz_fits_ulong_p (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  __GMPZ_FITS_UTYPE_P (__gmp_z, ULONG_MAX);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ushort_p)
+#if ! defined (__GMP_FORCE_mpz_fits_ushort_p)
+__GMP_EXTERN_INLINE
+#endif
+int
+mpz_fits_ushort_p (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  __GMPZ_FITS_UTYPE_P (__gmp_z, USHRT_MAX);
+}
+#endif
+
+/* mpz_get_ui: return the low bits of |z| as an unsigned long (0 when z
+   is zero).  Reading _mp_d[0] unconditionally is safe since every mpz_t
+   has at least one limb allocated (see the mpz_odd_p comment further
+   down).  With nails, two limbs are combined when present.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_get_ui)
+#if ! defined (__GMP_FORCE_mpz_get_ui)
+__GMP_EXTERN_INLINE
+#endif
+unsigned long
+mpz_get_ui (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  mp_ptr __gmp_p = __gmp_z->_mp_d;
+  mp_size_t __gmp_n = __gmp_z->_mp_size;
+  mp_limb_t __gmp_l = __gmp_p[0];
+  /* This is a "#if" rather than a plain "if" so as to avoid gcc warnings
+     about "<< GMP_NUMB_BITS" exceeding the type size, and to avoid Borland
+     C++ 6.0 warnings about condition always true for something like
+     "ULONG_MAX < GMP_NUMB_MASK".  */
+#if GMP_NAIL_BITS == 0 || defined (_LONG_LONG_LIMB)
+  /* limb==long and no nails, or limb==longlong, one limb is enough */
+  return (__gmp_n != 0 ? __gmp_l : 0);
+#else
+  /* limb==long and nails, need two limbs when available */
+  __gmp_n = __GMP_ABS (__gmp_n);
+  if (__gmp_n <= 1)
+    return (__gmp_n != 0 ? __gmp_l : 0);
+  else
+    return __gmp_l + (__gmp_p[1] << GMP_NUMB_BITS);
+#endif
+}
+#endif
+
+/* mpz_getlimbn: return limb number __gmp_n (counting from 0) of the
+   magnitude of z, or 0 when the index is outside 0 <= n < mpz_size(z).  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_getlimbn)
+#if ! defined (__GMP_FORCE_mpz_getlimbn)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpz_getlimbn (mpz_srcptr __gmp_z, mp_size_t __gmp_n) __GMP_NOTHROW
+{
+  mp_limb_t  __gmp_result = 0;
+  if (__GMP_LIKELY (__gmp_n >= 0 && __gmp_n < __GMP_ABS (__gmp_z->_mp_size)))
+    __gmp_result = __gmp_z->_mp_d[__gmp_n];
+  return __gmp_result;
+}
+#endif
+
+/* mpz_neg: set w = -u by flipping the sign of the _mp_size field after
+   copying u into w (when distinct).  Negating zero leaves size 0.  */
+#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpz_neg)
+__GMP_EXTERN_INLINE void
+mpz_neg (mpz_ptr __gmp_w, mpz_srcptr __gmp_u)
+{
+  if (__gmp_w != __gmp_u)
+    mpz_set (__gmp_w, __gmp_u);
+  __gmp_w->_mp_size = - __gmp_w->_mp_size;
+}
+#endif
+
+/* mpz_perfect_square_p: nonzero iff a is a perfect square.  Zero counts
+   as a square, negatives never do; positive values defer to
+   mpn_perfect_square_p on the limb array.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_perfect_square_p)
+#if ! defined (__GMP_FORCE_mpz_perfect_square_p)
+__GMP_EXTERN_INLINE
+#endif
+int
+mpz_perfect_square_p (mpz_srcptr __gmp_a)
+{
+  mp_size_t __gmp_asize;
+  int       __gmp_result;
+
+  __gmp_asize = __gmp_a->_mp_size;
+  __gmp_result = (__gmp_asize >= 0);  /* zero is a square, negatives are not */
+  if (__GMP_LIKELY (__gmp_asize > 0))
+    __gmp_result = mpn_perfect_square_p (__gmp_a->_mp_d, __gmp_asize);
+  return __gmp_result;
+}
+#endif
+
+/* mpz_popcount: number of 1 bits in u.  Defined as the maximum
+   mp_bitcnt_t value (~0) when u is negative, 0 when u is zero,
+   otherwise the population count of the limb array.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_popcount)
+#if ! defined (__GMP_FORCE_mpz_popcount)
+__GMP_EXTERN_INLINE
+#endif
+mp_bitcnt_t
+mpz_popcount (mpz_srcptr __gmp_u) __GMP_NOTHROW
+{
+  mp_size_t      __gmp_usize;
+  mp_bitcnt_t    __gmp_result;
+
+  __gmp_usize = __gmp_u->_mp_size;
+  __gmp_result = (__gmp_usize < 0 ? ~ __GMP_CAST (mp_bitcnt_t, 0) : __GMP_CAST (mp_bitcnt_t, 0));
+  if (__GMP_LIKELY (__gmp_usize > 0))
+    __gmp_result =  mpn_popcount (__gmp_u->_mp_d, __gmp_usize);
+  return __gmp_result;
+}
+#endif
+
+/* mpz_set_q: w = u truncated to an integer, i.e. numerator divided by
+   denominator rounding toward zero.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_set_q)
+#if ! defined (__GMP_FORCE_mpz_set_q)
+__GMP_EXTERN_INLINE
+#endif
+void
+mpz_set_q (mpz_ptr __gmp_w, mpq_srcptr __gmp_u)
+{
+  mpz_tdiv_q (__gmp_w, mpq_numref (__gmp_u), mpq_denref (__gmp_u));
+}
+#endif
+
+/* mpz_size: number of limbs in z, 0 for a zero value.  The sign is
+   carried in _mp_size, hence the absolute value here.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_size)
+#if ! defined (__GMP_FORCE_mpz_size)
+__GMP_EXTERN_INLINE
+#endif
+size_t
+mpz_size (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  return __GMP_ABS (__gmp_z->_mp_size);
+}
+#endif
+
+
+/**************** mpq inlines ****************/
+
+/* mpq_abs / mpq_neg: the sign of a rational is carried on the
+   numerator's _mp_size field, which is all these adjust after copying u
+   into w (when distinct).  The denominator is left untouched.  */
+#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpq_abs)
+__GMP_EXTERN_INLINE void
+mpq_abs (mpq_ptr __gmp_w, mpq_srcptr __gmp_u)
+{
+  if (__gmp_w != __gmp_u)
+    mpq_set (__gmp_w, __gmp_u);
+  __gmp_w->_mp_num._mp_size = __GMP_ABS (__gmp_w->_mp_num._mp_size);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpq_neg)
+__GMP_EXTERN_INLINE void
+mpq_neg (mpq_ptr __gmp_w, mpq_srcptr __gmp_u)
+{
+  if (__gmp_w != __gmp_u)
+    mpq_set (__gmp_w, __gmp_u);
+  __gmp_w->_mp_num._mp_size = - __gmp_w->_mp_num._mp_size;
+}
+#endif
+
+
+/**************** mpn inlines ****************/
+
+/* The comments with __GMPN_ADD_1 below apply here too.
+
+   The test for FUNCTION returning 0 should predict well.  If it's assumed
+   {yp,ysize} will usually have a random number of bits then the high limb
+   won't be full and a carry out will occur a good deal less than 50% of the
+   time.
+
+   ysize==0 isn't a documented feature, but is used internally in a few
+   places.
+
+   Producing cout last stops it using up a register during the main part of
+   the calculation, though gcc (as of 3.0) on an "if (mpn_add (...))"
+   doesn't seem able to move the true and false legs of the conditional up
+   to the two places cout is generated.  */
+
+/* __GMPN_AORS: core of mpn_add/mpn_sub.  FUNCTION (mpn_add_n or
+   mpn_sub_n) combines the ysize low limbs; when it reports a
+   carry/borrow, TEST propagates it limb by limb through the remaining x
+   limbs, storing into wp via __gmp_i as it goes, until the carry dies
+   or x is exhausted (in which case cout is 1).  Any untouched tail is
+   then copied, and cout gets the final carry/borrow (0 or 1).
+   Requires xsize >= ysize (see the commented ASSERTs).  */
+#define __GMPN_AORS(cout, wp, xp, xsize, yp, ysize, FUNCTION, TEST)     \
+  do {                                                                  \
+    mp_size_t  __gmp_i;                                                 \
+    mp_limb_t  __gmp_x;                                                 \
+                                                                        \
+    /* ASSERT ((ysize) >= 0); */                                        \
+    /* ASSERT ((xsize) >= (ysize)); */                                  \
+    /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, xp, xsize)); */      \
+    /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, yp, ysize)); */      \
+                                                                        \
+    __gmp_i = (ysize);                                                  \
+    if (__gmp_i != 0)                                                   \
+      {                                                                 \
+        if (FUNCTION (wp, xp, yp, __gmp_i))                             \
+          {                                                             \
+            do                                                          \
+              {                                                         \
+                if (__gmp_i >= (xsize))                                 \
+                  {                                                     \
+                    (cout) = 1;                                         \
+                    goto __gmp_done;                                    \
+                  }                                                     \
+                __gmp_x = (xp)[__gmp_i];                                \
+              }                                                         \
+            while (TEST);                                               \
+          }                                                             \
+      }                                                                 \
+    if ((wp) != (xp))                                                   \
+      __GMPN_COPY_REST (wp, xp, xsize, __gmp_i);                        \
+    (cout) = 0;                                                         \
+  __gmp_done:                                                           \
+    ;                                                                   \
+  } while (0)
+
+/* The TEST expressions store the incremented/decremented limb and
+   report whether the carry (result wrapped to 0) or borrow (limb was 0
+   before decrementing) continues into the next limb.  */
+#define __GMPN_ADD(cout, wp, xp, xsize, yp, ysize)              \
+  __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_add_n,       \
+               (((wp)[__gmp_i++] = (__gmp_x + 1) & GMP_NUMB_MASK) == 0))
+#define __GMPN_SUB(cout, wp, xp, xsize, yp, ysize)              \
+  __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_sub_n,       \
+               (((wp)[__gmp_i++] = (__gmp_x - 1) & GMP_NUMB_MASK), __gmp_x == 0))
+
+
+/* The use of __gmp_i indexing is designed to ensure a compile time src==dst
+   remains nice and clear to the compiler, so that __GMPN_COPY_REST can
+   disappear, and the load/add/store gets a chance to become a
+   read-modify-write on CISC CPUs.
+
+   Alternatives:
+
+   Using a pair of pointers instead of indexing would be possible, but gcc
+   isn't able to recognise compile-time src==dst in that case, even when the
+   pointers are incremented more or less together.  Other compilers would
+   very likely have similar difficulty.
+
+   gcc could use "if (__builtin_constant_p(src==dst) && src==dst)" or
+   similar to detect a compile-time src==dst.  This works nicely on gcc
+   2.95.x, it's not good on gcc 3.0 where __builtin_constant_p(p==p) seems
+   to be always false, for a pointer p.  But the current code form seems
+   good enough for src==dst anyway.
+
+   gcc on x86 as usual doesn't give particularly good flags handling for the
+   carry/borrow detection.  It's tempting to want some multi instruction asm
+   blocks to help it, and this was tried, but in truth there's only a few
+   instructions to save and any gain is all too easily lost by register
+   juggling setting up for the asm.  */
+
+/* __GMPN_AORS_1: add or subtract a single limb v to/from {src,n} with
+   result to {dst,n}.  OP is + or -; CB(r,x,y) detects carry/borrow out
+   of r = x OP y.  After the first limb the loop propagates a carry of 1
+   until it dies, then any untouched tail is copied when src != dst, and
+   cout gets the final carry/borrow (0 or 1).  Requires n >= 1 (see the
+   commented ASSERT).  Two variants: without nails the full limb is
+   stored and CB compares operands; with nails the carry is simply the
+   bits at and above GMP_NUMB_BITS, masked off before storing.  */
+#if GMP_NAIL_BITS == 0
+#define __GMPN_AORS_1(cout, dst, src, n, v, OP, CB)		\
+  do {								\
+    mp_size_t  __gmp_i;						\
+    mp_limb_t  __gmp_x, __gmp_r;                                \
+								\
+    /* ASSERT ((n) >= 1); */					\
+    /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, n)); */	\
+								\
+    __gmp_x = (src)[0];						\
+    __gmp_r = __gmp_x OP (v);                                   \
+    (dst)[0] = __gmp_r;						\
+    if (CB (__gmp_r, __gmp_x, (v)))                             \
+      {								\
+	(cout) = 1;						\
+	for (__gmp_i = 1; __gmp_i < (n);)                       \
+	  {							\
+	    __gmp_x = (src)[__gmp_i];                           \
+	    __gmp_r = __gmp_x OP 1;                             \
+	    (dst)[__gmp_i] = __gmp_r;                           \
+	    ++__gmp_i;						\
+	    if (!CB (__gmp_r, __gmp_x, 1))                      \
+	      {							\
+		if ((src) != (dst))				\
+		  __GMPN_COPY_REST (dst, src, n, __gmp_i);      \
+		(cout) = 0;					\
+		break;						\
+	      }							\
+	  }							\
+      }								\
+    else							\
+      {								\
+	if ((src) != (dst))					\
+	  __GMPN_COPY_REST (dst, src, n, 1);			\
+	(cout) = 0;						\
+      }								\
+  } while (0)
+#endif
+
+#if GMP_NAIL_BITS >= 1
+#define __GMPN_AORS_1(cout, dst, src, n, v, OP, CB)		\
+  do {								\
+    mp_size_t  __gmp_i;						\
+    mp_limb_t  __gmp_x, __gmp_r;				\
+								\
+    /* ASSERT ((n) >= 1); */					\
+    /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, n)); */	\
+								\
+    __gmp_x = (src)[0];						\
+    __gmp_r = __gmp_x OP (v);					\
+    (dst)[0] = __gmp_r & GMP_NUMB_MASK;				\
+    if (__gmp_r >> GMP_NUMB_BITS != 0)				\
+      {								\
+	(cout) = 1;						\
+	for (__gmp_i = 1; __gmp_i < (n);)			\
+	  {							\
+	    __gmp_x = (src)[__gmp_i];				\
+	    __gmp_r = __gmp_x OP 1;				\
+	    (dst)[__gmp_i] = __gmp_r & GMP_NUMB_MASK;		\
+	    ++__gmp_i;						\
+	    if (__gmp_r >> GMP_NUMB_BITS == 0)			\
+	      {							\
+		if ((src) != (dst))				\
+		  __GMPN_COPY_REST (dst, src, n, __gmp_i);	\
+		(cout) = 0;					\
+		break;						\
+	      }							\
+	  }							\
+      }								\
+    else							\
+      {								\
+	if ((src) != (dst))					\
+	  __GMPN_COPY_REST (dst, src, n, 1);			\
+	(cout) = 0;						\
+      }								\
+  } while (0)
+#endif
+
+/* Carry/borrow detectors for the no-nails case: with r = x + y an
+   addition carried iff the result wrapped below y; with r = x - y a
+   subtraction borrowed iff x was smaller than y.  */
+#define __GMPN_ADDCB(r,x,y) ((r) < (y))
+#define __GMPN_SUBCB(r,x,y) ((x) < (y))
+
+#define __GMPN_ADD_1(cout, dst, src, n, v)	     \
+  __GMPN_AORS_1(cout, dst, src, n, v, +, __GMPN_ADDCB)
+#define __GMPN_SUB_1(cout, dst, src, n, v)	     \
+  __GMPN_AORS_1(cout, dst, src, n, v, -, __GMPN_SUBCB)
+
+
+/* Compare {xp,size} and {yp,size}, setting "result" to positive, zero or
+   negative.  size==0 is allowed.  On random data usually only one limb will
+   need to be examined to get a result, so it's worth having it inline.  */
+/* Scan from the most significant limb down; the first differing limb
+   decides the sign.  Equal operands (or size 0) leave result at 0.  */
+#define __GMPN_CMP(result, xp, yp, size)                                \
+  do {                                                                  \
+    mp_size_t  __gmp_i;                                                 \
+    mp_limb_t  __gmp_x, __gmp_y;                                        \
+                                                                        \
+    /* ASSERT ((size) >= 0); */                                         \
+                                                                        \
+    (result) = 0;                                                       \
+    __gmp_i = (size);                                                   \
+    while (--__gmp_i >= 0)                                              \
+      {                                                                 \
+        __gmp_x = (xp)[__gmp_i];                                        \
+        __gmp_y = (yp)[__gmp_i];                                        \
+        if (__gmp_x != __gmp_y)                                         \
+          {                                                             \
+            /* Cannot use __gmp_x - __gmp_y, may overflow an "int" */   \
+            (result) = (__gmp_x > __gmp_y ? 1 : -1);                    \
+            break;                                                      \
+          }                                                             \
+      }                                                                 \
+  } while (0)
+
+
+/* If some earlier header already provided __GMPN_COPY (eg. a native
+   copy routine), express __GMPN_COPY_REST in terms of it.  */
+#if defined (__GMPN_COPY) && ! defined (__GMPN_COPY_REST)
+#define __GMPN_COPY_REST(dst, src, size, start)                 \
+  do {                                                          \
+    /* ASSERT ((start) >= 0); */                                \
+    /* ASSERT ((start) <= (size)); */                           \
+    __GMPN_COPY ((dst)+(start), (src)+(start), (size)-(start)); \
+  } while (0)
+#endif
+
+/* Copy {src,size} to {dst,size}, starting at "start".  This is designed to
+   keep the indexing dst[j] and src[j] nice and simple for __GMPN_ADD_1,
+   __GMPN_ADD, etc.  */
+#if ! defined (__GMPN_COPY_REST)
+#define __GMPN_COPY_REST(dst, src, size, start)                 \
+  do {                                                          \
+    mp_size_t __gmp_j;                                          \
+    /* ASSERT ((size) >= 0); */                                 \
+    /* ASSERT ((start) >= 0); */                                \
+    /* ASSERT ((start) <= (size)); */                           \
+    /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */     \
+    __GMP_CRAY_Pragma ("_CRI ivdep");                           \
+    for (__gmp_j = (start); __gmp_j < (size); __gmp_j++)        \
+      (dst)[__gmp_j] = (src)[__gmp_j];                          \
+  } while (0)
+#endif
+
+/* Enhancement: Use some of the smarter code from gmp-impl.h.  Maybe use
+   mpn_copyi if there's a native version, and if we don't mind demanding
+   binary compatibility for it (on targets which use it).  */
+
+/* A full copy is just a copy-rest starting at limb 0.  */
+#if ! defined (__GMPN_COPY)
+#define __GMPN_COPY(dst, src, size)   __GMPN_COPY_REST (dst, src, size, 0)
+#endif
+
+
+/* mpn_add: {wp,xsize} = {xp,xsize} + {yp,ysize}, requiring
+   xsize >= ysize; returns the carry out (0 or 1).  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_add)
+#if ! defined (__GMP_FORCE_mpn_add)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_add (mp_ptr __gmp_wp, mp_srcptr __gmp_xp, mp_size_t __gmp_xsize, mp_srcptr __gmp_yp, mp_size_t __gmp_ysize)
+{
+  mp_limb_t  __gmp_c;
+  __GMPN_ADD (__gmp_c, __gmp_wp, __gmp_xp, __gmp_xsize, __gmp_yp, __gmp_ysize);
+  return __gmp_c;
+}
+#endif
+
+/* mpn_add_1: {dst,size} = {src,size} + n (a single limb), size >= 1;
+   returns the carry out (0 or 1).  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_add_1)
+#if ! defined (__GMP_FORCE_mpn_add_1)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_add_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_t __gmp_n) __GMP_NOTHROW
+{
+  mp_limb_t  __gmp_c;
+  __GMPN_ADD_1 (__gmp_c, __gmp_dst, __gmp_src, __gmp_size, __gmp_n);
+  return __gmp_c;
+}
+#endif
+
+/* mpn_cmp: compare {xp,size} with {yp,size}; returns positive, zero or
+   negative like memcmp.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_cmp)
+#if ! defined (__GMP_FORCE_mpn_cmp)
+__GMP_EXTERN_INLINE
+#endif
+int
+mpn_cmp (mp_srcptr __gmp_xp, mp_srcptr __gmp_yp, mp_size_t __gmp_size) __GMP_NOTHROW
+{
+  int __gmp_result;
+  __GMPN_CMP (__gmp_result, __gmp_xp, __gmp_yp, __gmp_size);
+  return __gmp_result;
+}
+#endif
+
+/* mpn_zero_p: nonzero iff every limb of {p,n} is zero.  As written this
+   requires n >= 1: the n > 0 guard is commented out, so n == 0 would
+   read p[-1] and then loop on a wrapped count.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_zero_p)
+#if ! defined (__GMP_FORCE_mpn_zero_p)
+__GMP_EXTERN_INLINE
+#endif
+int
+mpn_zero_p (mp_srcptr __gmp_p, mp_size_t __gmp_n) __GMP_NOTHROW
+{
+  /* if (__GMP_LIKELY (__gmp_n > 0)) */
+    do {
+      if (__gmp_p[--__gmp_n] != 0)
+	return 0;
+    } while (__gmp_n != 0);
+  return 1;
+}
+#endif
+
+/* mpn_sub: {wp,xsize} = {xp,xsize} - {yp,ysize}, requiring
+   xsize >= ysize; returns the borrow out (0 or 1).  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_sub)
+#if ! defined (__GMP_FORCE_mpn_sub)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_sub (mp_ptr __gmp_wp, mp_srcptr __gmp_xp, mp_size_t __gmp_xsize, mp_srcptr __gmp_yp, mp_size_t __gmp_ysize)
+{
+  mp_limb_t  __gmp_c;
+  __GMPN_SUB (__gmp_c, __gmp_wp, __gmp_xp, __gmp_xsize, __gmp_yp, __gmp_ysize);
+  return __gmp_c;
+}
+#endif
+
+/* mpn_sub_1: {dst,size} = {src,size} - n (a single limb), size >= 1;
+   returns the borrow out (0 or 1).  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_sub_1)
+#if ! defined (__GMP_FORCE_mpn_sub_1)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_sub_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_t __gmp_n) __GMP_NOTHROW
+{
+  mp_limb_t  __gmp_c;
+  __GMPN_SUB_1 (__gmp_c, __gmp_dst, __gmp_src, __gmp_size, __gmp_n);
+  return __gmp_c;
+}
+#endif
+
+/* mpn_neg: {rp,n} = -{up,n}, the n-limb two's complement negation.
+   Returns 0 when the operand is entirely zero (the result then is too),
+   otherwise 1.  Low zero limbs pass through unchanged, the lowest
+   nonzero limb is negated (masked to GMP_NUMB_MASK), and all higher
+   limbs are one's-complemented by mpn_com.  */
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_neg)
+#if ! defined (__GMP_FORCE_mpn_neg)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_neg (mp_ptr __gmp_rp, mp_srcptr __gmp_up, mp_size_t __gmp_n)
+{
+  while (*__gmp_up == 0) /* Low zero limbs are unchanged by negation. */
+    {
+      *__gmp_rp = 0;
+      if (!--__gmp_n) /* All zero */
+	return 0;
+      ++__gmp_up; ++__gmp_rp;
+    }
+
+  *__gmp_rp = (- *__gmp_up) & GMP_NUMB_MASK;
+
+  if (--__gmp_n) /* Higher limbs get complemented. */
+    mpn_com (++__gmp_rp, ++__gmp_up, __gmp_n);
+
+  return 1;
+}
+#endif
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/* Allow faster testing for negative, zero, and positive.  */
+/* Each expands to -1, 0 or 1 straight from the sign of the size field,
+   avoiding a function call.  */
+#define mpz_sgn(Z) ((Z)->_mp_size < 0 ? -1 : (Z)->_mp_size > 0)
+#define mpf_sgn(F) ((F)->_mp_size < 0 ? -1 : (F)->_mp_size > 0)
+#define mpq_sgn(Q) ((Q)->_mp_num._mp_size < 0 ? -1 : (Q)->_mp_num._mp_size > 0)
+
+/* When using GCC, optimize certain common comparisons.  */
+/* Comparing against a compile-time 0 becomes a sign test; a known
+   non-negative signed operand can use the unsigned path.  mpq_cmp_ui
+   with equal numerator and denominator compares Q against 1, which
+   reduces to comparing Q's numerator with its denominator.  */
+#if defined (__GNUC__) && __GNUC__ >= 2
+#define mpz_cmp_ui(Z,UI) \
+  (__builtin_constant_p (UI) && (UI) == 0				\
+   ? mpz_sgn (Z) : _mpz_cmp_ui (Z,UI))
+#define mpz_cmp_si(Z,SI)						\
+  (__builtin_constant_p ((SI) >= 0) && (SI) >= 0			\
+   ? mpz_cmp_ui (Z, __GMP_CAST (unsigned long, SI))			\
+   : _mpz_cmp_si (Z,SI))
+#define mpq_cmp_ui(Q,NUI,DUI)					\
+  (__builtin_constant_p (NUI) && (NUI) == 0 ? mpq_sgn (Q)	\
+   : __builtin_constant_p ((NUI) == (DUI)) && (NUI) == (DUI)	\
+   ? mpz_cmp (mpq_numref (Q), mpq_denref (Q))			\
+   : _mpq_cmp_ui (Q,NUI,DUI))
+#define mpq_cmp_si(q,n,d)				\
+  (__builtin_constant_p ((n) >= 0) && (n) >= 0		\
+   ? mpq_cmp_ui (q, __GMP_CAST (unsigned long, n), d)	\
+   : _mpq_cmp_si (q, n, d))
+#else
+#define mpz_cmp_ui(Z,UI) _mpz_cmp_ui (Z,UI)
+#define mpz_cmp_si(Z,UI) _mpz_cmp_si (Z,UI)
+#define mpq_cmp_ui(Q,NUI,DUI) _mpq_cmp_ui (Q,NUI,DUI)
+#define mpq_cmp_si(q,n,d)  _mpq_cmp_si(q,n,d)
+#endif
+
+
+/* Using "&" rather than "&&" means these can come out branch-free.  Every
+   mpz_t has at least one limb allocated, so fetching the low limb is always
+   allowed.  */
+#define mpz_odd_p(z)   (((z)->_mp_size != 0) & __GMP_CAST (int, (z)->_mp_d[0]))
+#define mpz_even_p(z)  (! mpz_odd_p (z))
+
+
+/**************** C++ routines ****************/
+
+/* Stream inserters/extractors for the three GMP types.  NOTE(review):
+   the __GMP_DECLSPEC_XX marking suggests these are implemented in the
+   C++ support library (libgmpxx) rather than libgmp -- confirm before
+   relying on them without linking it.  */
+#ifdef __cplusplus
+__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpz_srcptr);
+__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpq_srcptr);
+__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpf_srcptr);
+__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpz_ptr);
+__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpq_ptr);
+__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpf_ptr);
+#endif
+
+
+/* Source-level compatibility with GMP 2 and earlier. */
+#define mpn_divmod(qp,np,nsize,dp,dsize) \
+  mpn_divrem (qp, __GMP_CAST (mp_size_t, 0), np, nsize, dp, dsize)
+
+/* Source-level compatibility with GMP 1.  */
+/* "m" prefixed names map onto the floor-rounding (fdiv) division
+   family; the *_ui forms special-case a null remainder pointer.  */
+#define mpz_mdiv	mpz_fdiv_q
+#define mpz_mdivmod	mpz_fdiv_qr
+#define mpz_mmod	mpz_fdiv_r
+#define mpz_mdiv_ui	mpz_fdiv_q_ui
+#define mpz_mdivmod_ui(q,r,n,d) \
+  (((r) == 0) ? mpz_fdiv_q_ui (q,n,d) : mpz_fdiv_qr_ui (q,r,n,d))
+#define mpz_mmod_ui(r,n,d) \
+  (((r) == 0) ? mpz_fdiv_ui (n,d) : mpz_fdiv_r_ui (r,n,d))
+
+/* Useful synonyms, but not quite compatible with GMP 1.  */
+#define mpz_div		mpz_fdiv_q
+#define mpz_divmod	mpz_fdiv_qr
+#define mpz_div_ui	mpz_fdiv_q_ui
+#define mpz_divmod_ui	mpz_fdiv_qr_ui
+#define mpz_div_2exp	mpz_fdiv_q_2exp
+#define mpz_mod_2exp	mpz_fdiv_r_2exp
+
+/* Error codes; powers of two so they can be combined as bit flags.  */
+enum
+{
+  GMP_ERROR_NONE = 0,
+  GMP_ERROR_UNSUPPORTED_ARGUMENT = 1,
+  GMP_ERROR_DIVISION_BY_ZERO = 2,
+  GMP_ERROR_SQRT_OF_NEGATIVE = 4,
+  GMP_ERROR_INVALID_ARGUMENT = 8,
+  GMP_ERROR_MPZ_OVERFLOW = 16
+};
+
+/* Define CC and CFLAGS which were used to build this version of GMP */
+/* NOTE(review): "@CC@" and "@CFLAGS@" are autoconf output-variable
+   placeholders, normally substituted at configure time; this mirror
+   carries the unsubstituted template values -- confirm that is the
+   intended state for the vendored copy.  */
+#define __GMP_CC "@CC@"
+#define __GMP_CFLAGS "@CFLAGS@"
+
+/* Major version number is the value of __GNU_MP__ too, above. */
+#define __GNU_MP_VERSION            6
+#define __GNU_MP_VERSION_MINOR      3
+#define __GNU_MP_VERSION_PATCHLEVEL 0
+#define __GNU_MP_RELEASE (__GNU_MP_VERSION * 10000 + __GNU_MP_VERSION_MINOR * 100 + __GNU_MP_VERSION_PATCHLEVEL)
+
+#define __GMP_H__
+#endif /* __GMP_H__ */
diff --git a/gmp-impl.h b/gmp-impl.h
new file mode 100644
index 0000000..2615af7
--- /dev/null
+++ b/gmp-impl.h
@@ -0,0 +1,5346 @@
+/* Include file for internal GNU MP types and definitions.
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 1991-2018, 2021, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* __GMP_DECLSPEC must be given on any global data that will be accessed
+   from outside libgmp, meaning from the test or development programs, or
+   from libgmpxx.  Failing to do this will result in an incorrect address
+   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
+   from outside libgmp more efficient, but they'll still work fine without
+   it.  */
+
+
+#ifndef __GMP_IMPL_H__
+#define __GMP_IMPL_H__
+
+#if defined _CRAY
+#include <intrinsics.h>  /* for _popcnt */
+#endif
+
+/* For INT_MAX, etc. We used to avoid it because of a bug (on solaris,
+   gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
+   values (the ABI=64 values)), but it should be safe now.
+
+   On Cray vector systems, however, we need the system limits.h since sizes
+   of signed and unsigned types can differ there, depending on compiler
+   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
+   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
+   short can be 24, 32, 46 or 64 bits, and different for ushort.  */
+
+#include <limits.h>
+
+/* For fat.h and other fat binary stuff.
+   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
+   declared this way are only used to set function pointers in __gmpn_cpuvec,
+   they're not called directly.  */
+/* Each DECL_foo(name) expands to the prototype of an mpn primitive with
+   the given name; operations sharing a signature alias an earlier
+   DECL_ (eg. DECL_sub_n reuses DECL_add_n).  */
+#define DECL_add_n(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
+#define DECL_addlsh1_n(name) \
+  DECL_add_n (name)
+#define DECL_addlsh2_n(name) \
+  DECL_add_n (name)
+#define DECL_addmul_1(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_addmul_2(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
+#define DECL_bdiv_dbm1c(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
+#define DECL_cnd_add_n(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
+#define DECL_cnd_sub_n(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
+#define DECL_com(name) \
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
+#define DECL_copyd(name) \
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
+#define DECL_copyi(name) \
+  DECL_copyd (name)
+#define DECL_divexact_1(name) \
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_divexact_by3c(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_divrem_1(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_gcd_11(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_limb_t)
+#define DECL_lshift(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
+#define DECL_lshiftc(name) \
+  DECL_lshift (name)
+#define DECL_mod_1(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_mod_1_1p(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [])
+#define DECL_mod_1_1p_cps(name) \
+  __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
+#define DECL_mod_1s_2p(name) \
+  DECL_mod_1_1p (name)
+#define DECL_mod_1s_2p_cps(name) \
+  DECL_mod_1_1p_cps (name)
+#define DECL_mod_1s_4p(name) \
+  DECL_mod_1_1p (name)
+#define DECL_mod_1s_4p_cps(name) \
+  DECL_mod_1_1p_cps (name)
+#define DECL_mod_34lsub1(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
+#define DECL_modexact_1c_odd(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
+#define DECL_mul_1(name) \
+  DECL_addmul_1 (name)
+#define DECL_mul_basecase(name) \
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
+#define DECL_mullo_basecase(name) \
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
+#define DECL_preinv_divrem_1(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
+#define DECL_preinv_mod_1(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
+#define DECL_redc_1(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_redc_2(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
+#define DECL_rshift(name) \
+  DECL_lshift (name)
+#define DECL_sqr_basecase(name) \
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
+#define DECL_sub_n(name) \
+  DECL_add_n (name)
+#define DECL_sublsh1_n(name) \
+  DECL_add_n (name)
+#define DECL_submul_1(name) \
+  DECL_addmul_1 (name)
+
+#if ! defined (__GMP_WITHIN_CONFIGURE)
+#include "config.h"
+#include "gmp.h"
+#include "gmp-mparam.h"
+#include "fib_table.h"
+#include "fac_table.h"
+#include "sieve_table.h"
+#include "mp_bases.h"
+#if WANT_FAT_BINARY
+#include "fat.h"
+#endif
+#endif
+
+#if HAVE_INTTYPES_H      /* for uint_least32_t */
+# include <inttypes.h>
+#endif
+/* On some platforms inttypes.h exists but is incomplete
+   and we still need stdint.h. */
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#ifdef __cplusplus
+#include <cstring>  /* for strlen */
+#include <string>   /* for std::string */
+#endif
+
+
+#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
+#define WANT_TMP_DEBUG 0
+#endif
+
+/* The following tries to get a good version of alloca.  The tests are
+   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
+   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
+   be setup appropriately.
+
+   ifndef alloca - a cpp define might already exist.
+       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
+       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
+
+   GCC __builtin_alloca - preferred whenever available.
+
+   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
+       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
+       used in past versions of GMP, retained still in case it matters.
+
+       The autoconf manual says this pragma needs to be at the start of a C
+       file, apart from comments and preprocessor directives.  Is that true?
+       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
+       from gmp.h.
+*/
+
+/* Pick an alloca implementation, preferring compiler builtins; see the
+   comment block above for the rationale behind each case.  The final
+   fallback is a bare declaration and relies on the platform providing
+   the function at link time.  */
+#ifndef alloca
+# ifdef __GNUC__
+#  define alloca __builtin_alloca
+# else
+#  ifdef __DECC
+#   define alloca(x) __ALLOCA(x)
+#  else
+#   ifdef _MSC_VER
+#    include <malloc.h>
+#    define alloca _alloca
+#   else
+#    if HAVE_ALLOCA_H
+#     include <alloca.h>
+#    else
+#     if defined (_AIX) || defined (_IBMR2)
+ #pragma alloca
+#     else
+       char *alloca ();
+#     endif
+#    endif
+#   endif
+#  endif
+# endif
+#endif
+
+
+/* if not provided by gmp-mparam.h */
+#ifndef GMP_LIMB_BYTES
+#define GMP_LIMB_BYTES  SIZEOF_MP_LIMB_T
+#endif
+#ifndef GMP_LIMB_BITS
+#define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
+#endif
+
+#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)
+
+
+/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
+/* Prefer the C99 type when configure found it; otherwise pick the
+   narrowest of short/int/long whose configure-detected size is >= 4
+   bytes.  */
+#if HAVE_UINT_LEAST32_T
+typedef uint_least32_t      gmp_uint_least32_t;
+#else
+#if SIZEOF_UNSIGNED_SHORT >= 4
+typedef unsigned short      gmp_uint_least32_t;
+#else
+#if SIZEOF_UNSIGNED >= 4
+typedef unsigned            gmp_uint_least32_t;
+#else
+typedef unsigned long       gmp_uint_least32_t;
+#endif
+#endif
+#endif
+
+
+/* gmp_intptr_t, for pointer to integer casts */
+/* NOTE(review): the fallback assumes size_t is pointer-sized, which
+   holds on common ABIs but is not guaranteed by the standard.  */
+#if HAVE_INTPTR_T
+typedef intptr_t            gmp_intptr_t;
+#else /* fallback */
+typedef size_t              gmp_intptr_t;
+#endif
+
+
+/* pre-inverse types for truncating division and modulo */
+/* The fields hold precomputed inverse limbs consumed by the division
+   routines; the struct keeps them together per divisor.  */
+typedef struct {mp_limb_t inv32;} gmp_pi1_t;
+typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
+
+
+/* "const" basically means a function does nothing but examine its arguments
+   and give a return value, it doesn't read or write any memory (neither
+   global nor pointed to by arguments), and has no other side-effects.  This
+   is more restrictive than "pure".  See info node "(gcc)Function
+   Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
+   this off when trying to write timing loops.  */
+#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
+#define ATTRIBUTE_CONST  __attribute__ ((const))
+#else
+#define ATTRIBUTE_CONST
+#endif
+
+#if HAVE_ATTRIBUTE_NORETURN
+#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
+#else
+#define ATTRIBUTE_NORETURN
+#endif
+
+/* "malloc" means a function behaves like malloc in that the pointer it
+   returns doesn't alias anything.  */
+#if HAVE_ATTRIBUTE_MALLOC
+#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
+#else
+#define ATTRIBUTE_MALLOC
+#endif
+
+
+#if ! HAVE_STRCHR
+/* Old BSD-style libcs name this function index() rather than strchr().  */
+#define strchr(s,c)  index(s,c)
+#endif
+
+#if ! HAVE_MEMSET
+/* Byte-fill fallback for libcs lacking memset.  Beware this is a macro:
+   "n" is evaluated several times (in the ASSERT and in each loop test),
+   so don't pass expressions with side effects.  */
+#define memset(p, c, n)			\
+  do {					\
+    ASSERT ((n) >= 0);			\
+    char *__memset__p = (p);		\
+    int	 __i;				\
+    for (__i = 0; __i < (n); __i++)	\
+      __memset__p[__i] = (c);		\
+  } while (0)
+#endif
+
+/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
+   mode.  Falling back to a memcpy will give maximum portability, since it
+   works no matter whether va_list is a pointer, struct or array.  */
+#if ! defined (va_copy) && defined (__va_copy)
+#define va_copy(dst,src)  __va_copy(dst,src)
+#endif
+#if ! defined (va_copy)
+/* Portable last resort: copy the va_list representation bytewise.  */
+#define va_copy(dst,src) \
+  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
+#endif
+
+
+/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
+   (ie. ctlz, ctpop, cttz).  */
+#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
+  || HAVE_HOST_CPU_alphaev7
+#define HAVE_HOST_CPU_alpha_CIX 1
+#endif
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* Usage: TMP_DECL;
+	  TMP_MARK;
+	  ptr = TMP_ALLOC (bytes);
+	  TMP_FREE;
+
+   Small allocations should use TMP_SALLOC, big allocations should use
+   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.
+
+   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
+   TMP_SFREE.
+
+   TMP_DECL just declares a variable, but might be empty and so must be last
+   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
+   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
+   TMP_MARK was made, but then no TMP_ALLOCs.  */
+
+/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
+   __gmp_allocate_func doesn't already determine it.  The union members
+   give the strictest alignment required by a limb, a double or a
+   pointer.  */
+union tmp_align_t {
+  mp_limb_t  l;
+  double     d;
+  char       *p;
+};
+#define __TMP_ALIGN  sizeof (union tmp_align_t)
+
+/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
+   "a" must be an unsigned type.
+   This is designed for use with a compile-time constant "m".
+   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
+   "(-(8*n))%8" or the like is always zero, which means the rounding up in
+   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.
+   Note both "a" and "m" are evaluated more than once.  */
+#define ROUND_UP_MULTIPLE(a,m)          \
+  (POW2_P(m) ? (a) + (-(a))%(m)         \
+   : (a)+(m)-1 - (((a)+(m)-1) % (m)))
+
+#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
+/* Header prepended to each heap block obtained via TMP_BALLOC; blocks are
+   chained through "next" so __gmp_tmp_reentrant_free can release the whole
+   chain from its head pointer.  */
+struct tmp_reentrant_t {
+  struct tmp_reentrant_t  *next;
+  size_t		  size;	  /* bytes, including header */
+};
+__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void  __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
+#endif
+
+#if WANT_TMP_ALLOCA
+/* Hybrid scheme: small blocks come from the stack via alloca and need no
+   explicit release; big blocks go on a heap chain hung off __tmp_marker
+   and are released by TMP_FREE.  */
+#define TMP_SDECL
+#define TMP_DECL		struct tmp_reentrant_t *__tmp_marker
+#define TMP_SMARK
+#define TMP_MARK		__tmp_marker = 0
+#define TMP_SALLOC(n)		alloca(n)
+#define TMP_BALLOC(n)		__gmp_tmp_reentrant_alloc (&__tmp_marker, n)
+/* The peculiar stack allocation limit here is chosen for efficient asm.
+   Requests up to 0x7f00 bytes come from the stack, larger ones from the
+   heap.  */
+#define TMP_ALLOC(n)							\
+  (LIKELY ((n) <= 0x7f00) ? TMP_SALLOC(n) : TMP_BALLOC(n))
+#define TMP_SFREE
+#define TMP_FREE							\
+  do {									\
+    if (UNLIKELY (__tmp_marker != 0))					\
+      __gmp_tmp_reentrant_free (__tmp_marker);				\
+  } while (0)
+#endif
+
+#if WANT_TMP_REENTRANT
+/* Everything comes from the heap: the small/big distinction collapses and
+   the S-variants simply alias the plain ones.  (Macro expansion is lazy,
+   so defining TMP_SDECL as TMP_DECL before redefining TMP_DECL is fine.)  */
+#define TMP_SDECL		TMP_DECL
+#define TMP_DECL		struct tmp_reentrant_t *__tmp_marker
+#define TMP_SMARK		TMP_MARK
+#define TMP_MARK		__tmp_marker = 0
+#define TMP_SALLOC(n)		TMP_ALLOC(n)
+#define TMP_BALLOC(n)		TMP_ALLOC(n)
+#define TMP_ALLOC(n)		__gmp_tmp_reentrant_alloc (&__tmp_marker, n)
+#define TMP_SFREE		TMP_FREE
+#define TMP_FREE		__gmp_tmp_reentrant_free (__tmp_marker)
+#endif
+
+#if WANT_TMP_NOTREENTRANT
+/* Mark/alloc/free against a chunk stack -- presumably a global one (see
+   tal-notreent.c), hence not reentrant.  TMP_MARK records the current
+   position, TMP_FREE pops back to it.  */
+struct tmp_marker
+{
+  struct tmp_stack *which_chunk;
+  void *alloc_point;
+};
+__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
+__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
+#define TMP_SDECL		TMP_DECL
+#define TMP_DECL		struct tmp_marker __tmp_marker
+#define TMP_SMARK		TMP_MARK
+#define TMP_MARK		__gmp_tmp_mark (&__tmp_marker)
+#define TMP_SALLOC(n)		TMP_ALLOC(n)
+#define TMP_BALLOC(n)		TMP_ALLOC(n)
+/* Sizes are rounded up so successive allocations stay aligned.  */
+#define TMP_ALLOC(n)							\
+  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
+#define TMP_SFREE		TMP_FREE
+#define TMP_FREE		__gmp_tmp_free (&__tmp_marker)
+#endif
+
+#if WANT_TMP_DEBUG
+/* See tal-debug.c for some comments. */
+/* List head for a function's debug allocations, recording where the
+   TMP_MARK was made.  */
+struct tmp_debug_t {
+  struct tmp_debug_entry_t  *list;
+  const char                *file;
+  int                       line;
+};
+/* One node per allocation, so each block is separately allocated and a
+   redzoning malloc debugger can guard it individually.  */
+struct tmp_debug_entry_t {
+  struct tmp_debug_entry_t  *next;
+  void                      *block;
+  size_t                    size;
+};
+__GMP_DECLSPEC void  __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
+					   struct tmp_debug_t *,
+					   const char *, const char *);
+__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
+					    struct tmp_debug_t **, const char *,
+					    size_t) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void  __gmp_tmp_debug_free (const char *, int, int,
+					   struct tmp_debug_t **,
+					   const char *, const char *);
+#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
+/* The marker variable is designed to provoke an uninitialized variable
+   warning from the compiler if TMP_FREE is used without a TMP_MARK.
+   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
+   these things up too.  */
+#define TMP_DECL_NAME(marker, marker_name)				\
+  int marker;								\
+  int __tmp_marker_inscope;						\
+  const char *__tmp_marker_name = marker_name;				\
+  struct tmp_debug_t  __tmp_marker_struct;				\
+  /* don't demand NULL, just cast a zero */				\
+  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
+#define TMP_MARK_NAME(marker, marker_name)				\
+  do {									\
+    marker = 1;								\
+    __tmp_marker_inscope = 1;						\
+    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,			\
+			   &__tmp_marker, &__tmp_marker_struct,		\
+			   __tmp_marker_name, marker_name);		\
+  } while (0)
+#define TMP_SALLOC(n)		TMP_ALLOC(n)
+#define TMP_BALLOC(n)		TMP_ALLOC(n)
+#define TMP_ALLOC(size)							\
+  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,			\
+			 __tmp_marker_inscope,				\
+			 &__tmp_marker, __tmp_marker_name, size)
+#define TMP_FREE_NAME(marker, marker_name)				\
+  do {									\
+    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,			\
+			   marker, &__tmp_marker,			\
+			   __tmp_marker_name, marker_name);		\
+  } while (0)
+#endif /* WANT_TMP_DEBUG */
+
+
+/* Allocating various types; counts are in elements of the given type,
+   not bytes. */
+#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
+#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
+#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
+#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
+#define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
+#define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
+#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
+#define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
+#define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)
+
+/* It's more efficient to allocate one block than many.  This is certainly
+   true of the malloc methods, but it can even be true of alloca if that
+   involves copying a chunk of stack (various RISCs), or a call to a stack
+   bounds check (mingw).  In any case, when debugging keep separate blocks
+   so a redzoning malloc debugger can protect each individually.  */
+/* In the combined (non-debug) case yp, and zp below, point into the single
+   xp allocation.  */
+#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)				\
+  do {									\
+    if (WANT_TMP_DEBUG)							\
+      {									\
+	(xp) = TMP_ALLOC_LIMBS (xsize);					\
+	(yp) = TMP_ALLOC_LIMBS (ysize);					\
+      }									\
+    else								\
+      {									\
+	(xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));			\
+	(yp) = (xp) + (xsize);						\
+      }									\
+  } while (0)
+#define TMP_ALLOC_LIMBS_3(xp,xsize, yp,ysize, zp,zsize)			\
+  do {									\
+    if (WANT_TMP_DEBUG)							\
+      {									\
+	(xp) = TMP_ALLOC_LIMBS (xsize);					\
+	(yp) = TMP_ALLOC_LIMBS (ysize);					\
+	(zp) = TMP_ALLOC_LIMBS (zsize);					\
+      }									\
+    else								\
+      {									\
+	(xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize) + (zsize));		\
+	(yp) = (xp) + (xsize);						\
+	(zp) = (yp) + (ysize);						\
+      }									\
+  } while (0)
+
+/* From gmp.h, nicer names for internal use. */
+#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
+#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
+#define LIKELY(cond)                   __GMP_LIKELY(cond)
+#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)
+
+#define ABS(x) ((x) >= 0 ? (x) : -(x))
+#define NEG_CAST(T,x) (- (__GMP_CAST (T, (x) + 1) - 1))
+#define ABS_CAST(T,x) ((x) >= 0 ? __GMP_CAST (T, x) : NEG_CAST (T,x))
+#undef MIN
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#undef MAX
+#define MAX(h,i) ((h) > (i) ? (h) : (i))
+#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
+
+/* Field access macros.  */
+#define SIZ(x) ((x)->_mp_size)
+#define ABSIZ(x) ABS (SIZ (x))
+#define PTR(x) ((x)->_mp_d)
+#define EXP(x) ((x)->_mp_exp)
+#define PREC(x) ((x)->_mp_prec)
+#define ALLOC(x) ((x)->_mp_alloc)
+#define NUM(x) mpq_numref(x)
+#define DEN(x) mpq_denref(x)
+
+/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
+   then that lowest one bit must have been the only bit set.  n==0 will
+   return true though, so avoid that.  */
+#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
+
+/* This is intended for constant THRESHOLDs only, where the compiler
+   can completely fold the result.  */
+#define LOG2C(n) \
+ (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
+  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
+  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
+  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
+
+#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)
+
+/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
+   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
+   treats the plain decimal values in <limits.h> as signed.  */
+#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
+#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
+#define USHRT_HIGHBIT      (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1))
+#define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
+
+#if __GMP_MP_SIZE_T_INT
+#define MP_SIZE_T_MAX      INT_MAX
+#define MP_SIZE_T_MIN      INT_MIN
+#else
+#define MP_SIZE_T_MAX      LONG_MAX
+#define MP_SIZE_T_MIN      LONG_MIN
+#endif
+
+/* mp_exp_t is the same as mp_size_t */
+#define MP_EXP_T_MAX   MP_SIZE_T_MAX
+#define MP_EXP_T_MIN   MP_SIZE_T_MIN
+
+#define LONG_HIGHBIT       LONG_MIN
+#define INT_HIGHBIT        INT_MIN
+#define SHRT_HIGHBIT       SHRT_MIN
+
+
+#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
+
+#if GMP_NAIL_BITS == 0
+#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
+#else
+#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
+#endif
+
+#if GMP_NAIL_BITS != 0
+/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
+   code that has not yet been qualified.  */
+
+#undef  DC_DIV_QR_THRESHOLD
+#define DC_DIV_QR_THRESHOLD              50
+
+#undef DIVREM_1_NORM_THRESHOLD
+#undef DIVREM_1_UNNORM_THRESHOLD
+#undef MOD_1_NORM_THRESHOLD
+#undef MOD_1_UNNORM_THRESHOLD
+#undef USE_PREINV_DIVREM_1
+#undef DIVREM_2_THRESHOLD
+#undef DIVEXACT_1_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
+#define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
+#define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
+#define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
+#define USE_PREINV_DIVREM_1               0  /* no preinv */
+#define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */
+
+/* mpn/generic/mul_fft.c is not nails-capable. */
+#undef  MUL_FFT_THRESHOLD
+#undef  SQR_FFT_THRESHOLD
+#define MUL_FFT_THRESHOLD                MP_SIZE_T_MAX
+#define SQR_FFT_THRESHOLD                MP_SIZE_T_MAX
+#endif
+
+/* Swap macros.  Each argument is both read and written, and evaluated
+   more than once, so avoid operands with side effects. */
+
+#define MP_LIMB_T_SWAP(x, y)						\
+  do {									\
+    mp_limb_t __mp_limb_t_swap__tmp = (x);				\
+    (x) = (y);								\
+    (y) = __mp_limb_t_swap__tmp;					\
+  } while (0)
+#define MP_SIZE_T_SWAP(x, y)						\
+  do {									\
+    mp_size_t __mp_size_t_swap__tmp = (x);				\
+    (x) = (y);								\
+    (y) = __mp_size_t_swap__tmp;					\
+  } while (0)
+
+#define MP_PTR_SWAP(x, y)						\
+  do {									\
+    mp_ptr __mp_ptr_swap__tmp = (x);					\
+    (x) = (y);								\
+    (y) = __mp_ptr_swap__tmp;						\
+  } while (0)
+#define MP_SRCPTR_SWAP(x, y)						\
+  do {									\
+    mp_srcptr __mp_srcptr_swap__tmp = (x);				\
+    (x) = (y);								\
+    (y) = __mp_srcptr_swap__tmp;					\
+  } while (0)
+
+/* Swap a limb pointer together with its size.  */
+#define MPN_PTR_SWAP(xp,xs, yp,ys)					\
+  do {									\
+    MP_PTR_SWAP (xp, yp);						\
+    MP_SIZE_T_SWAP (xs, ys);						\
+  } while(0)
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)					\
+  do {									\
+    MP_SRCPTR_SWAP (xp, yp);						\
+    MP_SIZE_T_SWAP (xs, ys);						\
+  } while(0)
+
+#define MPZ_PTR_SWAP(x, y)						\
+  do {									\
+    mpz_ptr __mpz_ptr_swap__tmp = (x);					\
+    (x) = (y);								\
+    (y) = __mpz_ptr_swap__tmp;						\
+  } while (0)
+#define MPZ_SRCPTR_SWAP(x, y)						\
+  do {									\
+    mpz_srcptr __mpz_srcptr_swap__tmp = (x);				\
+    (x) = (y);								\
+    (y) = __mpz_srcptr_swap__tmp;					\
+  } while (0)
+
+#define MPQ_PTR_SWAP(x, y)						\
+  do {                                                                  \
+    mpq_ptr __mpq_ptr_swap__tmp = (x);					\
+    (x) = (y);                                                          \
+    (y) = __mpq_ptr_swap__tmp;						\
+  } while (0)
+#define MPQ_SRCPTR_SWAP(x, y)                                           \
+  do {                                                                  \
+    mpq_srcptr __mpq_srcptr_swap__tmp = (x);                            \
+    (x) = (y);                                                          \
+    (y) = __mpq_srcptr_swap__tmp;                                       \
+  } while (0)
+
+
+/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
+   but current gcc (3.0) doesn't seem to support that.  */
+/* The replaceable memory allocation hooks and their defaults; the macros
+   below wrap them taking element counts rather than byte counts.  */
+__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
+__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
+__GMP_DECLSPEC extern void   (*__gmp_free_func) (void *, size_t);
+
+__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
+__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
+__GMP_DECLSPEC void __gmp_default_free (void *, size_t);
+
+#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
+  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
+#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
+
+#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type)		\
+  ((type *) (*__gmp_reallocate_func)					\
+   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
+#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size)		\
+  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
+
+#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
+#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
+
+/* Call the reallocate hook only when the size actually changes.  */
+#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)		\
+  do {									\
+    if ((oldsize) != (newsize))						\
+      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize);		\
+  } while (0)
+
+#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)	\
+  do {									\
+    if ((oldsize) != (newsize))						\
+      (ptr) = (type *) (*__gmp_reallocate_func)				\
+	(ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));	\
+  } while (0)
+
+
+/* Dummy for non-gcc, code involving it will go dead. */
+#if ! defined (__GNUC__) || __GNUC__ < 2
+#define __builtin_constant_p(x)   0
+#endif
+
+
+/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
+   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
+   putting leading parameters in registers, avoiding extra stack.
+
+   regparm cannot be used with calls going through the PLT, because the
+   binding code there may clobber the registers (%eax, %edx, %ecx) used for
+   the regparm parameters.  Calls to local (ie. static) functions could
+   still use this, if we cared to differentiate locals and globals.
+
+   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
+   -p or -pg profiling, since that version of gcc doesn't realize the
+   .mcount calls will clobber the parameter registers.  Other systems are
+   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
+   bother to try to detect this.  regparm is only an optimization so we just
+   disable it when profiling (profiling being a slowdown anyway).  */
+
+#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
+  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
+#define USE_LEADING_REGPARM 1
+#else
+#define USE_LEADING_REGPARM 0
+#endif
+
+/* Macros for altering parameter order according to regparm usage. */
+#if USE_LEADING_REGPARM
+#define REGPARM_2_1(a,b,x)    x,a,b
+#define REGPARM_3_1(a,b,c,x)  x,a,b,c
+#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
+#else
+#define REGPARM_2_1(a,b,x)    a,b,x
+#define REGPARM_3_1(a,b,c,x)  a,b,c,x
+#define REGPARM_ATTR(n)
+#endif
+
+
+/* ASM_L gives a local label for a gcc asm block, for use when temporary
+   local labels like "1:" might not be available, which is the case for
+   instance on the x86s (the SCO assembler doesn't support them).
+
+   The label generated is made unique by including "%=" which is a unique
+   number for each insn.  This ensures the same name can be used in multiple
+   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
+   allowed there's no need for a label to be usable outside a single
+   block.  */
+
+#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
+
+
+#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
+#if 0
+/* FIXME: Check that these actually improve things.
+   FIXME: Need a cld after each std.
+   FIXME: Can't have inputs in clobbered registers, must describe them as
+   dummy outputs, and add volatile. */
+#define MPN_COPY_INCR(DST, SRC, N)					\
+  __asm__ ("cld\n\trep\n\tmovsl" : :					\
+	   "D" (DST), "S" (SRC), "c" (N) :				\
+	   "cx", "di", "si", "memory")
+#define MPN_COPY_DECR(DST, SRC, N)					\
+  __asm__ ("std\n\trep\n\tmovsl" : :					\
+	   "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :	\
+	   "cx", "di", "si", "memory")
+#endif
+#endif
+
+
+__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
+#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
+
+#define mpz_n_pow_ui __gmpz_n_pow_ui
+__GMP_DECLSPEC void    mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);
+
+
+#define mpn_addmul_1c __MPN(addmul_1c)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+#ifndef mpn_addmul_2  /* if not done with cpuvec in a fat binary */
+#define mpn_addmul_2 __MPN(addmul_2)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+#endif
+
+#define mpn_addmul_3 __MPN(addmul_3)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_addmul_4 __MPN(addmul_4)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_addmul_5 __MPN(addmul_5)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_addmul_6 __MPN(addmul_6)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_addmul_7 __MPN(addmul_7)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_addmul_8 __MPN(addmul_8)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase.  */
+#define mpn_addmul_2s __MPN(addmul_2s)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+/* Override mpn_addlsh1_n, mpn_addlsh2_n, mpn_sublsh1_n, etc with mpn_addlsh_n,
+   etc, whenever there is no native implementation of the specific shift form
+   but there is a native implementation of the general one.  Similarly,
+   override the in-place foo_ip1 functions with plain foo.  We then lie and
+   say these macros represent native functions, but leave a trace by defining
+   the HAVE_NATIVE_* symbol as 2 rather than 1.  */
+
+#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh1_n
+#define mpn_addlsh1_n(a,b,c,d)          mpn_addlsh_n(a,b,c,d,1)
+#define HAVE_NATIVE_mpn_addlsh1_n       2
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh1_nc
+#define mpn_addlsh1_nc(a,b,c,d,x)       mpn_addlsh_nc(a,b,c,d,1,x)
+#define HAVE_NATIVE_mpn_addlsh1_nc      2
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
+#define mpn_addlsh1_n_ip1(a,b,n)        mpn_addlsh1_n(a,a,b,n)
+#define HAVE_NATIVE_mpn_addlsh1_n_ip1   2
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#define mpn_addlsh1_nc_ip1(a,b,n,c)     mpn_addlsh1_nc(a,a,b,n,c)
+#define HAVE_NATIVE_mpn_addlsh1_nc_ip1  2
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh2_n
+#define mpn_addlsh2_n(a,b,c,d)          mpn_addlsh_n(a,b,c,d,2)
+#define HAVE_NATIVE_mpn_addlsh2_n       2
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh2_nc
+#define mpn_addlsh2_nc(a,b,c,d,x)       mpn_addlsh_nc(a,b,c,d,2,x)
+#define HAVE_NATIVE_mpn_addlsh2_nc      2
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
+#define mpn_addlsh2_n_ip1(a,b,n)        mpn_addlsh2_n(a,a,b,n)
+#define HAVE_NATIVE_mpn_addlsh2_n_ip1   2
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#define mpn_addlsh2_nc_ip1(a,b,n,c)     mpn_addlsh2_nc(a,a,b,n,c)
+#define HAVE_NATIVE_mpn_addlsh2_nc_ip1  2
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh1_n
+#define mpn_sublsh1_n(a,b,c,d)          mpn_sublsh_n(a,b,c,d,1)
+#define HAVE_NATIVE_mpn_sublsh1_n       2
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh1_nc
+#define mpn_sublsh1_nc(a,b,c,d,x)       mpn_sublsh_nc(a,b,c,d,1,x)
+#define HAVE_NATIVE_mpn_sublsh1_nc      2
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
+#define mpn_sublsh1_n_ip1(a,b,n)        mpn_sublsh1_n(a,a,b,n)
+#define HAVE_NATIVE_mpn_sublsh1_n_ip1   2
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#define mpn_sublsh1_nc_ip1(a,b,n,c)     mpn_sublsh1_nc(a,a,b,n,c)
+#define HAVE_NATIVE_mpn_sublsh1_nc_ip1  2
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh2_n
+#define mpn_sublsh2_n(a,b,c,d)          mpn_sublsh_n(a,b,c,d,2)
+#define HAVE_NATIVE_mpn_sublsh2_n       2
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh2_nc
+#define mpn_sublsh2_nc(a,b,c,d,x)       mpn_sublsh_nc(a,b,c,d,2,x)
+#define HAVE_NATIVE_mpn_sublsh2_nc      2
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define mpn_sublsh2_n_ip1(a,b,n)        mpn_sublsh2_n(a,a,b,n)
+#define HAVE_NATIVE_mpn_sublsh2_n_ip1   2
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#define mpn_sublsh2_nc_ip1(a,b,n,c)     mpn_sublsh2_nc(a,a,b,n,c)
+#define HAVE_NATIVE_mpn_sublsh2_nc_ip1  2
+#endif
+
+#if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh1_n
+#define mpn_rsblsh1_n(a,b,c,d)          mpn_rsblsh_n(a,b,c,d,1)
+#define HAVE_NATIVE_mpn_rsblsh1_n       2
+#endif
+
+#if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc
+#define mpn_rsblsh1_nc(a,b,c,d,x)       mpn_rsblsh_nc(a,b,c,d,1,x)
+#define HAVE_NATIVE_mpn_rsblsh1_nc      2
+#endif
+
+#if HAVE_NATIVE_mpn_rsblsh1_n && ! HAVE_NATIVE_mpn_rsblsh1_n_ip1
+#define mpn_rsblsh1_n_ip1(a,b,n)        mpn_rsblsh1_n(a,a,b,n)
+#define HAVE_NATIVE_mpn_rsblsh1_n_ip1   2
+#endif
+
+#if HAVE_NATIVE_mpn_rsblsh1_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc_ip1
+#define mpn_rsblsh1_nc_ip1(a,b,n,c)     mpn_rsblsh1_nc(a,a,b,n,c)
+#define HAVE_NATIVE_mpn_rsblsh1_nc_ip1  2
+#endif
+
+#if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh2_n
+#define mpn_rsblsh2_n(a,b,c,d)          mpn_rsblsh_n(a,b,c,d,2)
+#define HAVE_NATIVE_mpn_rsblsh2_n       2
+#endif
+
+#if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc
+#define mpn_rsblsh2_nc(a,b,c,d,x)       mpn_rsblsh_nc(a,b,c,d,2,x)
+#define HAVE_NATIVE_mpn_rsblsh2_nc      2
+#endif
+
+#if HAVE_NATIVE_mpn_rsblsh2_n && ! HAVE_NATIVE_mpn_rsblsh2_n_ip1
+#define mpn_rsblsh2_n_ip1(a,b,n)        mpn_rsblsh2_n(a,a,b,n)
+#define HAVE_NATIVE_mpn_rsblsh2_n_ip1   2
+#endif
+
+#if HAVE_NATIVE_mpn_rsblsh2_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc_ip1
+#define mpn_rsblsh2_nc_ip1(a,b,n,c)     mpn_rsblsh2_nc(a,a,b,n,c)
+#define HAVE_NATIVE_mpn_rsblsh2_nc_ip1  2
+#endif
+
+
+#ifndef mpn_addlsh1_n
+#define mpn_addlsh1_n __MPN(addlsh1_n)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#ifndef mpn_addlsh1_nc
+#define mpn_addlsh1_nc __MPN(addlsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+#ifndef mpn_addlsh1_n_ip1
+#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#ifndef mpn_addlsh1_nc_ip1
+#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+#ifndef mpn_addlsh2_n
+#define mpn_addlsh2_n __MPN(addlsh2_n)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#ifndef mpn_addlsh2_nc
+#define mpn_addlsh2_nc __MPN(addlsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+#ifndef mpn_addlsh2_n_ip1
+#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#ifndef mpn_addlsh2_nc_ip1
+#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+#ifndef mpn_addlsh_n
+#define mpn_addlsh_n __MPN(addlsh_n)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+#ifndef mpn_addlsh_nc
+#define mpn_addlsh_nc __MPN(addlsh_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#endif
+#ifndef mpn_addlsh_n_ip1
+#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
+  __GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+#ifndef mpn_addlsh_nc_ip1
+#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#endif
+
+#ifndef mpn_sublsh1_n
+#define mpn_sublsh1_n __MPN(sublsh1_n)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#ifndef mpn_sublsh1_nc
+#define mpn_sublsh1_nc __MPN(sublsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+#ifndef mpn_sublsh1_n_ip1
+#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#ifndef mpn_sublsh1_nc_ip1
+#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+#ifndef mpn_sublsh2_n
+#define mpn_sublsh2_n __MPN(sublsh2_n)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#ifndef mpn_sublsh2_nc
+#define mpn_sublsh2_nc __MPN(sublsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+#ifndef mpn_sublsh2_n_ip1
+#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#ifndef mpn_sublsh2_nc_ip1
+#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+#ifndef mpn_sublsh_n
+#define mpn_sublsh_n __MPN(sublsh_n)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+#ifndef mpn_sublsh_nc
+#define mpn_sublsh_nc __MPN(sublsh_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#endif
+#ifndef mpn_sublsh_n_ip1
+#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
+  __GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+#ifndef mpn_sublsh_nc_ip1
+#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#endif
+
+#define mpn_rsblsh1_n __MPN(rsblsh1_n)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_rsblsh2_n __MPN(rsblsh2_n)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_rsblsh_n __MPN(rsblsh_n)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#define mpn_rsblsh_nc __MPN(rsblsh_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+
+#define mpn_rsh1add_n __MPN(rsh1add_n)
+__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_rsh1add_nc __MPN(rsh1add_nc)
+__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_rsh1sub_n __MPN(rsh1sub_n)
+__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
+__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#ifndef mpn_lshiftc  /* if not done with cpuvec in a fat binary */
+#define mpn_lshiftc __MPN(lshiftc)
+__GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+
+#define mpn_add_err1_n  __MPN(add_err1_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_add_err2_n  __MPN(add_err2_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_add_err3_n  __MPN(add_err3_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err1_n  __MPN(sub_err1_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err2_n  __MPN(sub_err2_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err3_n  __MPN(sub_err3_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_add_n_sub_n __MPN(add_n_sub_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
+__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
+__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+#define mpn_divrem_1c __MPN(divrem_1c)
+__GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+#define mpn_dump __MPN(dump)
+__GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);
+
+#define mpn_fib2_ui __MPN(fib2_ui)
+__GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);
+
+#define mpn_fib2m __MPN(fib2m)
+__GMP_DECLSPEC int mpn_fib2m (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define mpn_strongfibo __MPN(strongfibo)
+__GMP_DECLSPEC int mpn_strongfibo (mp_srcptr, mp_size_t, mp_ptr);
+
+/* Remap names of internal mpn functions.  */
+#define __clz_tab               __MPN(clz_tab)
+#define mpn_udiv_w_sdiv		__MPN(udiv_w_sdiv)
+
+#define mpn_jacobi_base __MPN(jacobi_base)
+__GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;
+
+#define mpn_jacobi_2 __MPN(jacobi_2)
+__GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);
+
+#define mpn_jacobi_n __MPN(jacobi_n)
+__GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);
+
+#define mpn_mod_1c __MPN(mod_1c)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_mul_1c __MPN(mul_1c)
+__GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+#define mpn_mul_2 __MPN(mul_2)
+__GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_mul_3 __MPN(mul_3)
+__GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_mul_4 __MPN(mul_4)
+__GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_mul_5 __MPN(mul_5)
+__GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_mul_6 __MPN(mul_6)
+__GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
+#define mpn_mul_basecase __MPN(mul_basecase)
+__GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+#endif
+
+#define mpn_mullo_n __MPN(mullo_n)
+__GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#ifndef mpn_mullo_basecase  /* if not done with cpuvec in a fat binary */
+#define mpn_mullo_basecase __MPN(mullo_basecase)
+__GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+
+#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
+#define mpn_sqr_basecase __MPN(sqr_basecase)
+__GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+
+#define mpn_sqrlo __MPN(sqrlo)
+__GMP_DECLSPEC void mpn_sqrlo (mp_ptr, mp_srcptr, mp_size_t);
+
+#define mpn_sqrlo_basecase __MPN(sqrlo_basecase)
+__GMP_DECLSPEC void mpn_sqrlo_basecase (mp_ptr, mp_srcptr, mp_size_t);
+
+#define mpn_mulmid_basecase __MPN(mulmid_basecase)
+__GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define mpn_mulmid_n __MPN(mulmid_n)
+__GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_mulmid __MPN(mulmid)
+__GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define mpn_submul_1c __MPN(submul_1c)
+__GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+#ifndef mpn_redc_1  /* if not done with cpuvec in a fat binary */
+#define mpn_redc_1 __MPN(redc_1)
+__GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+#ifndef mpn_redc_2  /* if not done with cpuvec in a fat binary */
+#define mpn_redc_2 __MPN(redc_2)
+__GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+#endif
+
+#define mpn_redc_n __MPN(redc_n)
+__GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+
+#ifndef mpn_mod_1_1p_cps  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
+__GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
+#endif
+#ifndef mpn_mod_1_1p  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_1_1p __MPN(mod_1_1p)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
+#endif
+
+#ifndef mpn_mod_1s_2p_cps  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
+__GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_2p  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_1s_2p __MPN(mod_1s_2p)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
+#endif
+
+#ifndef mpn_mod_1s_3p_cps  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
+__GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_3p  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_1s_3p __MPN(mod_1s_3p)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
+#endif
+
+#ifndef mpn_mod_1s_4p_cps  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
+__GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_4p  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_1s_4p __MPN(mod_1s_4p)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
+__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
+__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
+__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
+/* Scratch ("itch") space, in limbs, needed by mpn_mulmod_bnm1 for a
+   product reduced mod B^rn - 1 from operands of an and bn limbs.
+   Operands no larger than rn/2 need no wrap-around handling and hence
+   less scratch.  */
+static inline mp_size_t
+mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
+  mp_size_t half = rn >> 1;
+  mp_size_t extra = 0;
+  if (an > half)
+    extra = (bn > half) ? rn : half;
+  return rn + 4 + extra;
+}
+
+#ifndef MOD_BKNP1_USE11
+#define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0))
+#endif
+#ifndef MOD_BKNP1_ONLY3
+#define MOD_BKNP1_ONLY3 0
+#endif
+#define mpn_mulmod_bknp1 __MPN(mulmod_bknp1)
+__GMP_DECLSPEC void mpn_mulmod_bknp1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned, mp_ptr);
+/* Scratch ("itch") space, in limbs, needed by mpn_mulmod_bknp1 for a
+   result of rn limbs: four times the result size.  */
+static inline mp_size_t
+mpn_mulmod_bknp1_itch (mp_size_t rn) {
+  return rn * 4;
+}
+#if MOD_BKNP1_ONLY3
+#define MPN_MULMOD_BKNP1_USABLE(rn, k, mn)				\
+  ((GMP_NUMB_BITS % 8 == 0) && ((mn) >= 18) && ((rn) > 16) &&		\
+   (((rn) % ((k) = 3) == 0)))
+#else
+#define MPN_MULMOD_BKNP1_USABLE(rn, k, mn)				\
+  (((GMP_NUMB_BITS % 8 == 0) && ((mn) >= 18) && ((rn) > 16) &&		\
+    (((rn) % ((k) = 3) == 0) ||						\
+     (((GMP_NUMB_BITS % 16 != 0) || (((mn) >= 35) && ((rn) >= 32))) &&	\
+      (((GMP_NUMB_BITS % 16 == 0) && ((rn) % ((k) = 5) == 0)) ||	\
+       (((mn) >= 49) &&							\
+	(((rn) % ((k) = 7) == 0) ||					\
+	 ((GMP_NUMB_BITS % 16 == 0) && ((mn) >= 104) && ((rn) >= 64) &&	\
+	  ((MOD_BKNP1_USE11 && ((rn) % ((k) = 11) == 0)) ||		\
+	   ((rn) % ((k) = 13) == 0) ||					\
+	   ((GMP_NUMB_BITS % 32 == 0) && ((mn) >= 136) && ((rn) >= 128) && \
+	    ((rn) % ((k) = 17) == 0)					\
+	    ))))))))) ||						\
+  ((GMP_NUMB_BITS % 16 != 0) && MOD_BKNP1_USE11 &&			\
+   ((mn) >= 104) && ((rn) >= 64) && ((rn) % ((k) = 11) == 0)) )
+#endif
+
+#define mpn_sqrmod_bknp1 __MPN(sqrmod_bknp1)
+__GMP_DECLSPEC void mpn_sqrmod_bknp1 (mp_ptr, mp_srcptr, mp_size_t, unsigned, mp_ptr);
+/* Scratch ("itch") space, in limbs, needed by mpn_sqrmod_bknp1 for a
+   result of rn limbs: three times the result size (squaring needs one
+   fewer operand copy than the mul variant above).  */
+static inline mp_size_t
+mpn_sqrmod_bknp1_itch (mp_size_t rn) {
+  return 3 * rn;
+}
+#if MOD_BKNP1_ONLY3
+#define MPN_SQRMOD_BKNP1_USABLE(rn, k, mn)				\
+  MPN_MULMOD_BKNP1_USABLE(rn, k, mn)
+#else
+#define MPN_SQRMOD_BKNP1_USABLE(rn, k, mn)				\
+  (((GMP_NUMB_BITS % 8 == 0) && ((mn) >= 27) && ((rn) > 24) &&		\
+    (((rn) % ((k) = 3) == 0) ||						\
+     (((GMP_NUMB_BITS % 16 != 0) || (((mn) >= 55) && ((rn) > 50))) &&	\
+      (((GMP_NUMB_BITS % 16 == 0) && ((rn) % ((k) = 5) == 0)) ||	\
+       (((mn) >= 56) &&							\
+	(((rn) % ((k) = 7) == 0) ||					\
+	 ((GMP_NUMB_BITS % 16 == 0) && ((mn) >= 143) && ((rn) >= 128) && \
+	  ((MOD_BKNP1_USE11 && ((rn) % ((k) = 11) == 0)) ||		\
+	   ((rn) % ((k) = 13) == 0) ||					\
+	   ((GMP_NUMB_BITS % 32 == 0) && ((mn) >= 272) && ((rn) >= 256) && \
+	    ((rn) % ((k) = 17) == 0)					\
+	    ))))))))) ||						\
+   ((GMP_NUMB_BITS % 16 != 0) && MOD_BKNP1_USE11 &&			\
+    ((mn) >= 143) && ((rn) >= 128) && ((rn) % ((k) = 11) == 0)) )
+#endif
+
+
+#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
+__GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
+__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
+/* Scratch ("itch") space, in limbs, needed by mpn_sqrmod_bnm1 for a
+   square reduced mod B^rn - 1 from an operand of an limbs.  An operand
+   no larger than rn/2 needs no wrap-around handling and hence less
+   scratch.  */
+static inline mp_size_t
+mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
+  mp_size_t half = rn >> 1;
+  mp_size_t extra = 0;
+  if (an > half)
+    extra = an;
+  return rn + 3 + extra;
+}
+
+/* Pseudo-random number generator function pointers structure.  */
+typedef struct {
+  void (*randseed_fn) (gmp_randstate_ptr, mpz_srcptr);
+  void (*randget_fn) (gmp_randstate_ptr, mp_ptr, unsigned long int);
+  void (*randclear_fn) (gmp_randstate_ptr);
+  void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
+} gmp_randfnptr_t;
+
+/* Macro to obtain a void pointer to the function pointers structure.  */
+#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
+
+/* Macro to obtain a pointer to the generator's state.
+   When used as a lvalue the rvalue needs to be cast to mp_ptr.  */
+#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
+
+/* Write a given number of random bits to rp.  */
+#define _gmp_rand(rp, state, bits)					\
+  do {									\
+    gmp_randstate_ptr  __rstate = (state);				\
+    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)		\
+      (__rstate, rp, bits);						\
+  } while (0)
+
+__GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_ptr);
+
+
+/* __gmp_rands is the global state for the old-style random functions, and
+   is also used in the test programs (hence the __GMP_DECLSPEC).
+
+   There's no seeding here, so mpz_random etc will generate the same
+   sequence every time.  This is not unlike the C library random functions
+   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
+   from /dev/random or the like would work on many systems, but might
+   encourage a false confidence, since it'd be pretty much impossible to do
+   something that would work reliably everywhere.  In any case the new style
+   functions are recommended to applications which care about randomness, so
+   the old functions aren't too important.  */
+
+__GMP_DECLSPEC extern char             __gmp_rands_initialized;
+__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
+
+#define RANDS								\
+  ((__gmp_rands_initialized ? 0						\
+    : (__gmp_rands_initialized = 1,					\
+       __gmp_randinit_mt_noseed (__gmp_rands), 0)),			\
+   __gmp_rands)
+
+/* this is used by the test programs, to free memory */
+#define RANDS_CLEAR()							\
+  do {									\
+    if (__gmp_rands_initialized)					\
+      {									\
+	__gmp_rands_initialized = 0;					\
+	gmp_randclear (__gmp_rands);					\
+      }									\
+  } while (0)
+
+
+/* For a threshold between algorithms A and B, size>=thresh is where B
+   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
+   value 0 means only ever use B.  The tests for these special values will
+   be compile-time constants, so the compiler should be able to eliminate
+   the code for the unwanted algorithm.  */
+
+#if ! defined (__GNUC__) || __GNUC__ < 2
+#define ABOVE_THRESHOLD(size,thresh)					\
+  ((thresh) == 0							\
+   || ((thresh) != MP_SIZE_T_MAX					\
+       && (size) >= (thresh)))
+#else
+#define ABOVE_THRESHOLD(size,thresh)					\
+  ((__builtin_constant_p (thresh) && (thresh) == 0)			\
+   || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX)	\
+       && (size) >= (thresh)))
+#endif
+#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
+
+/* The minimal supported value for Toom22 depends also on Toom32 and
+   Toom42 implementations. */
+#define MPN_TOOM22_MUL_MINSIZE    6
+#define MPN_TOOM2_SQR_MINSIZE     4
+
+#define MPN_TOOM33_MUL_MINSIZE   17
+#define MPN_TOOM3_SQR_MINSIZE    17
+
+#define MPN_TOOM44_MUL_MINSIZE   30
+#define MPN_TOOM4_SQR_MINSIZE    30
+
+#define MPN_TOOM6H_MUL_MINSIZE   46
+#define MPN_TOOM6_SQR_MINSIZE    46
+
+#define MPN_TOOM8H_MUL_MINSIZE   86
+#define MPN_TOOM8_SQR_MINSIZE    86
+
+#define MPN_TOOM32_MUL_MINSIZE   10
+#define MPN_TOOM42_MUL_MINSIZE   10
+#define MPN_TOOM43_MUL_MINSIZE   25
+#define MPN_TOOM53_MUL_MINSIZE   17
+#define MPN_TOOM54_MUL_MINSIZE   31
+#define MPN_TOOM63_MUL_MINSIZE   49
+
+#define MPN_TOOM42_MULMID_MINSIZE    4
+
+#define   mpn_sqr_diagonal __MPN(sqr_diagonal)
+__GMP_DECLSPEC void      mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);
+
+#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
+__GMP_DECLSPEC void      mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define   mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);
+
+enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
+#define   mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);
+
+enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
+#define   mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
+
+#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
+
+#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
+
+#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
+
+#define   mpn_toom_couple_handling __MPN(toom_couple_handling)
+__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);
+
+#define   mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
+__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
+
+#define   mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
+__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
+
+#define   mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
+__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
+
+#define   mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
+__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
+
+#define   mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
+__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
+
+#define   mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
+__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
+
+#define   mpn_toom22_mul __MPN(toom22_mul)
+__GMP_DECLSPEC void      mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom32_mul __MPN(toom32_mul)
+__GMP_DECLSPEC void      mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom42_mul __MPN(toom42_mul)
+__GMP_DECLSPEC void      mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom52_mul __MPN(toom52_mul)
+__GMP_DECLSPEC void      mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom62_mul __MPN(toom62_mul)
+__GMP_DECLSPEC void      mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom2_sqr __MPN(toom2_sqr)
+__GMP_DECLSPEC void      mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom33_mul __MPN(toom33_mul)
+__GMP_DECLSPEC void      mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom43_mul __MPN(toom43_mul)
+__GMP_DECLSPEC void      mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom53_mul __MPN(toom53_mul)
+__GMP_DECLSPEC void      mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom54_mul __MPN(toom54_mul)
+__GMP_DECLSPEC void      mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom63_mul __MPN(toom63_mul)
+__GMP_DECLSPEC void      mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom3_sqr __MPN(toom3_sqr)
+__GMP_DECLSPEC void      mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom44_mul __MPN(toom44_mul)
+__GMP_DECLSPEC void      mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom4_sqr __MPN(toom4_sqr)
+__GMP_DECLSPEC void      mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom6h_mul __MPN(toom6h_mul)
+__GMP_DECLSPEC void      mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom6_sqr __MPN(toom6_sqr)
+__GMP_DECLSPEC void      mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom8h_mul __MPN(toom8h_mul)
+__GMP_DECLSPEC void      mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom8_sqr __MPN(toom8_sqr)
+__GMP_DECLSPEC void      mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom42_mulmid __MPN(toom42_mulmid)
+__GMP_DECLSPEC void      mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_fft_best_k __MPN(fft_best_k)
+__GMP_DECLSPEC int       mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;
+
+#define   mpn_mul_fft __MPN(mul_fft)
+__GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
+
+#define   mpn_mul_fft_full __MPN(mul_fft_full)
+__GMP_DECLSPEC void      mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define   mpn_nussbaumer_mul __MPN(nussbaumer_mul)
+__GMP_DECLSPEC void      mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define   mpn_fft_next_size __MPN(fft_next_size)
+__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
+
+#define   mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1)
+  __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+#define   mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
+  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+#define   mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
+  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);
+
+#define   mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define   mpn_sbpi1_div_q __MPN(sbpi1_div_q)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define   mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define   mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
+#define   mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
+
+#define   mpn_dcpi1_div_q __MPN(dcpi1_div_q)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
+
+#define   mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
+
+#define   mpn_mu_div_qr __MPN(mu_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
+
+#define   mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t) ATTRIBUTE_CONST;
+
+#define   mpn_mu_divappr_q __MPN(mu_divappr_q)
+__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
+
+#define   mpn_mu_div_q __MPN(mu_div_q)
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_mu_div_q_itch __MPN(mu_div_q_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
+
+#define  mpn_div_q __MPN(div_q)
+__GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_invert __MPN(invert)
+__GMP_DECLSPEC void      mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_invert_itch(n)  mpn_invertappr_itch(n)
+
+#define   mpn_ni_invertappr __MPN(ni_invertappr)
+__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_invertappr __MPN(invertappr)
+__GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_invertappr_itch(n)  (2 * (n))
+
+#define   mpn_binvert __MPN(binvert)
+__GMP_DECLSPEC void      mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_binvert_itch __MPN(binvert_itch)
+__GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t) ATTRIBUTE_CONST;
+
+#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
+__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
+
+#define   mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define   mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
+__GMP_DECLSPEC void      mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define   mpn_sbpi1_bdiv_r __MPN(sbpi1_bdiv_r)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define   mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+#define   mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
+__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t) ATTRIBUTE_CONST;
+
+#define   mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+#define   mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
+__GMP_DECLSPEC void      mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define   mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
+__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
+
+#define   mpn_mu_bdiv_q __MPN(mu_bdiv_q)
+__GMP_DECLSPEC void      mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
+
+#define   mpn_bdiv_qr __MPN(bdiv_qr)
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
+
+#define   mpn_bdiv_q __MPN(bdiv_q)
+__GMP_DECLSPEC void      mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_bdiv_q_itch __MPN(bdiv_q_itch)
+__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
+
+#define   mpn_divexact __MPN(divexact)
+__GMP_DECLSPEC void      mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+#define   mpn_divexact_itch __MPN(divexact_itch)
+__GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
+
+#ifndef mpn_bdiv_dbm1c  /* if not done with cpuvec in a fat binary */
+#define   mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+#endif
+
+#define   mpn_bdiv_dbm1(dst, src, size, divisor) \
+  mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
+
+#define   mpn_powm __MPN(powm)
+__GMP_DECLSPEC void      mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_powlo __MPN(powlo)
+__GMP_DECLSPEC void      mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
+
+#define mpn_sec_pi1_div_qr __MPN(sec_pi1_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_sec_pi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+#define mpn_sec_pi1_div_r __MPN(sec_pi1_div_r)
+__GMP_DECLSPEC void mpn_sec_pi1_div_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+
+
+#ifndef DIVEXACT_BY3_METHOD
+#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
+#define DIVEXACT_BY3_METHOD 0	/* default to using mpn_bdiv_dbm1c */
+#else
+#define DIVEXACT_BY3_METHOD 1
+#endif
+#endif
+
+#if DIVEXACT_BY3_METHOD == 0
+#undef mpn_divexact_by3
+#define mpn_divexact_by3(dst,src,size) \
+  (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
+/* override mpn_divexact_by3c defined in gmp.h */
+/*
+#undef mpn_divexact_by3c
+#define mpn_divexact_by3c(dst,src,size,cy) \
+  (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy)))
+*/
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0
+#define mpn_divexact_by5(dst,src,size) \
+  (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
+#endif
+
+#if GMP_NUMB_BITS % 3 == 0
+#define mpn_divexact_by7(dst,src,size) \
+  (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
+#endif
+
+#if GMP_NUMB_BITS % 6 == 0
+#define mpn_divexact_by9(dst,src,size) \
+  (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
+#endif
+
+#if GMP_NUMB_BITS % 10 == 0
+#define mpn_divexact_by11(dst,src,size) \
+  (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
+#endif
+
+#if GMP_NUMB_BITS % 12 == 0
+#define mpn_divexact_by13(dst,src,size) \
+  (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0
+#define mpn_divexact_by15(dst,src,size) \
+  (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
+#endif
+
+#if GMP_NUMB_BITS % 8 == 0
+#define mpn_divexact_by17(dst,src,size) \
+  (31 & 15 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 17)))
+#endif
+
+#define mpz_divexact_gcd  __gmpz_divexact_gcd
+__GMP_DECLSPEC void    mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_prodlimbs  __gmpz_prodlimbs
+__GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);
+
+#define mpz_oddfac_1  __gmpz_oddfac_1
+__GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);
+
+#define mpz_stronglucas  __gmpz_stronglucas
+__GMP_DECLSPEC int mpz_stronglucas (mpz_srcptr, mpz_ptr, mpz_ptr);
+
+#define mpz_lucas_mod  __gmpz_lucas_mod
+__GMP_DECLSPEC int mpz_lucas_mod (mpz_ptr, mpz_ptr, long, mp_bitcnt_t, mpz_srcptr, mpz_ptr, mpz_ptr);
+
+#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t  mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t);
+#endif
+
+#define mpn_divisible_p __MPN(divisible_p)
+__GMP_DECLSPEC int     mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+#define   mpn_rootrem __MPN(rootrem)
+__GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_broot __MPN(broot)
+__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_broot_invm1 __MPN(broot_invm1)
+__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_brootinv __MPN(brootinv)
+__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+
+#define mpn_bsqrt __MPN(bsqrt)
+__GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
+
+#define mpn_bsqrtinv __MPN(bsqrtinv)
+__GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
+
+#if defined (_CRAY)
+#define MPN_COPY_INCR(dst, src, n)					\
+  do {									\
+    int __i;		/* Faster on some Crays with plain int */	\
+    _Pragma ("_CRI ivdep");						\
+    for (__i = 0; __i < (n); __i++)					\
+      (dst)[__i] = (src)[__i];						\
+  } while (0)
+#endif
+
+/* used by test programs, hence __GMP_DECLSPEC */
+#ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
+#define mpn_copyi __MPN(copyi)
+__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+
+#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
+#define MPN_COPY_INCR(dst, src, size)					\
+  do {									\
+    ASSERT ((size) >= 0);						\
+    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));			\
+    mpn_copyi (dst, src, size);						\
+  } while (0)
+#endif
+
+/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
+#if ! defined (MPN_COPY_INCR)
+#define MPN_COPY_INCR(dst, src, n)					\
+  do {									\
+    ASSERT ((n) >= 0);							\
+    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));				\
+    if ((n) != 0)							\
+      {									\
+	mp_size_t __n = (n) - 1;					\
+	mp_ptr __dst = (dst);						\
+	mp_srcptr __src = (src);					\
+	mp_limb_t __x;							\
+	__x = *__src++;							\
+	if (__n != 0)							\
+	  {								\
+	    do								\
+	      {								\
+		*__dst++ = __x;						\
+		__x = *__src++;						\
+	      }								\
+	    while (--__n);						\
+	  }								\
+	*__dst++ = __x;							\
+      }									\
+  } while (0)
+#endif
+
+
+#if defined (_CRAY)
+#define MPN_COPY_DECR(dst, src, n)					\
+  do {									\
+    int __i;		/* Faster on some Crays with plain int */	\
+    _Pragma ("_CRI ivdep");						\
+    for (__i = (n) - 1; __i >= 0; __i--)				\
+      (dst)[__i] = (src)[__i];						\
+  } while (0)
+#endif
+
+/* used by test programs, hence __GMP_DECLSPEC */
+#ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
+#define mpn_copyd __MPN(copyd)
+__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+
+#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
+#define MPN_COPY_DECR(dst, src, size)					\
+  do {									\
+    ASSERT ((size) >= 0);						\
+    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));			\
+    mpn_copyd (dst, src, size);						\
+  } while (0)
+#endif
+
+/* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
+#if ! defined (MPN_COPY_DECR)
+#define MPN_COPY_DECR(dst, src, n)					\
+  do {									\
+    ASSERT ((n) >= 0);							\
+    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));				\
+    if ((n) != 0)							\
+      {									\
+	mp_size_t __n = (n) - 1;					\
+	mp_ptr __dst = (dst) + __n;					\
+	mp_srcptr __src = (src) + __n;					\
+	mp_limb_t __x;							\
+	__x = *__src--;							\
+	if (__n != 0)							\
+	  {								\
+	    do								\
+	      {								\
+		*__dst-- = __x;						\
+		__x = *__src--;						\
+	      }								\
+	    while (--__n);						\
+	  }								\
+	*__dst-- = __x;							\
+      }									\
+  } while (0)
+#endif
+
+
+#ifndef MPN_COPY
+#define MPN_COPY(d,s,n)							\
+  do {									\
+    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));				\
+    MPN_COPY_INCR (d, s, n);						\
+  } while (0)
+#endif
+
+
+/* Set {dst,size} to the limbs of {src,size} in reverse order. */
+#define MPN_REVERSE(dst, src, size)					\
+  do {									\
+    mp_ptr     __dst = (dst);						\
+    mp_size_t  __size = (size);						\
+    mp_srcptr  __src = (src) + __size - 1;				\
+    mp_size_t  __i;							\
+    ASSERT ((size) >= 0);						\
+    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));			\
+    CRAY_Pragma ("_CRI ivdep");						\
+    for (__i = 0; __i < __size; __i++)					\
+      {									\
+	*__dst = *__src;						\
+	__dst++;							\
+	__src--;							\
+      }									\
+  } while (0)
+
+
+/* Zero n limbs at dst.
+
+   For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
+   ppc630 for instance this is optimal since it can sustain only 1 store per
+   cycle.
+
+   gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
+   "for" loop in the generic code below can become stu/bdnz.  The do/while
+   here helps it get to that.  The same caveat about plain -mpowerpc64 mode
+   applies here as to __GMPN_COPY_INCR in gmp.h.
+
+   xlc 3.1 already generates stu/bdnz from the generic C, and does so from
+   this loop too.
+
+   Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
+   at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
+   trouble than it's worth to do the same, though perhaps a call to memset
+   would be good when on a GNU system.  */
+
+#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
+#define MPN_FILL(dst, n, f)						\
+  do {									\
+    mp_ptr __dst = (dst) - 1;						\
+    mp_size_t __n = (n);						\
+    ASSERT (__n > 0);							\
+    do									\
+      *++__dst = (f);							\
+    while (--__n);							\
+  } while (0)
+#endif
+
+#ifndef MPN_FILL
+#define MPN_FILL(dst, n, f)						\
+  do {									\
+    mp_ptr __dst = (dst);						\
+    mp_size_t __n = (n);						\
+    ASSERT (__n > 0);							\
+    do									\
+      *__dst++ = (f);							\
+    while (--__n);							\
+  } while (0)
+#endif
+
+#define MPN_ZERO(dst, n)						\
+  do {									\
+    ASSERT ((n) >= 0);							\
+    if ((n) != 0)							\
+      MPN_FILL (dst, n, CNST_LIMB (0));					\
+  } while (0)
+
+/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
+   start up and would need to strip a lot of zeros before it'd be faster
+   than a simple cmpl loop.  Here are some times in cycles for
+   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
+   low zeros).
+
+		std   cld
+	   P5    18    16
+	   P6    46    38
+	   K6    36    13
+	   K7    21    20
+*/
+/* Strip high zero limbs from {DST,NLIMBS}: NLIMBS (modified in place, so
+   it must be an lvalue) is decreased until the most significant limb is
+   non-zero, or until NLIMBS reaches 0 for the value zero.  */
+#ifndef MPN_NORMALIZE
+#define MPN_NORMALIZE(DST, NLIMBS) \
+  do {									\
+    while ((NLIMBS) > 0)						\
+      {									\
+	if ((DST)[(NLIMBS) - 1] != 0)					\
+	  break;							\
+	(NLIMBS)--;							\
+      }									\
+  } while (0)
+#endif
+/* As MPN_NORMALIZE, but for operands known to be non-zero: the per-iteration
+   NLIMBS > 0 test is replaced by an assertion.  */
+#ifndef MPN_NORMALIZE_NOT_ZERO
+#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)				\
+  do {									\
+    while (1)								\
+      {									\
+	ASSERT ((NLIMBS) >= 1);						\
+	if ((DST)[(NLIMBS) - 1] != 0)					\
+	  break;							\
+	(NLIMBS)--;							\
+      }									\
+  } while (0)
+#endif
+
+/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
+   and decrementing size.  low should be ptr[0], and will be the new ptr[0]
+   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
+   somewhere a non-zero limb.  */
+#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)			\
+  do {									\
+    ASSERT ((size) >= 1);						\
+    ASSERT ((low) == (ptr)[0]);						\
+									\
+    while ((low) == 0)							\
+      {									\
+	(size)--;							\
+	ASSERT ((size) >= 1);						\
+	(ptr)++;							\
+	(low) = *(ptr);							\
+      }									\
+  } while (0)
+
+/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
+   temporary variable; it will be automatically cleared out at function
+   return.  We use __x here to make it possible to accept both mpz_ptr and
+   mpz_t arguments.
+
+   The limbs come from TMP_ALLOC_LIMBS, so presumably the caller must have
+   a TMP allocation scope in effect (TMP_DECL/TMP_MARK/TMP_FREE) -- confirm
+   against the TMP_* definitions.  Note _mp_size is left uninitialized.  */
+#define MPZ_TMP_INIT(X, NLIMBS)						\
+  do {									\
+    mpz_ptr __x = (X);							\
+    ASSERT ((NLIMBS) >= 1);						\
+    __x->_mp_alloc = (NLIMBS);						\
+    __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS);				\
+  } while (0)
+
+#if WANT_ASSERT
+/* Like _mpz_realloc, for callers that do not need the current limb
+   contents of Z preserved.  In this WANT_ASSERT build the limbs are
+   deliberately scribbled on, so code wrongly depending on the old
+   contents fails loudly instead of working by accident.  */
+static inline void *
+_mpz_newalloc (mpz_ptr z, mp_size_t n)
+{
+  void *p = _mpz_realloc (z, n);
+  mp_ptr limbs = (mp_ptr) p;
+  /* Clobber the low limb with (a function of) the high limb.  */
+  limbs[0] = ~ limbs[ALLOC (z) - 1];
+  return p;
+}
+#else
+#define _mpz_newalloc _mpz_realloc
+#endif
+/* MPZ_REALLOC(z,n): make sure Z has room for at least N limbs, returning a
+   pointer to its limb array.  _mpz_realloc is called only when the current
+   allocation is too small.  MPZ_NEWALLOC is the same except it goes through
+   _mpz_newalloc, for callers that don't need the old limb contents kept
+   (under WANT_ASSERT, _mpz_newalloc deliberately clobbers a limb to catch
+   code that relies on them).  */
+#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))			\
+			  ? (mp_ptr) _mpz_realloc(z,n)			\
+			  : PTR(z))
+#define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))			\
+			   ? (mp_ptr) _mpz_newalloc(z,n)		\
+			   : PTR(z))
+
+/* Non-zero if Z is exactly 1: positive with a single limb of value 1.  */
+#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
+
+
+/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
+   f1p.
+
+   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
+   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
+   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
+
+   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
+   without good floating point.  There's +2 for rounding up, and a further
+   +2 since at the last step x limbs are doubled into a 2x+1 limb region
+   whereas the actual F[2k] value might be only 2x-1 limbs.
+
+   Note that a division is done first, since on a 32-bit system it's at
+   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
+   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
+   whatever a multiply of two 190Mbyte numbers takes.)
+
+   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
+   worked into the multiplier.  */
+
+#define MPN_FIB2_SIZE(n) \
+  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
+
+
+/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
+   -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
+
+   FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
+   F[n] + 2*F[n-1] fits in a limb.  */
+
+__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
+#define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
+
+extern const mp_limb_t __gmp_oddfac_table[];
+extern const mp_limb_t __gmp_odd2fac_table[];
+extern const unsigned char __gmp_fac2cnt_table[];
+extern const mp_limb_t __gmp_limbroots_table[];
+
+/* Return the largest LOG <= 8 with n^LOG <= GMP_NUMB_MAX, i.e. how many
+   factors smaller than n a single limb can hold.  Scans the precomputed
+   limb-root table downward from the 8th root.  */
+static inline unsigned
+log_n_max (mp_limb_t n)
+{
+  unsigned lg = 8;
+  while (n > __gmp_limbroots_table[lg - 1])
+    --lg;
+  return lg;
+}
+
+#define SIEVESIZE 512		/* FIXME: Allow gmp_init_primesieve to choose */
+/* State for the incremental prime sieve behind gmp_nextprime (declared
+   below): a window of SIEVESIZE+1 byte flags whose first entry corresponds
+   to the number s0.  NOTE(review): field semantics are from the original
+   member comments; confirm against the sieve implementation.  */
+typedef struct
+{
+  unsigned long d;		   /* current index in s[] */
+  unsigned long s0;		   /* number corresponding to s[0] */
+  unsigned long sqrt_s0;	   /* misnomer for sqrt(s[SIEVESIZE-1]) */
+  unsigned char s[SIEVESIZE + 1];  /* sieve table */
+} gmp_primesieve_t;
+
+#define gmp_init_primesieve __gmp_init_primesieve
+__GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
+
+#define gmp_nextprime __gmp_nextprime
+__GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
+
+#define gmp_primesieve __gmp_primesieve
+__GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
+
+
+/* Generic fallback crossover points, in limbs, for the Toom multiplication
+   family.  The #ifndef guards mean these apply only when no tuned,
+   CPU-specific value has already been defined (presumably by a per-CPU
+   parameter header or the tune program -- confirm against the build).  */
+#ifndef MUL_TOOM22_THRESHOLD
+#define MUL_TOOM22_THRESHOLD             30
+#endif
+
+#ifndef MUL_TOOM33_THRESHOLD
+#define MUL_TOOM33_THRESHOLD            100
+#endif
+
+#ifndef MUL_TOOM44_THRESHOLD
+#define MUL_TOOM44_THRESHOLD            300
+#endif
+
+#ifndef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD            350
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
+#ifndef MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD            450
+#endif
+
+#ifndef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
+#endif
+
+/* Crossovers between the unbalanced (MxN) Toom variants.  */
+#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
+#endif
+
+#ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD  110
+#endif
+
+#ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD  100
+#endif
+
+#ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD  110
+#endif
+
+#ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD  150
+#endif
+
+/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD.  In a
+   normal build MUL_TOOM22_THRESHOLD is a constant and we use that.  In a fat
+   binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
+   separate hard limit will have been defined.  Similarly for TOOM3.  */
+#ifndef MUL_TOOM22_THRESHOLD_LIMIT
+#define MUL_TOOM22_THRESHOLD_LIMIT  MUL_TOOM22_THRESHOLD
+#endif
+#ifndef MUL_TOOM33_THRESHOLD_LIMIT
+#define MUL_TOOM33_THRESHOLD_LIMIT  MUL_TOOM33_THRESHOLD
+#endif
+#ifndef MULLO_BASECASE_THRESHOLD_LIMIT
+#define MULLO_BASECASE_THRESHOLD_LIMIT  MULLO_BASECASE_THRESHOLD
+#endif
+#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
+#define SQRLO_BASECASE_THRESHOLD_LIMIT  SQRLO_BASECASE_THRESHOLD
+#endif
+#ifndef SQRLO_DC_THRESHOLD_LIMIT
+#define SQRLO_DC_THRESHOLD_LIMIT  SQRLO_DC_THRESHOLD
+#endif
+
+/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
+   mpn_mul_basecase.  Default is to use mpn_sqr_basecase from 0.  (Note that we
+   certainly always want it if there's a native assembler mpn_sqr_basecase.)
+
+   If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
+   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
+   threshold and SQR_TOOM2_THRESHOLD is 0.  This oddity arises more or less
+   because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
+   should be used, and that may be never.  */
+
+#ifndef SQR_BASECASE_THRESHOLD
+#define SQR_BASECASE_THRESHOLD            0  /* never use mpn_mul_basecase */
+#endif
+
+#ifndef SQR_TOOM2_THRESHOLD
+#define SQR_TOOM2_THRESHOLD              50
+#endif
+
+#ifndef SQR_TOOM3_THRESHOLD
+#define SQR_TOOM3_THRESHOLD             120
+#endif
+
+#ifndef SQR_TOOM4_THRESHOLD
+#define SQR_TOOM4_THRESHOLD             400
+#endif
+
+/* See comments above about MUL_TOOM33_THRESHOLD_LIMIT.  */
+#ifndef SQR_TOOM3_THRESHOLD_LIMIT
+#define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
+#endif
+
+#ifndef MULMID_TOOM42_THRESHOLD
+#define MULMID_TOOM42_THRESHOLD     MUL_TOOM22_THRESHOLD
+#endif
+
+#ifndef MULLO_BASECASE_THRESHOLD
+#define MULLO_BASECASE_THRESHOLD          0  /* never use mpn_mul_basecase */
+#endif
+
+#ifndef MULLO_DC_THRESHOLD
+#define MULLO_DC_THRESHOLD         (2*MUL_TOOM22_THRESHOLD)
+#endif
+
+#ifndef MULLO_MUL_N_THRESHOLD
+#define MULLO_MUL_N_THRESHOLD      (2*MUL_FFT_THRESHOLD)
+#endif
+
+#ifndef SQRLO_BASECASE_THRESHOLD
+#define SQRLO_BASECASE_THRESHOLD          0  /* never use mpn_sqr_basecase */
+#endif
+
+#ifndef SQRLO_DC_THRESHOLD
+#define SQRLO_DC_THRESHOLD         (MULLO_DC_THRESHOLD)
+#endif
+
+#ifndef SQRLO_SQR_THRESHOLD
+#define SQRLO_SQR_THRESHOLD        (MULLO_MUL_N_THRESHOLD)
+#endif
+
+#ifndef DC_DIV_QR_THRESHOLD
+#define DC_DIV_QR_THRESHOLD        (2*MUL_TOOM22_THRESHOLD)
+#endif
+
+#ifndef DC_DIVAPPR_Q_THRESHOLD
+#define DC_DIVAPPR_Q_THRESHOLD          200
+#endif
+
+#ifndef DC_BDIV_QR_THRESHOLD
+#define DC_BDIV_QR_THRESHOLD       (2*MUL_TOOM22_THRESHOLD)
+#endif
+
+#ifndef DC_BDIV_Q_THRESHOLD
+#define DC_BDIV_Q_THRESHOLD             180
+#endif
+
+#ifndef DIVEXACT_JEB_THRESHOLD
+#define DIVEXACT_JEB_THRESHOLD           25
+#endif
+
+#ifndef INV_MULMOD_BNM1_THRESHOLD
+#define INV_MULMOD_BNM1_THRESHOLD  (4*MULMOD_BNM1_THRESHOLD)
+#endif
+
+#ifndef INV_APPR_THRESHOLD
+#define INV_APPR_THRESHOLD         INV_NEWTON_THRESHOLD
+#endif
+
+#ifndef INV_NEWTON_THRESHOLD
+#define INV_NEWTON_THRESHOLD            200
+#endif
+
+#ifndef BINV_NEWTON_THRESHOLD
+#define BINV_NEWTON_THRESHOLD           300
+#endif
+
+#ifndef MU_DIVAPPR_Q_THRESHOLD
+#define MU_DIVAPPR_Q_THRESHOLD         2000
+#endif
+
+#ifndef MU_DIV_QR_THRESHOLD
+#define MU_DIV_QR_THRESHOLD            2000
+#endif
+
+#ifndef MUPI_DIV_QR_THRESHOLD
+#define MUPI_DIV_QR_THRESHOLD           200
+#endif
+
+#ifndef MU_BDIV_Q_THRESHOLD
+#define MU_BDIV_Q_THRESHOLD            2000
+#endif
+
+#ifndef MU_BDIV_QR_THRESHOLD
+#define MU_BDIV_QR_THRESHOLD           2000
+#endif
+
+#ifndef MULMOD_BNM1_THRESHOLD
+#define MULMOD_BNM1_THRESHOLD            16
+#endif
+
+#ifndef SQRMOD_BNM1_THRESHOLD
+#define SQRMOD_BNM1_THRESHOLD            16
+#endif
+
+#ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
+#define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD  (INV_MULMOD_BNM1_THRESHOLD/2)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+
+#ifndef REDC_1_TO_REDC_2_THRESHOLD
+#define REDC_1_TO_REDC_2_THRESHOLD       15
+#endif
+#ifndef REDC_2_TO_REDC_N_THRESHOLD
+#define REDC_2_TO_REDC_N_THRESHOLD      100
+#endif
+
+#else
+
+#ifndef REDC_1_TO_REDC_N_THRESHOLD
+#define REDC_1_TO_REDC_N_THRESHOLD      100
+#endif
+
+#endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
+
+
+/* First k to use for an FFT modF multiply.  A modF FFT is an order
+   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
+   whereas k=4 is 1.33 which is faster than toom3 at 1.485.    */
+#define FFT_FIRST_K  4
+
+/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
+#ifndef MUL_FFT_MODF_THRESHOLD
+#define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM33_THRESHOLD * 3)
+#endif
+#ifndef SQR_FFT_MODF_THRESHOLD
+#define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
+#endif
+
+/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
+   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
+   NxN->2N multiply and not recursing into itself is an order
+   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
+   is the first better than toom3.  */
+#ifndef MUL_FFT_THRESHOLD
+#define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
+#endif
+#ifndef SQR_FFT_THRESHOLD
+#define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
+#endif
+
+/* Table of thresholds for successive modF FFT "k"s.  The first entry is
+   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
+   etc.  See mpn_fft_best_k(). */
+#ifndef MUL_FFT_TABLE
+#define MUL_FFT_TABLE							\
+  { MUL_TOOM33_THRESHOLD * 4,   /* k=5 */				\
+    MUL_TOOM33_THRESHOLD * 8,   /* k=6 */				\
+    MUL_TOOM33_THRESHOLD * 16,  /* k=7 */				\
+    MUL_TOOM33_THRESHOLD * 32,  /* k=8 */				\
+    MUL_TOOM33_THRESHOLD * 96,  /* k=9 */				\
+    MUL_TOOM33_THRESHOLD * 288, /* k=10 */				\
+    0 }
+#endif
+#ifndef SQR_FFT_TABLE
+#define SQR_FFT_TABLE							\
+  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */				\
+    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */				\
+    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */				\
+    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */				\
+    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */				\
+    SQR_TOOM3_THRESHOLD * 288, /* k=10 */				\
+    0 }
+#endif
+
+/* A packed (size, parameter) pair for FFT threshold tables: from operand
+   size n limbs, FFT parameter k applies.  The bitfields limit n to 2^27-1
+   and k to 31.  Presumably indexed by the tuned FFT tables (cf.
+   MPN_FFT_TABLE_SIZE below) -- confirm against mpn_fft_best_k.  */
+struct fft_table_nk
+{
+  gmp_uint_least32_t n:27;
+  gmp_uint_least32_t k:5;
+};
+
+#ifndef FFT_TABLE_ATTRS
+#define FFT_TABLE_ATTRS   static const
+#endif
+
+#define MPN_FFT_TABLE_SIZE  16
+
+
+#ifndef DC_DIV_QR_THRESHOLD
+#define DC_DIV_QR_THRESHOLD    (3 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#ifndef GET_STR_DC_THRESHOLD
+#define GET_STR_DC_THRESHOLD             18
+#endif
+
+#ifndef GET_STR_PRECOMPUTE_THRESHOLD
+#define GET_STR_PRECOMPUTE_THRESHOLD     35
+#endif
+
+#ifndef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD            750
+#endif
+
+#ifndef SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD   2000
+#endif
+
+#ifndef FAC_ODD_THRESHOLD
+#define FAC_ODD_THRESHOLD    35
+#endif
+
+#ifndef FAC_DSC_THRESHOLD
+#define FAC_DSC_THRESHOLD   400
+#endif
+
+/* Return non-zero if xp,xsize and yp,ysize overlap.
+   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
+   overlap.  If both these are false, there's an overlap. */
+#define MPN_OVERLAP_P(xp, xsize, yp, ysize)				\
+  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
+/* As MPN_OVERLAP_P, but with the sizes in bytes rather than limbs, for
+   operands that aren't limb arrays.  */
+#define MEM_OVERLAP_P(xp, xsize, yp, ysize)				\
+  (   (char *) (xp) + (xsize) > (char *) (yp)				\
+   && (char *) (yp) + (ysize) > (char *) (xp))
+
+/* Return non-zero if xp,xsize and yp,ysize are either identical or not
+   overlapping.  Return zero if they're partially overlapping. */
+#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)				\
+  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
+#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)			\
+  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
+
+/* Return non-zero if dst,dsize and src,ssize are either identical or
+   overlapping in a way suitable for an incrementing/decrementing algorithm.
+   Return zero if they're partially overlapping in an unsuitable fashion.
+   An incrementing copy is safe when dst <= src (each limb is read before
+   it would be overwritten), and symmetrically for decrementing.  */
+#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)			\
+  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
+#define MPN_SAME_OR_INCR_P(dst, src, size)				\
+  MPN_SAME_OR_INCR2_P(dst, size, src, size)
+#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)			\
+  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
+#define MPN_SAME_OR_DECR_P(dst, src, size)				\
+  MPN_SAME_OR_DECR2_P(dst, size, src, size)
+
+
+/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
+   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
+   does it always.  Generally assertions are meant for development, but
+   might help when looking for a problem later too.  */
+
+#ifdef __LINE__
+#define ASSERT_LINE  __LINE__
+#else
+#define ASSERT_LINE  -1
+#endif
+
+#ifdef __FILE__
+#define ASSERT_FILE  __FILE__
+#else
+#define ASSERT_FILE  ""
+#endif
+
+__GMP_DECLSPEC void __gmp_assert_header (const char *, int);
+__GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN;
+
+#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
+
+#define ASSERT_ALWAYS(expr)						\
+  do {									\
+    if (UNLIKELY (!(expr)))						\
+      ASSERT_FAIL (expr);						\
+  } while (0)
+
+#if WANT_ASSERT
+#define ASSERT(expr)   ASSERT_ALWAYS (expr)
+#else
+#define ASSERT(expr)   do {} while (0)
+#endif
+
+
+/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
+   that it's zero.  In both cases if assertion checking is disabled the
+   expression is still evaluated.  These macros are meant for use with
+   routines like mpn_add_n() where the return value represents a carry or
+   whatever that should or shouldn't occur in some context.  For example,
+   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
+#if WANT_ASSERT
+#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
+#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
+#else
+#define ASSERT_CARRY(expr)     (expr)
+#define ASSERT_NOCARRY(expr)   (expr)
+#endif
+
+
+/* ASSERT_CODE includes code when assertion checking is wanted.  This is the
+   same as writing "#if WANT_ASSERT", but more compact.  */
+#if WANT_ASSERT
+#define ASSERT_CODE(expr)  expr
+#else
+#define ASSERT_CODE(expr)
+#endif
+
+
+/* Test that an mpq_t is in fully canonical form.  This can be used as
+   protection on routines like mpq_equal which give wrong results on
+   non-canonical inputs.  */
+#if WANT_ASSERT
+#define ASSERT_MPQ_CANONICAL(q)						\
+  do {									\
+    ASSERT (q->_mp_den._mp_size > 0);					\
+    if (q->_mp_num._mp_size == 0)					\
+      {									\
+	/* zero should be 0/1 */					\
+	ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);			\
+      }									\
+    else								\
+      {									\
+	/* no common factors */						\
+	mpz_t  __g;							\
+	mpz_init (__g);							\
+	mpz_gcd (__g, mpq_numref(q), mpq_denref(q));			\
+	ASSERT (mpz_cmp_ui (__g, 1) == 0);				\
+	mpz_clear (__g);						\
+      }									\
+  } while (0)
+#else
+#define ASSERT_MPQ_CANONICAL(q)	 do {} while (0)
+#endif
+
+/* Check that the nail parts are zero. */
+#define ASSERT_ALWAYS_LIMB(limb)					\
+  do {									\
+    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK;				\
+    ASSERT_ALWAYS (__nail == 0);					\
+  } while (0)
+#define ASSERT_ALWAYS_MPN(ptr, size)					\
+  do {									\
+    /* let whole loop go dead when no nails */				\
+    if (GMP_NAIL_BITS != 0)						\
+      {									\
+	mp_size_t  __i;							\
+	for (__i = 0; __i < (size); __i++)				\
+	  ASSERT_ALWAYS_LIMB ((ptr)[__i]);				\
+      }									\
+  } while (0)
+#if WANT_ASSERT
+#define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
+#define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
+#else
+#define ASSERT_LIMB(limb)       do {} while (0)
+#define ASSERT_MPN(ptr, size)   do {} while (0)
+#endif
+
+
+/* Assert that an mpn region {ptr,size} is zero, or non-zero.
+   size==0 is allowed, and in that case {ptr,size} considered to be zero.  */
+#if WANT_ASSERT
+#define ASSERT_MPN_ZERO_P(ptr,size)					\
+  do {									\
+    mp_size_t  __i;							\
+    ASSERT ((size) >= 0);						\
+    for (__i = 0; __i < (size); __i++)					\
+      ASSERT ((ptr)[__i] == 0);						\
+  } while (0)
+#define ASSERT_MPN_NONZERO_P(ptr,size)					\
+  do {									\
+    mp_size_t  __i;							\
+    int	       __nonzero = 0;						\
+    ASSERT ((size) >= 0);						\
+    for (__i = 0; __i < (size); __i++)					\
+      if ((ptr)[__i] != 0)						\
+	{								\
+	  __nonzero = 1;						\
+	  break;							\
+	}								\
+    ASSERT (__nonzero);							\
+  } while (0)
+#else
+#define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
+#define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
+#endif
+
+
+/* Fallback mpn_com: ones-complement {s,n} into {d,n}, masking each limb
+   with GMP_NUMB_MASK so any nail bits stay zero.  Requires n >= 1, and d
+   equal to s or not partially overlapping it.  */
+#if ! HAVE_NATIVE_mpn_com
+#undef mpn_com
+#define mpn_com(d,s,n)							\
+  do {									\
+    mp_ptr     __d = (d);						\
+    mp_srcptr  __s = (s);						\
+    mp_size_t  __n = (n);						\
+    ASSERT (__n >= 1);							\
+    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));			\
+    do									\
+      *__d++ = (~ *__s++) & GMP_NUMB_MASK;				\
+    while (--__n);							\
+  } while (0)
+#endif
+
+/* Apply a limbwise boolean OPERATION (an expression in __a and __b) to
+   {up,n} and {vp,n}, storing results at {rp,n}.  Requires n >= 1.  The
+   pointers are advanced past the end and indexed by a negative offset that
+   counts up to zero, so the loop needs only one induction variable and its
+   increment doubles as the termination test.  */
+#define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation)			\
+  do {									\
+    mp_srcptr	__up = (up);						\
+    mp_srcptr	__vp = (vp);						\
+    mp_ptr	__rp = (rp);						\
+    mp_size_t	__n = (n);						\
+    mp_limb_t __a, __b;							\
+    ASSERT (__n > 0);							\
+    ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n));			\
+    ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n));			\
+    __up += __n;							\
+    __vp += __n;							\
+    __rp += __n;							\
+    __n = -__n;								\
+    do {								\
+      __a = __up[__n];							\
+      __b = __vp[__n];							\
+      __rp[__n] = operation;						\
+    } while (++__n);							\
+  } while (0)
+
+
+#if ! HAVE_NATIVE_mpn_and_n
+#undef mpn_and_n
+#define mpn_and_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
+#endif
+
+#if ! HAVE_NATIVE_mpn_andn_n
+#undef mpn_andn_n
+#define mpn_andn_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
+#endif
+
+#if ! HAVE_NATIVE_mpn_nand_n
+#undef mpn_nand_n
+#define mpn_nand_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
+#endif
+
+#if ! HAVE_NATIVE_mpn_ior_n
+#undef mpn_ior_n
+#define mpn_ior_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
+#endif
+
+#if ! HAVE_NATIVE_mpn_iorn_n
+#undef mpn_iorn_n
+#define mpn_iorn_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
+#endif
+
+#if ! HAVE_NATIVE_mpn_nior_n
+#undef mpn_nior_n
+#define mpn_nior_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
+#endif
+
+#if ! HAVE_NATIVE_mpn_xor_n
+#undef mpn_xor_n
+#define mpn_xor_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
+#endif
+
+#if ! HAVE_NATIVE_mpn_xnor_n
+#undef mpn_xnor_n
+#define mpn_xnor_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
+#endif
+
+#define mpn_trialdiv __MPN(trialdiv)
+__GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);
+
+#define mpn_remove __MPN(remove)
+__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_bitcnt_t);
+
+
+/* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
+#if GMP_NAIL_BITS == 0
+/* Full-limb case: unsigned wraparound means the sum is smaller than an
+   addend exactly when a carry occurred.  */
+#define ADDC_LIMB(cout, w, x, y)					\
+  do {									\
+    mp_limb_t  __x = (x);						\
+    mp_limb_t  __y = (y);						\
+    mp_limb_t  __w = __x + __y;						\
+    (w) = __w;								\
+    (cout) = __w < __x;							\
+  } while (0)
+#else
+/* Nails case: the sum fits in a limb, so the carry is simply the bits
+   above GMP_NUMB_BITS.  */
+#define ADDC_LIMB(cout, w, x, y)					\
+  do {									\
+    mp_limb_t  __w;							\
+    ASSERT_LIMB (x);							\
+    ASSERT_LIMB (y);							\
+    __w = (x) + (y);							\
+    (w) = __w & GMP_NUMB_MASK;						\
+    (cout) = __w >> GMP_NUMB_BITS;					\
+  } while (0)
+#endif
+
+/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
+   subtract.  */
+#if GMP_NAIL_BITS == 0
+/* Full-limb case: the difference exceeds the minuend exactly when a
+   borrow occurred (unsigned wraparound).  */
+#define SUBC_LIMB(cout, w, x, y)					\
+  do {									\
+    mp_limb_t  __x = (x);						\
+    mp_limb_t  __y = (y);						\
+    mp_limb_t  __w = __x - __y;						\
+    (w) = __w;								\
+    (cout) = __w > __x;							\
+  } while (0)
+#else
+/* Nails case: a borrow propagates into the (zero) nail bits, setting the
+   limb's top bit, which is then extracted by the shift.  */
+#define SUBC_LIMB(cout, w, x, y)					\
+  do {									\
+    mp_limb_t  __w = (x) - (y);						\
+    (w) = __w & GMP_NUMB_MASK;						\
+    (cout) = __w >> (GMP_LIMB_BITS-1);					\
+  } while (0)
+#endif
+
+
+/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
+   expecting no carry (or borrow) from that.
+
+   The size parameter is only for the benefit of assertion checking.  In a
+   normal build it's unused and the carry/borrow is just propagated as far
+   as it needs to go.
+
+   On random data, usually only one or two limbs of {ptr,size} get updated,
+   so there's no need for any sophisticated looping, just something compact
+   and sensible.
+
+   FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
+   declaring their operand sizes, then remove the former.  This is purely
+   for the benefit of assertion checking.  */
+
+#if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM)	\
+  && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \
+  && ! WANT_ASSERT
+/* Better flags handling than the generic C gives on i386, saving a few
+   bytes of code and maybe a cycle or two.  */
+
+#define MPN_IORD_U(ptr, incr, aors)					\
+  do {									\
+    mp_ptr  __ptr_dummy;						\
+    if (__builtin_constant_p (incr) && (incr) == 0)			\
+      {									\
+      }									\
+    else if (__builtin_constant_p (incr) && (incr) == 1)		\
+      {									\
+	__asm__ __volatile__						\
+	  ("\n" ASM_L(top) ":\n"					\
+	   "\t" aors "\t$1, (%0)\n"					\
+	   "\tlea\t%c2(%0), %0\n"					\
+	   "\tjc\t" ASM_L(top)						\
+	   : "=r" (__ptr_dummy)						\
+	   : "0"  (ptr), "n" (sizeof(mp_limb_t))			\
+	   : "memory");							\
+      }									\
+    else								\
+      {									\
+	__asm__ __volatile__						\
+	  (   aors  "\t%2, (%0)\n"					\
+	   "\tjnc\t" ASM_L(done) "\n"					\
+	   ASM_L(top) ":\n"						\
+	   "\t" aors "\t$1, %c3(%0)\n"					\
+	   "\tlea\t%c3(%0), %0\n"					\
+	   "\tjc\t" ASM_L(top) "\n"					\
+	   ASM_L(done) ":\n"						\
+	   : "=r" (__ptr_dummy)						\
+	   : "0"  (ptr),						\
+	     "re" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t))		\
+	   : "memory");							\
+      }									\
+  } while (0)
+
+#if GMP_LIMB_BITS == 32
+#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
+#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
+#endif
+#if GMP_LIMB_BITS == 64
+#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addq")
+#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subq")
+#endif
+#define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
+#define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
+#endif
+
+#if GMP_NAIL_BITS == 0
+#ifndef mpn_incr_u
+#define mpn_incr_u(p,incr)						\
+  do {									\
+    mp_limb_t __x;							\
+    mp_ptr __p = (p);							\
+    if (__builtin_constant_p (incr) && (incr) == 1)			\
+      {									\
+	while (++(*(__p++)) == 0)					\
+	  ;								\
+      }									\
+    else								\
+      {									\
+	__x = *__p + (incr);						\
+	*__p = __x;							\
+	if (__x < (incr))						\
+	  while (++(*(++__p)) == 0)					\
+	    ;								\
+      }									\
+  } while (0)
+#endif
+#ifndef mpn_decr_u
+#define mpn_decr_u(p,incr)						\
+  do {									\
+    mp_limb_t __x;							\
+    mp_ptr __p = (p);							\
+    if (__builtin_constant_p (incr) && (incr) == 1)			\
+      {									\
+	while ((*(__p++))-- == 0)					\
+	  ;								\
+      }									\
+    else								\
+      {									\
+	__x = *__p;							\
+	*__p = __x - (incr);						\
+	if (__x < (incr))						\
+	  while ((*(++__p))-- == 0)					\
+	    ;								\
+      }									\
+  } while (0)
+#endif
+#endif
+
+/* Nail-build variants of mpn_incr_u/mpn_decr_u.  Each stored limb holds
+   only GMP_NUMB_BITS significant bits, so every result is masked with
+   GMP_NUMB_MASK before storing; a carry out of (or borrow into) the numb
+   field shows up in the bits at or above GMP_NUMB_BITS of the unmasked
+   sum/difference, which is what the __x >> GMP_NUMB_BITS tests check.  */
+#if GMP_NAIL_BITS >= 1
+#ifndef mpn_incr_u
+#define mpn_incr_u(p,incr)						\
+  do {									\
+    mp_limb_t __x;							\
+    mp_ptr __p = (p);							\
+    if (__builtin_constant_p (incr) && (incr) == 1)			\
+      {									\
+	do								\
+	  {								\
+	    __x = (*__p + 1) & GMP_NUMB_MASK;				\
+	    *__p++ = __x;						\
+	  }								\
+	while (__x == 0);						\
+      }									\
+    else								\
+      {									\
+	__x = (*__p + (incr));						\
+	*__p++ = __x & GMP_NUMB_MASK;					\
+	if (__x >> GMP_NUMB_BITS != 0)					\
+	  {								\
+	    do								\
+	      {								\
+		__x = (*__p + 1) & GMP_NUMB_MASK;			\
+		*__p++ = __x;						\
+	      }								\
+	    while (__x == 0);						\
+	  }								\
+      }									\
+  } while (0)
+#endif
+#ifndef mpn_decr_u
+#define mpn_decr_u(p,incr)						\
+  do {									\
+    mp_limb_t __x;							\
+    mp_ptr __p = (p);							\
+    if (__builtin_constant_p (incr) && (incr) == 1)			\
+      {									\
+	do								\
+	  {								\
+	    __x = *__p;							\
+	    *__p++ = (__x - 1) & GMP_NUMB_MASK;				\
+	  }								\
+	while (__x == 0);						\
+      }									\
+    else								\
+      {									\
+	__x = *__p - (incr);						\
+	*__p++ = __x & GMP_NUMB_MASK;					\
+	if (__x >> GMP_NUMB_BITS != 0)					\
+	  {								\
+	    do								\
+	      {								\
+		__x = *__p;						\
+		*__p++ = (__x - 1) & GMP_NUMB_MASK;			\
+	      }								\
+	    while (__x == 0);						\
+	  }								\
+      }									\
+  } while (0)
+#endif
+#endif
+
+/* Default MPN_INCR_U/MPN_DECR_U for targets without an asm version above.
+   With WANT_ASSERT the full mpn_add_1/mpn_sub_1 are used so the given
+   size can be checked and the no-carry/no-borrow condition asserted;
+   otherwise the size argument is ignored and the fast self-terminating
+   mpn_incr_u/mpn_decr_u loops are used.  */
+#ifndef MPN_INCR_U
+#if WANT_ASSERT
+#define MPN_INCR_U(ptr, size, n)					\
+  do {									\
+    ASSERT ((size) >= 1);						\
+    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));			\
+  } while (0)
+#else
+#define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
+#endif
+#endif
+
+#ifndef MPN_DECR_U
+#if WANT_ASSERT
+#define MPN_DECR_U(ptr, size, n)					\
+  do {									\
+    ASSERT ((size) >= 1);						\
+    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));			\
+  } while (0)
+#else
+#define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
+#endif
+#endif
+
+
+/* Structure for conversion between internal binary format and strings.
+   One entry per base, indexed directly by base (see mp_bases below).  */
+struct bases
+{
+  /* Number of digits in the conversion base that always fits in an mp_limb_t.
+     For example, for base 10 on a machine where an mp_limb_t has 32 bits this
+     is 9, since 10**9 is the largest number that fits into an mp_limb_t.  */
+  int chars_per_limb;
+
+  /* log(2)/log(conversion_base), stored as a limb-sized fixed-point
+     fraction (it is multiplied against a bit count with umul_ppmm in
+     DIGITS_IN_BASE_FROM_BITS below, and the high limb taken).  */
+  mp_limb_t logb2;
+
+  /* log(conversion_base)/log(2), presumably fixed-point scaled like logb2
+     -- scaling not visible from this chunk, confirm against users.  */
+  mp_limb_t log2b;
+
+  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
+     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
+     i.e. the number of bits used to represent each digit in the base.  */
+  mp_limb_t big_base;
+
+  /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
+     fixed-point number.  Instead of dividing by big_base an application can
+     choose to multiply by big_base_inverted.  */
+  mp_limb_t big_base_inverted;
+};
+
+/* Table of the above, indexed by base; valid entries run up to base 256.  */
+#define   mp_bases __MPN(bases)
+__GMP_DECLSPEC extern const struct bases mp_bases[257];
+
+
+/* Compute the number of digits in base for nbits bits, making sure the result
+   is never too small.  The two variants of the macro implement the same
+   function; the GT2 variant below works just for bases > 2.
+
+   Both multiply the bit count by the fixed-point log(2)/log(base) from
+   mp_bases and take the high limb of the product, then add 1 to round up.
+   The first variant also folds in a possible carry from the low product
+   limb; the GT2 variant instead bumps logb2 by one ulp, which suffices
+   when base > 2.  */
+#define DIGITS_IN_BASE_FROM_BITS(res, nbits, b)				\
+  do {									\
+    mp_limb_t _ph, _dummy;						\
+    size_t _nbits = (nbits);						\
+    umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits);			\
+    _ph += (_dummy + _nbits < _dummy);					\
+    res = _ph + 1;							\
+  } while (0)
+#define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b)			\
+  do {									\
+    mp_limb_t _ph, _dummy;						\
+    size_t _nbits = (nbits);						\
+    umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits);		\
+    res = _ph + 1;							\
+  } while (0)
+
+/* MPN_SIZEINBASE -- number of digits needed to express {ptr,size} in the
+   given base.  For power of 2 bases this is exact.  For other bases the
+   result is either exact or one too big.
+
+   To be exact always it'd be necessary to examine all the limbs of the
+   operand, since numbers like 100..000 and 99...999 generally differ only
+   in the lowest limb.  It'd be possible to examine just a couple of high
+   limbs to increase the probability of being exact, but that doesn't seem
+   worth bothering with.  */
+
+#define MPN_SIZEINBASE(result, ptr, size, base)				\
+  do {									\
+    int	   __lb_base, __cnt;						\
+    size_t __totbits;							\
+									\
+    ASSERT ((size) >= 0);						\
+    ASSERT ((base) >= 2);						\
+    ASSERT ((base) < numberof (mp_bases));				\
+									\
+    /* Special case for X == 0.  */					\
+    if ((size) == 0)							\
+      (result) = 1;							\
+    else								\
+      {									\
+	/* Calculate the total number of significant bits of X.  */	\
+	count_leading_zeros (__cnt, (ptr)[(size)-1]);			\
+	__totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
+									\
+	if (POW2_P (base))						\
+	  {								\
+	    /* big_base holds log2(base) for powers of 2 (see struct	\
+	       bases), so just divide the bit count, rounding up.  */	\
+	    __lb_base = mp_bases[base].big_base;			\
+	    (result) = (__totbits + __lb_base - 1) / __lb_base;		\
+	  }								\
+	else								\
+	  {								\
+	    DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base);	\
+	  }								\
+      }									\
+  } while (0)
+
+/* As MPN_SIZEINBASE but for base 2^base2exp; exact, and requires a
+   normalized non-zero operand (high limb != 0, asserted).  */
+#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp)			\
+  do {										\
+    int          __cnt;								\
+    mp_bitcnt_t  __totbits;							\
+    ASSERT ((size) > 0);							\
+    ASSERT ((ptr)[(size)-1] != 0);						\
+    count_leading_zeros (__cnt, (ptr)[(size)-1]);				\
+    __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);	\
+    (result) = (__totbits + (base2exp)-1) / (base2exp);				\
+  } while (0)
+
+
+/* bit count to limb count, rounding up */
+#define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
+
+/* MPN_SET_UI sets an mpn (ptr, cnt) to given ui.  MPZ_FAKE_UI creates fake
+   mpz_t from ui.  The zp argument must have room for LIMBS_PER_ULONG limbs
+   in both cases (LIMBS_PER_ULONG is also defined here.)  The resulting
+   size is normalized: zero limbs at the top are not counted.  */
+#if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
+
+#define LIMBS_PER_ULONG 1
+#define MPN_SET_UI(zp, zn, u)						\
+  (zp)[0] = (u);							\
+  (zn) = ((zp)[0] != 0);
+#define MPZ_FAKE_UI(z, zp, u)						\
+  (zp)[0] = (u);							\
+  PTR (z) = (zp);							\
+  SIZ (z) = ((zp)[0] != 0);						\
+  ASSERT_CODE (ALLOC (z) = 1);
+
+#else /* need two limbs per ulong (e.g. nails shrink the numb field) */
+
+#define LIMBS_PER_ULONG 2
+#define MPN_SET_UI(zp, zn, u)						\
+  (zp)[0] = (u) & GMP_NUMB_MASK;					\
+  (zp)[1] = (u) >> GMP_NUMB_BITS;					\
+  (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
+#define MPZ_FAKE_UI(z, zp, u)						\
+  (zp)[0] = (u) & GMP_NUMB_MASK;					\
+  (zp)[1] = (u) >> GMP_NUMB_BITS;					\
+  SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);			\
+  PTR (z) = (zp);							\
+  ASSERT_CODE (ALLOC (z) = 2);
+
+#endif
+
+
+/* Hint flag: x86 (32-bit family) has few general registers, which some
+   code uses to choose register-frugal variants.  */
+#if HAVE_HOST_CPU_FAMILY_x86
+#define TARGET_REGISTER_STARVED 1
+#else
+#define TARGET_REGISTER_STARVED 0
+#endif
+
+
+/* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
+   or 0 there into a limb 0xFF..FF or 0 respectively.
+
+   On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
+   but C99 doesn't guarantee signed right shifts are arithmetic, so we have
+   a little compile-time test and a fallback to a "? :" form.  The latter is
+   necessary for instance on Cray vector systems.
+
+   Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
+   to an arithmetic right shift anyway, but it's good to get the desired
+   shift on past versions too (in particular since an important use of
+   LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).
+
+   The ((mp_limb_signed_t) -1 >> 1) < 0 test is a compile-time constant, so
+   the unused arm is optimized away.  */
+
+#define LIMB_HIGHBIT_TO_MASK(n)						\
+  (((mp_limb_signed_t) -1 >> 1) < 0					\
+   ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)			\
+   : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
+
+
+/* invert_limb(invxl,xl) -- set invxl to the reciprocal of the normalized
+   limb xl, namely floor((B^2-1)/xl) - B where B = 2^GMP_LIMB_BITS.
+   Use a library function for invert_limb, if available. */
+#define  mpn_invert_limb __MPN(invert_limb)
+__GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
+#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
+#define invert_limb(invxl,xl)						\
+  do {									\
+    (invxl) = mpn_invert_limb (xl);					\
+  } while (0)
+#endif
+
+/* Generic fallback: the reciprocal via one 2/1 division,
+   floor((B*~xl + (B-1)) / xl).  */
+#ifndef invert_limb
+#define invert_limb(invxl,xl)						\
+  do {									\
+    mp_limb_t _dummy;							\
+    ASSERT ((xl) != 0);							\
+    udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl);		\
+  } while (0)
+#endif
+
+/* invert_pi1(dinv, d1, d0) -- compute the inverse of the two-limb divisor
+   {d0,d1}, storing it in (dinv).inv32.  Starts from the single-limb
+   inverse of d1 and adjusts it downward to account for the low divisor
+   limb d0 (at most two decrements, chosen by the overflow tests).  */
+#define invert_pi1(dinv, d1, d0)					\
+  do {									\
+    mp_limb_t _v, _p, _t1, _t0, _mask;					\
+    invert_limb (_v, d1);						\
+    _p = (d1) * _v;							\
+    _p += (d0);								\
+    if (_p < (d0))							\
+      {									\
+	_v--;								\
+	_mask = -(mp_limb_t) (_p >= (d1));				\
+	_p -= (d1);							\
+	_v += _mask;							\
+	_p -= _mask & (d1);						\
+      }									\
+    umul_ppmm (_t1, _t0, d0, _v);					\
+    _p += _t1;								\
+    if (_p < _t1)							\
+      {									\
+	_v--;								\
+	if (UNLIKELY (_p >= (d1)))					\
+	  {								\
+	    if (_p > (d1) || _t0 >= (d0))				\
+	      _v--;							\
+	  }								\
+      }									\
+    (dinv).inv32 = _v;							\
+  } while (0)
+
+
+/* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
+   Divide the two-limb number (nh,nl) by the normalized limb d, using the
+   precomputed reciprocal di (see invert_limb), producing quotient q and
+   remainder r without a hardware divide.
+   We write things strangely below, to help gcc.  A more straightforward
+   version:
+	_r = (nl) - _qh * (d);
+	_t = _r + (d);
+	if (_r >= _ql)
+	  {
+	    _qh--;
+	    _r = _t;
+	  }
+   For one operation shorter critical path, one may want to use this form:
+	_p = _qh * (d)
+	_s = (nl) + (d);
+	_r = (nl) - _p;
+	_t = _s - _p;
+	if (_r >= _ql)
+	  {
+	    _qh--;
+	    _r = _t;
+	  }
+*/
+#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)				\
+  do {									\
+    mp_limb_t _qh, _ql, _r, _mask;					\
+    umul_ppmm (_qh, _ql, (nh), (di));					\
+    if (__builtin_constant_p (nl) && (nl) == 0)				\
+      {									\
+	/* nl == 0 at compile time: skip the two-limb add.  */		\
+	_qh += (nh) + 1;						\
+	_r = - _qh * (d);						\
+	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
+	_qh += _mask;							\
+	_r += _mask & (d);						\
+      }									\
+    else								\
+      {									\
+	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
+	_r = (nl) - _qh * (d);						\
+	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
+	_qh += _mask;							\
+	_r += _mask & (d);						\
+	if (UNLIKELY (_r >= (d)))					\
+	  {								\
+	    _r -= (d);							\
+	    _qh++;							\
+	  }								\
+      }									\
+    (r) = _r;								\
+    (q) = _qh;								\
+  } while (0)
+
+/* Dividing (NH, NL) by D, returning the remainder only. Unlike
+   udiv_qrnnd_preinv, works also for the case NH == D, where the
+   quotient doesn't quite fit in a single limb. */
+#define udiv_rnnd_preinv(r, nh, nl, d, di)				\
+  do {									\
+    mp_limb_t _qh, _ql, _r, _mask;					\
+    umul_ppmm (_qh, _ql, (nh), (di));					\
+    if (__builtin_constant_p (nl) && (nl) == 0)				\
+      {									\
+	_r = ~(_qh + (nh)) * (d);					\
+	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
+	_r += _mask & (d);						\
+      }									\
+    else								\
+      {									\
+	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
+	_r = (nl) - _qh * (d);						\
+	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
+	_r += _mask & (d);						\
+	if (UNLIKELY (_r >= (d)))					\
+	  _r -= (d);							\
+      }									\
+    (r) = _r;								\
+  } while (0)
+
+/* Compute the quotient and remainder for n / d. Requires d
+   >= B^2 / 2 and n < d B. di is the inverse
+
+     floor ((B^3 - 1) / (d0 + d1 B)) - B.
+
+   NOTE: Output variables are updated multiple times. Only some inputs
+   and outputs may overlap.
+*/
+#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)		\
+  do {									\
+    mp_limb_t _q0, _t1, _t0, _mask;					\
+    umul_ppmm ((q), _q0, (n2), (dinv));					\
+    add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));			\
+									\
+    /* Compute the two most significant limbs of n - q'd */		\
+    (r1) = (n1) - (d1) * (q);						\
+    sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));			\
+    umul_ppmm (_t1, _t0, (d0), (q));					\
+    sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);			\
+    (q)++;								\
+									\
+    /* Conditionally adjust q and the remainders */			\
+    _mask = - (mp_limb_t) ((r1) >= _q0);				\
+    (q) += _mask;							\
+    add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0));	\
+    if (UNLIKELY ((r1) >= (d1)))					\
+      {									\
+	/* Rare second adjustment when the remainder still >= d.  */	\
+	if ((r1) > (d1) || (r0) >= (d0))				\
+	  {								\
+	    (q)++;							\
+	    sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));		\
+	  }								\
+      }									\
+  } while (0)
+
+#ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
+#define   mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
+__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
+#endif
+
+
+/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
+   plain mpn_divrem_1.  The default is yes, since the few CISC chips where
+   preinv is not good have defines saying so.  */
+#ifndef USE_PREINV_DIVREM_1
+#define USE_PREINV_DIVREM_1   1
+#endif
+
+/* Dispatch on the above: note the preinv form takes the extra dinv and
+   shift arguments, which the plain form simply drops.  */
+#if USE_PREINV_DIVREM_1
+#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
+  mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
+#else
+#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
+  mpn_divrem_1 (qp, xsize, ap, size, d)
+#endif
+
+/* Size threshold at which mpn_mod_1 takes over from mpn_preinv_mod_1;
+   may be overridden by per-CPU tuned parameter files.  */
+#ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
+#endif
+
+/* This selection may seem backwards.  The reason mpn_mod_1 typically takes
+   over for larger sizes is that it uses the mod_1_1 function.  */
+#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)		\
+  (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD)		\
+   ? mpn_preinv_mod_1 (src, size, divisor, inverse)			\
+   : mpn_mod_1 (src, size, divisor))
+
+
+#ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
+#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
+__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+#endif
+
+
+/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
+   plain mpn_divrem_1.  Likewise BMOD_1_TO_MOD_1_THRESHOLD for
+   mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
+   modexact are faster at all sizes, so the defaults are 0.  Those CPUs
+   where this is not right have a tuned threshold.  */
+#ifndef DIVEXACT_1_THRESHOLD
+#define DIVEXACT_1_THRESHOLD  0
+#endif
+#ifndef BMOD_1_TO_MOD_1_THRESHOLD
+#define BMOD_1_TO_MOD_1_THRESHOLD  10
+#endif
+
+/* Divide {up,n} by d, storing the quotient at {rp,n}.  The caller must
+   guarantee the division is exact (asserted below); above the threshold
+   the faster mpn_divexact_1 is used.  */
+#define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d)				\
+  do {									\
+    if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD))			\
+      ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d));	\
+    else								\
+      {									\
+	ASSERT (mpn_mod_1 (up, n, d) == 0);				\
+	mpn_divexact_1 (rp, up, n, d);					\
+      }									\
+  } while (0)
+
+#ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
+#define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
+__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+#endif
+
+/* mpn_modexact_1_odd is the plain (carry-in 0) case, expressed via the
+   carry-in form when there is no native version.  */
+#if HAVE_NATIVE_mpn_modexact_1_odd
+#define   mpn_modexact_1_odd  __MPN(modexact_1_odd)
+__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+#else
+#define mpn_modexact_1_odd(src,size,divisor) \
+  mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
+#endif
+
+/* Pick modexact for small sizes, plain mpn_mod_1 above the threshold.  */
+#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)			\
+  (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD)			\
+   ? mpn_modexact_1_odd (src, size, divisor)				\
+   : mpn_mod_1 (src, size, divisor))
+
+/* binvert_limb() sets inv to the multiplicative inverse of n modulo
+   2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
+   n must be odd (otherwise such an inverse doesn't exist).
+
+   This is not to be confused with invert_limb(), which is completely
+   different.
+
+   The table lookup gives an inverse with the low 8 bits valid, and each
+   multiply step doubles the number of bits.  See Jebelean "An algorithm for
+   exact division" end of section 4 (reference in gmp.texi).
+
+   Possible enhancement: Could use UHWtype until the last step, if half-size
+   multiplies are faster (might help under _LONG_LONG_LIMB).
+
+   Alternative: As noted in Granlund and Montgomery "Division by Invariant
+   Integers using Multiplication" (reference in gmp.texi), n itself gives a
+   3-bit inverse immediately, and could be used instead of a table lookup.
+   A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
+   bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
+
+#define binvert_limb_table  __gmp_binvert_limb_table
+__GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
+
+#define binvert_limb(inv,n)						\
+  do {									\
+    mp_limb_t  __n = (n);						\
+    mp_limb_t  __inv;							\
+    ASSERT ((__n & 1) == 1);						\
+									\
+    /* n is odd so bit 0 is always set; index by the next 7 bits.  */	\
+    __inv = binvert_limb_table[(__n/2) & 0x7F]; /*  8 */		\
+    if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;	\
+    if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;	\
+    if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;	\
+									\
+    if (GMP_NUMB_BITS > 64)						\
+      {									\
+	int  __invbits = 64;						\
+	do {								\
+	  __inv = 2 * __inv - __inv * __inv * __n;			\
+	  __invbits *= 2;						\
+	} while (__invbits < GMP_NUMB_BITS);				\
+      }									\
+									\
+    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);			\
+    (inv) = __inv & GMP_NUMB_MASK;					\
+  } while (0)
+#define modlimb_invert binvert_limb  /* backward compatibility */
+
+/* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
+   Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
+   GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
+   we need to start from GMP_NUMB_MAX>>1. */
+#define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
+
+/* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
+   These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
+#define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)
+#define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
+
+/* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.
+
+   It's not clear whether this is the best way to do this calculation.
+   Anything congruent to -a would be fine for the one limb congruence
+   tests.  */
+
+#define NEG_MOD(r, a, d)						\
+  do {									\
+    ASSERT ((d) != 0);							\
+    ASSERT_LIMB (a);							\
+    ASSERT_LIMB (d);							\
+									\
+    if ((a) <= (d))							\
+      {									\
+	/* small a is reasonably likely */				\
+	(r) = (d) - (a);						\
+      }									\
+    else								\
+      {									\
+	/* Subtract a from the smallest multiple (here a power-of-2	\
+	   multiple) of d that is >= a; the result is congruent to -a.	\
+	   __dnorm is d shifted up so its top numb bit is set.  */	\
+	unsigned   __twos;						\
+	mp_limb_t  __dnorm;						\
+	count_leading_zeros (__twos, d);				\
+	__twos -= GMP_NAIL_BITS;					\
+	__dnorm = (d) << __twos;					\
+	(r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);		\
+      }									\
+									\
+    ASSERT_LIMB (r);							\
+  } while (0)
+
+/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
+#define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)
+
+
+/* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
+   to 0 if there's an even number.  "n" should be an unsigned long and "p"
+   an int.  Several CPU-specific versions follow; the last definition
+   wins only if no earlier #if matched.  */
+
+#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
+#define ULONG_PARITY(p, n)						\
+  do {									\
+    int __p;								\
+    __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n));			\
+    (p) = __p & 1;							\
+  } while (0)
+#endif
+
+/* Cray intrinsic _popcnt. */
+#ifdef _CRAY
+#define ULONG_PARITY(p, n)      \
+  do {                          \
+    (p) = _popcnt (n) & 1;      \
+  } while (0)
+#endif
+
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
+    && ! defined (NO_ASM) && defined (__ia64)
+/* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
+   to a 64 bit unsigned long long for popcnt */
+#define ULONG_PARITY(p, n)						\
+  do {									\
+    unsigned long long  __n = (unsigned long) (n);			\
+    int  __p;								\
+    __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n));		\
+    (p) = __p & 1;							\
+  } while (0)
+#endif
+
+/* x86: fold the value down to 8 bits with xors, then read the parity
+   flag with setpo.  "Q"/"q" constraints restrict to byte-addressable
+   registers (the macro names differ by gcc version).  */
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
+    && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
+#if __GMP_GNUC_PREREQ (3,1)
+#define __GMP_qm "=Qm"
+#define __GMP_q "=Q"
+#else
+#define __GMP_qm "=qm"
+#define __GMP_q "=q"
+#endif
+#define ULONG_PARITY(p, n)						\
+  do {									\
+    char	   __p;							\
+    unsigned long  __n = (n);						\
+    __n ^= (__n >> 16);							\
+    __asm__ ("xorb %h1, %b1\n\t"					\
+	     "setpo %0"							\
+	 : __GMP_qm (__p), __GMP_q (__n)				\
+	 : "1" (__n));							\
+    (p) = __p;								\
+  } while (0)
+#endif
+
+/* Generic fallback: 0x96696996 has bit i equal to the parity of i, so it
+   acts as a 32-entry one-bit lookup table indexed by 5 bits at a time.  */
+#if ! defined (ULONG_PARITY)
+#define ULONG_PARITY(p, n)						\
+  do {									\
+    unsigned long  __n = (n);						\
+    int  __p = 0;							\
+    do									\
+      {									\
+	__p ^= 0x96696996L >> (__n & 0x1F);				\
+	__n >>= 5;							\
+      }									\
+    while (__n != 0);							\
+									\
+    (p) = __p & 1;							\
+  } while (0)
+#endif
+
+
+/* BSWAP_LIMB(dst, src) -- byte-reverse the limb src into dst.
+   CPU-specific asm versions first, then generic shift/mask fallbacks.
+
+   3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of
+   version 3.1 at least) doesn't seem to know how to generate rlwimi for
+   anything other than bit-fields, so use "asm".  */
+#if defined (__GNUC__) && ! defined (NO_ASM)                    \
+  && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    mp_limb_t  __bswapl_src = (src);					\
+    mp_limb_t  __tmp1 = __bswapl_src >> 24;		/* low byte */	\
+    mp_limb_t  __tmp2 = __bswapl_src << 24;		/* high byte */	\
+    __asm__ ("rlwimi %0, %2, 24, 16, 23"		/* 2nd low */	\
+	 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src));		\
+    __asm__ ("rlwimi %0, %2,  8,  8, 15"		/* 3rd high */	\
+	 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src));		\
+    (dst) = __tmp1 | __tmp2;				/* whole */	\
+  } while (0)
+#endif
+
+/* bswap is available on i486 and up and is fast.  A combination rorw $8 /
+   roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
+   kernel with xchgb instead of rorw), but this is not done here, because
+   i386 means generic x86 and mixing word and dword operations will cause
+   partial register stalls on P6 chips.  */
+#if defined (__GNUC__) && ! defined (NO_ASM)            \
+  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
+  && GMP_LIMB_BITS == 32
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    __asm__ ("bswap %0" : "=r" (dst) : "0" (src));			\
+  } while (0)
+#endif
+
+#if defined (__GNUC__) && ! defined (NO_ASM)            \
+  && defined (__amd64__) && GMP_LIMB_BITS == 64
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    __asm__ ("bswap %q0" : "=r" (dst) : "0" (src));			\
+  } while (0)
+#endif
+
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
+    && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) :  "r" (src));		\
+  } while (0)
+#endif
+
+/* As per glibc. */
+#if defined (__GNUC__) && ! defined (NO_ASM)                    \
+  && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    mp_limb_t  __bswapl_src = (src);					\
+    __asm__ ("ror%.w %#8, %0\n\t"					\
+	     "swap   %0\n\t"						\
+	     "ror%.w %#8, %0"						\
+	     : "=d" (dst)						\
+	     : "0" (__bswapl_src));					\
+  } while (0)
+#endif
+
+/* Generic shift-and-mask fallbacks, one per limb size.  NOTE: src is
+   evaluated multiple times in these.  */
+#if ! defined (BSWAP_LIMB)
+#if GMP_LIMB_BITS == 8
+#define BSWAP_LIMB(dst, src)				\
+  do { (dst) = (src); } while (0)
+#endif
+#if GMP_LIMB_BITS == 16
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    (dst) = ((src) << 8) + ((src) >> 8);				\
+  } while (0)
+#endif
+#if GMP_LIMB_BITS == 32
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    (dst) =								\
+      ((src) << 24)							\
+      + (((src) & 0xFF00) << 8)						\
+      + (((src) >> 8) & 0xFF00)						\
+      + ((src) >> 24);							\
+  } while (0)
+#endif
+#if GMP_LIMB_BITS == 64
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    (dst) =								\
+      ((src) << 56)							\
+      + (((src) & 0xFF00) << 40)					\
+      + (((src) & 0xFF0000) << 24)					\
+      + (((src) & 0xFF000000) << 8)					\
+      + (((src) >> 8) & 0xFF000000)					\
+      + (((src) >> 24) & 0xFF0000)					\
+      + (((src) >> 40) & 0xFF00)					\
+      + ((src) >> 56);							\
+  } while (0)
+#endif
+#endif
+
+/* Last-resort byte loop for any other limb size.  */
+#if ! defined (BSWAP_LIMB)
+#define BSWAP_LIMB(dst, src)						\
+  do {									\
+    mp_limb_t  __bswapl_src = (src);					\
+    mp_limb_t  __dstl = 0;						\
+    int	       __i;							\
+    for (__i = 0; __i < GMP_LIMB_BYTES; __i++)			\
+      {									\
+	__dstl = (__dstl << 8) | (__bswapl_src & 0xFF);			\
+	__bswapl_src >>= 8;						\
+      }									\
+    (dst) = __dstl;							\
+  } while (0)
+#endif
+
+
+/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
+   those we know are fast.  */
+#if defined (__GNUC__) && ! defined (NO_ASM)				\
+  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN			\
+  && (HAVE_HOST_CPU_powerpc604						\
+      || HAVE_HOST_CPU_powerpc604e					\
+      || HAVE_HOST_CPU_powerpc750					\
+      || HAVE_HOST_CPU_powerpc7400)
+#define BSWAP_LIMB_FETCH(limb, src)					\
+  do {									\
+    mp_srcptr  __blf_src = (src);					\
+    mp_limb_t  __limb;							\
+    __asm__ ("lwbrx %0, 0, %1"						\
+	     : "=r" (__limb)						\
+	     : "r" (__blf_src),						\
+	       "m" (*__blf_src));					\
+    (limb) = __limb;							\
+  } while (0)
+#endif
+
+#if ! defined (BSWAP_LIMB_FETCH)
+#define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
+#endif
+
+
+/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
+   know are fast.  FIXME: Is this necessary?  */
+#if defined (__GNUC__) && ! defined (NO_ASM)				\
+  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN			\
+  && (HAVE_HOST_CPU_powerpc604						\
+      || HAVE_HOST_CPU_powerpc604e					\
+      || HAVE_HOST_CPU_powerpc750					\
+      || HAVE_HOST_CPU_powerpc7400)
+#define BSWAP_LIMB_STORE(dst, limb)					\
+  do {									\
+    mp_ptr     __dst = (dst);						\
+    mp_limb_t  __limb = (limb);						\
+    __asm__ ("stwbrx %1, 0, %2"						\
+	     : "=m" (*__dst)						\
+	     : "r" (__limb),						\
+	       "r" (__dst));						\
+  } while (0)
+#endif
+
+#if ! defined (BSWAP_LIMB_STORE)
+#define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
+#endif
+
+
+/* Byte swap limbs from {src,size} and store at {dst,size}. */
+#define MPN_BSWAP(dst, src, size)					\
+  do {									\
+    mp_ptr     __dst = (dst);						\
+    mp_srcptr  __src = (src);						\
+    mp_size_t  __size = (size);						\
+    mp_size_t  __i;							\
+    ASSERT ((size) >= 0);						\
+    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));			\
+    CRAY_Pragma ("_CRI ivdep");						\
+    for (__i = 0; __i < __size; __i++)					\
+      {									\
+	BSWAP_LIMB_FETCH (*__dst, __src);				\
+	__dst++;							\
+	__src++;							\
+      }									\
+  } while (0)
+
+/* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */
+#define MPN_BSWAP_REVERSE(dst, src, size)				\
+  do {									\
+    mp_ptr     __dst = (dst);						\
+    mp_size_t  __size = (size);						\
+    mp_srcptr  __src = (src) + __size - 1;				\
+    mp_size_t  __i;							\
+    ASSERT ((size) >= 0);						\
+    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));			\
+    CRAY_Pragma ("_CRI ivdep");						\
+    for (__i = 0; __i < __size; __i++)					\
+      {									\
+	BSWAP_LIMB_FETCH (*__dst, __src);				\
+	__dst++;							\
+	__src--;							\
+      }									\
+  } while (0)
+
+
+/* popc_limb(result, input) -- population count (number of 1 bits) of a
+   limb.  CPU-specific versions first, then generic bit-twiddling.
+
+   No processor claiming to be SPARC v9 compliant seems to
+   implement the POPC instruction.  Disable pattern for now.  */
+#if 0
+#if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
+#define popc_limb(result, input)					\
+  do {									\
+    DItype __res;							\
+    __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input));		\
+  } while (0)
+#endif
+#endif
+
+#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
+#define popc_limb(result, input)					\
+  do {									\
+    __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input));		\
+  } while (0)
+#endif
+
+/* Cray intrinsic. */
+#ifdef _CRAY
+#define popc_limb(result, input)					\
+  do {									\
+    (result) = _popcnt (input);						\
+  } while (0)
+#endif
+
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
+    && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
+#define popc_limb(result, input)					\
+  do {									\
+    __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input));		\
+  } while (0)
+#endif
+
+/* Cool population count of an mp_limb_t.
+   You have to figure out how this works, We won't tell you!
+
+   (It is the classic parallel bit-count: sum bits in 2-bit fields, then
+   4-bit, 8-bit, etc., using the repeating masks below.)
+
+   The constants could also be expressed as:
+     0x55... = [2^N / 3]     = [(2^N-1)/3]
+     0x33... = [2^N / 5]     = [(2^N-1)/5]
+     0x0f... = [2^N / 17]    = [(2^N-1)/17]
+     (N is GMP_LIMB_BITS, [] denotes truncation.) */
+
+#if ! defined (popc_limb) && GMP_LIMB_BITS == 8
+#define popc_limb(result, input)					\
+  do {									\
+    mp_limb_t  __x = (input);						\
+    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
+    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
+    __x = ((__x >> 4) + __x);						\
+    (result) = __x & 0x0f;						\
+  } while (0)
+#endif
+
+#if ! defined (popc_limb) && GMP_LIMB_BITS == 16
+#define popc_limb(result, input)					\
+  do {									\
+    mp_limb_t  __x = (input);						\
+    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
+    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
+    __x += (__x >> 4);							\
+    __x = ((__x >> 8) & MP_LIMB_T_MAX/4369)+(__x & MP_LIMB_T_MAX/4369);	\
+    (result) = __x;							\
+  } while (0)
+#endif
+
+#if ! defined (popc_limb) && GMP_LIMB_BITS == 32
+#define popc_limb(result, input)					\
+  do {									\
+    mp_limb_t  __x = (input);						\
+    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
+    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
+    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;			\
+    __x = ((__x >> 8) + __x);						\
+    __x = ((__x >> 16) + __x);						\
+    (result) = __x & 0xff;						\
+  } while (0)
+#endif
+
+#if ! defined (popc_limb) && GMP_LIMB_BITS == 64
+#define popc_limb(result, input)					\
+  do {									\
+    mp_limb_t  __x = (input);						\
+    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
+    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
+    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;			\
+    __x = ((__x >> 8) + __x);						\
+    __x = ((__x >> 16) + __x);						\
+    __x = ((__x >> 32) + __x);						\
+    (result) = __x & 0xff;						\
+  } while (0)
+#endif
+
+
+/* Define stuff for longlong.h: the fixed-width integer types it expects.
+   With gcc's __attribute__((mode)) the exact widths are guaranteed;
+   otherwise approximate them with the plain C types.  */
+#if HAVE_ATTRIBUTE_MODE
+typedef unsigned int UQItype	__attribute__ ((mode (QI)));
+typedef		 int SItype	__attribute__ ((mode (SI)));
+typedef unsigned int USItype	__attribute__ ((mode (SI)));
+typedef		 int DItype	__attribute__ ((mode (DI)));
+typedef unsigned int UDItype	__attribute__ ((mode (DI)));
+#else
+typedef unsigned char UQItype;
+typedef		 long SItype;
+typedef unsigned long USItype;
+#if HAVE_LONG_LONG
+typedef	long long int DItype;
+typedef unsigned long long int UDItype;
+#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
+typedef long int DItype;
+typedef unsigned long int UDItype;
+#endif
+#endif
+
+/* UWtype is the full word (limb) type longlong.h operates on; UHWtype a
+   half-word type.  W_TYPE_SIZE is UWtype's width in bits.  */
+typedef mp_limb_t UWtype;
+typedef unsigned int UHWtype;
+#define W_TYPE_SIZE GMP_LIMB_BITS
+
+/* Define ieee_double_extract and _GMP_IEEE_FLOATS.
+
+   Bit field packing is "implementation defined" according to C99, which
+   leaves us at the compiler's mercy here.  For some systems packing is
+   defined in the ABI (eg. x86).  In any case so far it seems universal that
+   little endian systems pack from low to high, and big endian from high to
+   low within the given type.
+
+   Within the fields we rely on the integer endianness being the same as the
+   float endianness, this is true everywhere we know of and it'd be a fairly
+   strange system that did anything else.
+
+   The three variants below declare the same fields (sig/exp/manh/manl) in
+   whatever order matches the host's double layout.  */
+
+/* Little endian floats with the two 32-bit halves swapped (eg. ARM old-ABI
+   style layouts).  */
+#if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+  struct
+    {
+      gmp_uint_least32_t manh:20;
+      gmp_uint_least32_t exp:11;
+      gmp_uint_least32_t sig:1;
+      gmp_uint_least32_t manl:32;
+    } s;
+  double d;
+};
+#endif
+
+#if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+  struct
+    {
+      gmp_uint_least32_t manl:32;
+      gmp_uint_least32_t manh:20;
+      gmp_uint_least32_t exp:11;
+      gmp_uint_least32_t sig:1;
+    } s;
+  double d;
+};
+#endif
+
+#if HAVE_DOUBLE_IEEE_BIG_ENDIAN
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+  struct
+    {
+      gmp_uint_least32_t sig:1;
+      gmp_uint_least32_t exp:11;
+      gmp_uint_least32_t manh:20;
+      gmp_uint_least32_t manl:32;
+    } s;
+  double d;
+};
+#endif
+
+/* Non-IEEE VAX D-float layout; note _GMP_IEEE_FLOATS is NOT defined here.  */
+#if HAVE_DOUBLE_VAX_D
+union double_extract
+{
+  struct
+    {
+      gmp_uint_least32_t man3:7;	/* highest 7 bits */
+      gmp_uint_least32_t exp:8;		/* excess-128 exponent */
+      gmp_uint_least32_t sig:1;
+      gmp_uint_least32_t man2:16;
+      gmp_uint_least32_t man1:16;
+      gmp_uint_least32_t man0:16;	/* lowest 16 bits */
+    } s;
+  double d;
+};
+#endif
+
+/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
+   that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
+#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
+/* Maximum number of limbs it will take to store any `double'.
+   We assume doubles have 53 mantissa bits.  */
+#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
+
+/* Split a double into limbs; returns information about the split
+   (see the implementation for the exact contract).  */
+__GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double);
+
+/* Convert {ptr,size} (with sign and exponent args) to a double.  */
+#define mpn_get_d __gmpn_get_d
+__GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;
+
+
+/* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
+   a_inf if x is an infinity.  Both are considered unlikely values, for
+   branch prediction.  */
+
+#if _GMP_IEEE_FLOATS
+/* IEEE: exponent field all ones means NaN or Inf; a zero mantissa
+   distinguishes Inf from NaN.  */
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)				\
+  do {									\
+    union ieee_double_extract  u;					\
+    u.d = (x);								\
+    if (UNLIKELY (u.s.exp == 0x7FF))					\
+      {									\
+	if (u.s.manl == 0 && u.s.manh == 0)				\
+	  { a_inf; }							\
+	else								\
+	  { a_nan; }							\
+      }									\
+  } while (0)
+#endif
+
+#if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
+/* no nans or infs in these formats */
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
+  do { } while (0)
+#endif
+
+#ifndef DOUBLE_NAN_INF_ACTION
+/* Unknown format, try something generic.
+   NaN should be "unordered", so x!=x.
+   Inf should be bigger than DBL_MAX.  */
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)				\
+  do {									\
+    {									\
+      if (UNLIKELY ((x) != (x)))					\
+	{ a_nan; }							\
+      else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))		\
+	{ a_inf; }							\
+    }									\
+  } while (0)
+#endif
+
+/* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
+   in the coprocessor, which means a bigger exponent range than normal, and
+   depending on the rounding mode, a bigger mantissa than normal.  (See
+   "Disappointments" in the gcc manual.)  FORCE_DOUBLE stores and fetches
+   "d" through memory to force any rounding and overflows to occur.
+
+   On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
+   registers, where there's no such extra precision and no need for the
+   FORCE_DOUBLE.  We don't bother to detect this since the present uses for
+   FORCE_DOUBLE are only in test programs and default generic C code.
+
+   Not quite sure that an "automatic volatile" will use memory, but it does
+   in gcc.  An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
+   apparently matching operands like "0" are only allowed on a register
+   output.  gcc 3.4 warns about this, though in fact it and past versions
+   seem to put the operand through memory as hoped.  */
+
+#if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86      \
+     || defined (__amd64__))
+/* Round-trip d through a volatile local, forcing a store/load.  */
+#define FORCE_DOUBLE(d) \
+  do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
+#else
+#define FORCE_DOUBLE(d)  do { } while (0)
+#endif
+
+
+/* Table mapping ASCII characters to their digit value in each base.  */
+__GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[];
+
+__GMP_DECLSPEC extern int __gmp_junk;
+__GMP_DECLSPEC extern const int __gmp_0;
+/* Fatal-error entry points; none of these return.  */
+__GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_overflow_in_mpz (void) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN;
+#define GMP_ERROR(code)   __gmp_exception (code)
+#define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
+#define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
+#define MPZ_OVERFLOW      __gmp_overflow_in_mpz ()
+
+/* Build an mp_limb_t constant with the correct integer-literal suffix
+   for the limb type.  */
+#if defined _LONG_LONG_LIMB
+#define CNST_LIMB(C) ((mp_limb_t) C##LL)
+#else /* not _LONG_LONG_LIMB */
+#define CNST_LIMB(C) ((mp_limb_t) C##L)
+#endif /* _LONG_LONG_LIMB */
+
+/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
+/* PP is the product of small odd primes fitting in one limb;
+   PP_FIRST_OMITTED is the first prime not included in the product.
+   PP_INVERTED, where given, is a precomputed inverse for division
+   by PP.  */
+#if GMP_NUMB_BITS == 2
+#define PP 0x3					/* 3 */
+#define PP_FIRST_OMITTED 5
+#endif
+#if GMP_NUMB_BITS == 4
+#define PP 0xF					/* 3 x 5 */
+#define PP_FIRST_OMITTED 7
+#endif
+#if GMP_NUMB_BITS == 8
+#define PP 0x69					/* 3 x 5 x 7 */
+#define PP_FIRST_OMITTED 11
+#endif
+#if GMP_NUMB_BITS == 16
+#define PP 0x3AA7				/* 3 x 5 x 7 x 11 x 13 */
+#define PP_FIRST_OMITTED 17
+#endif
+#if GMP_NUMB_BITS == 32
+#define PP 0xC0CFD797L				/* 3 x 5 x 7 x 11 x ... x 29 */
+#define PP_INVERTED 0x53E5645CL
+#define PP_FIRST_OMITTED 31
+#endif
+#if GMP_NUMB_BITS == 64
+#define PP CNST_LIMB(0xE221F97C30E94E1D)	/* 3 x 5 x 7 x 11 x ... x 53 */
+#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
+#define PP_FIRST_OMITTED 59
+#endif
+#ifndef PP_FIRST_OMITTED
+#define PP_FIRST_OMITTED 3
+#endif
+
+/* A two-limb value: d0 is the low limb, d1 the high limb.  */
+typedef struct
+{
+  mp_limb_t d0, d1;
+} mp_double_limb_t;
+
+/* GCD of the two-limb operands (u1,u0) and (v1,v0), returned as a
+   two-limb result.  */
+#define mpn_gcd_22 __MPN (gcd_22)
+__GMP_DECLSPEC mp_double_limb_t mpn_gcd_22 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+/* BIT1 means a result value in bit 1 (second least significant bit), with a
+   zero bit representing +1 and a one bit representing -1.  Bits other than
+   bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
+   used to ensure the expressions are "int"s even if a and/or b might be
+   other types.
+
+   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
+   and their speed is important.  Expressions are used rather than
+   conditionals to accumulate sign changes, which effectively means XORs
+   instead of conditional JUMPs. */
+
+/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))
+
+/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_U0(a)   ((a) == 1)
+
+/* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and
+   come up with a better name. */
+
+/* (a/0), with a given by low and size;
+   is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_LS0(alow,asize) \
+  (((asize) == 1 || (asize) == -1) && (alow) == 1)
+
+/* (a/0), with a an mpz_t;
+   fetch of low limb always valid, even if size is zero */
+#define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))
+
+/* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
+#define JACOBI_0U(b)   ((b) == 1)
+
+/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
+#define JACOBI_0S(b)   ((b) == 1 || (b) == -1)
+
+/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
+#define JACOBI_0LS(blow,bsize) \
+  (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
+
+/* Convert a bit1 to +1 or -1.  (bit1 & 2) is 0 or 2, so this yields
+   1 or -1 respectively. */
+#define JACOBI_BIT1_TO_PN(result_bit1) \
+  (1 - ((int) (result_bit1) & 2))
+
+/* (2/b), with b unsigned and odd;
+   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
+   hence obtained from (b>>1)^b */
+#define JACOBI_TWO_U_BIT1(b) \
+  ((int) (((b) >> 1) ^ (b)))
+
+/* (2/b)^twos, with b unsigned and odd */
+#define JACOBI_TWOS_U_BIT1(twos, b) \
+  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
+
+/* (2/b)^twos, with b unsigned and odd */
+#define JACOBI_TWOS_U(twos, b) \
+  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
+
+/* (-1/b), with b odd (signed or unsigned);
+   is (-1)^((b-1)/2); for odd b, bit 1 of b itself is already the
+   desired bit1 encoding */
+#define JACOBI_N1B_BIT1(b) \
+  ((int) (b))
+
+/* (a/b) effect due to sign of a: signed/unsigned, b odd;
+   is (-1/b) if a<0, or +1 if a>=0 */
+#define JACOBI_ASGN_SU_BIT1(a, b) \
+  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
+
+/* (a/b) effect due to sign of b: signed/signed;
+   is -1 if a and b both negative, +1 otherwise */
+#define JACOBI_BSGN_SS_BIT1(a, b) \
+  ((((a)<0) & ((b)<0)) << 1)
+
+/* (a/b) effect due to sign of b: signed/mpz;
+   is -1 if a and b both negative, +1 otherwise */
+#define JACOBI_BSGN_SZ_BIT1(a, b) \
+  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
+
+/* (a/b) effect due to sign of b: mpz/signed;
+   is -1 if a and b both negative, +1 otherwise */
+#define JACOBI_BSGN_ZS_BIT1(a, b) \
+  JACOBI_BSGN_SZ_BIT1 (b, a)
+
+/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
+   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
+   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
+   because this is used in a couple of places with only bit 1 of a or b
+   valid. */
+#define JACOBI_RECIP_UU_BIT1(a, b) \
+  ((int) ((a) & (b)))
+
+/* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
+   decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
+   updated for the new b_ptr.  result_bit1 is updated according to the
+   factors of 2 stripped, as per (a/2).  */
+#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)	\
+  do {									\
+    ASSERT ((b_size) >= 1);						\
+    ASSERT ((b_low) == (b_ptr)[0]);					\
+									\
+    while (UNLIKELY ((b_low) == 0))					\
+      {									\
+	(b_size)--;							\
+	ASSERT ((b_size) >= 1);						\
+	(b_ptr)++;							\
+	(b_low) = *(b_ptr);						\
+									\
+	ASSERT (((a) & 1) != 0);					\
+	/* each zero limb is GMP_NUMB_BITS factors of 2; only an odd	\
+	   count of factors affects the sign, hence the test below */	\
+	if ((GMP_NUMB_BITS % 2) == 1)					\
+	  (result_bit1) ^= JACOBI_TWO_U_BIT1(a);			\
+      }									\
+  } while (0)
+
+/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
+   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
+   unsigned.  modexact_1_odd effectively calculates -a mod b, and
+   result_bit1 is adjusted for the factor of -1.
+
+   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
+   sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
+   factor to introduce into result_bit1, so for that case use mpn_mod_1
+   unconditionally.
+
+   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
+   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
+   or not skip a divide step, or something. */
+
+#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
+  do {									   \
+    mp_srcptr  __a_ptr	= (a_ptr);					   \
+    mp_size_t  __a_size = (a_size);					   \
+    mp_limb_t  __b	= (b);						   \
+									   \
+    ASSERT (__a_size >= 1);						   \
+    ASSERT (__b & 1);							   \
+									   \
+    if ((GMP_NUMB_BITS % 2) != 0					   \
+	|| ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD))	   \
+      {									   \
+	(a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);			   \
+      }									   \
+    else								   \
+      {									   \
+	/* account for the implicit -1 factor of modexact */		   \
+	(result_bit1) ^= JACOBI_N1B_BIT1 (__b);				   \
+	(a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);		   \
+      }									   \
+  } while (0)
+
+/* State for the Jacobi computation using Lehmer. */
+#define jacobi_table __gmp_jacobi_table
+/* Lookup table driving mpn_jacobi_update below; indexed by
+   (state << 3) + (denominator << 2) + q.  */
+__GMP_DECLSPEC extern const unsigned char jacobi_table[208];
+
+/* Bit layout for the initial state. b must be odd.
+
+      3  2  1 0
+   +--+--+--+--+
+   |a1|a0|b1| s|
+   +--+--+--+--+
+
+ */
+static inline unsigned
+mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
+{
+  /* Pack the initial Lehmer-Jacobi state per the layout above: a's low
+     two bits into bits 3..2, b's bit 1 into bit 1, and the sign flag s
+     into bit 0.  */
+  unsigned a_bits, b_bit;
+
+  ASSERT (b & 1);
+  ASSERT (s <= 1);
+
+  a_bits = (a & 3) << 2;
+  b_bit = b & 2;
+  return a_bits + b_bit + s;
+}
+
+static inline int
+mpn_jacobi_finish (unsigned bits)
+{
+  /* At the end of the reduction, (a, b) = (1,0) or (0,1); only the
+     accumulated sign in bit 0 remains: 0 means +1, 1 means -1.  */
+  ASSERT ( (bits & 14) == 0);
+
+  return (bits & 1) ? -1 : 1;
+}
+
+/* Advance the packed Jacobi state after a division step with quotient
+   bits q (mod 4) on the given side (denominator 0 or 1), via a table
+   lookup.  Returns the new packed state.  */
+static inline unsigned
+mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
+{
+  /* FIXME: Could halve table size by not including the e bit in the
+   * index, and instead xor when updating. Then the lookup would be
+   * like
+   *
+   *   bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
+   */
+
+  ASSERT (bits < 26);
+  ASSERT (denominator < 2);
+  ASSERT (q < 4);
+
+  /* For almost all calls, denominator is constant and quite often q
+     is constant too. So use addition rather than or, so the compiler
+     can put the constant part can into the offset of an indexed
+     addressing instruction.
+
+     With constant denominator, the below table lookup is compiled to
+
+       C Constant q = 1, constant denominator = 1
+       movzbl table+5(%eax,8), %eax
+
+     or
+
+       C q in %edx, constant denominator = 1
+       movzbl table+4(%edx,%eax,8), %eax
+
+     One could maintain the state preshifted 3 bits, to save a shift
+     here, but at least on x86, that's no real saving.
+  */
+  return jacobi_table[(bits << 3) + (denominator << 2) + q];
+}
+
+/* Matrix multiplication */
+/* Multiply two 2x2 matrices of mpn numbers; the _itch function gives
+   the scratch space needed.  */
+#define   mpn_matrix22_mul __MPN(matrix22_mul)
+__GMP_DECLSPEC void      mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
+#define   mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
+__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
+
+/* Size above which matrix22_mul switches to a Strassen-style scheme.  */
+#ifndef MATRIX22_STRASSEN_THRESHOLD
+#define MATRIX22_STRASSEN_THRESHOLD 30
+#endif
+
+/* HGCD definitions */
+
+/* Extract one numb, shifting count bits left
+    ________  ________
+   |___xh___||___xl___|
+	  |____r____|
+   >count <
+
+   The count includes any nail bits, so it should work fine if count
+   is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
+   xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
+
+   FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
+   those calls where the count high bits of xh may be non-zero.
+*/
+
+#define MPN_EXTRACT_NUMB(count, xh, xl)				\
+  ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) |	\
+   ((xl) >> (GMP_LIMB_BITS - (count))))
+
+
+/* The matrix non-negative M = (u, u'; v,v') keeps track of the
+   reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
+   than a, b. The determinant must always be one, so that M has an
+   inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
+   bits. */
+struct hgcd_matrix1
+{
+  mp_limb_t u[2][2];
+};
+
+/* Half-gcd step on two-limb operands, producing a single-limb matrix.  */
+#define mpn_hgcd2 __MPN (hgcd2)
+__GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,	struct hgcd_matrix1 *);
+
+#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
+
+#define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
+__GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
+
+/* hgcd2 variant that also updates a packed Jacobi state (last arg).  */
+#define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
+__GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);
+
+/* Multi-limb HGCD matrix; p[i][j] are the four entries, each of n
+   limbs, with alloc recording the allocation for sanity checks.  */
+struct hgcd_matrix
+{
+  mp_size_t alloc;		/* for sanity checking only */
+  mp_size_t n;
+  mp_ptr p[2][2];
+};
+/* Scratch needed by mpn_hgcd_matrix_init: four entries of about n/2
+   limbs each.  */
+#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
+
+#define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
+__GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
+__GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);
+
+#define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
+__GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);
+
+#define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
+__GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);
+
+#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_step __MPN(hgcd_step)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+
+#define mpn_hgcd_reduce __MPN(hgcd_reduce)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
+
+#define mpn_hgcd_itch __MPN (hgcd_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t) ATTRIBUTE_CONST;
+
+#define mpn_hgcd __MPN (hgcd)
+__GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+
+#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t) ATTRIBUTE_CONST;
+
+#define mpn_hgcd_appr __MPN (hgcd_appr)
+__GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+
+#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);
+
+/* Callback invoked by mpn_gcd_subdiv_step for each reduction step.  */
+typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
+
+/* Needs storage for the quotient */
+#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
+
+#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
+__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);
+
+/* Context passed (as void *) to mpn_gcdext_hook through
+   mpn_gcd_subdiv_step.  */
+struct gcdext_ctx
+{
+  /* Result parameters. */
+  mp_ptr gp;
+  mp_size_t gn;
+  mp_ptr up;
+  mp_size_t *usize;
+
+  /* Cofactors updated in each step. */
+  mp_size_t un;
+  mp_ptr u0, u1, tp;
+};
+
+#define mpn_gcdext_hook __MPN (gcdext_hook)
+/* Note: an identifier with function type at file scope is a
+   declaration, not a definition, so no `extern' is needed here.  */
+gcd_subdiv_step_hook mpn_gcdext_hook;
+
+#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
+
+#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
+__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
+
+/* 4*(an + 1) + 4*(bn + 1) + an */
+#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
+
+/* Default algorithm-crossover thresholds, overridable per-CPU by
+   tuned gmp-mparam.h values.  */
+#ifndef HGCD_THRESHOLD
+#define HGCD_THRESHOLD 400
+#endif
+
+#ifndef HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD 400
+#endif
+
+#ifndef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 1000
+#endif
+
+#ifndef GCD_DC_THRESHOLD
+#define GCD_DC_THRESHOLD 1000
+#endif
+
+#ifndef GCDEXT_DC_THRESHOLD
+#define GCDEXT_DC_THRESHOLD 600
+#endif
+
+/* Definitions for mpn_set_str and mpn_get_str */
+/* One entry in a table of powers of the conversion base, used by the
+   divide-and-conquer radix conversion routines.  */
+struct powers
+{
+  mp_ptr p;			/* actual power value */
+  mp_size_t n;			/* # of limbs at p */
+  mp_size_t shift;		/* weight of lowest limb, in limb base B */
+  size_t digits_in_base;	/* number of corresponding digits */
+  int base;
+};
+typedef struct powers powers_t;
+#define mpn_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) /* FIXME: This can perhaps be trimmed */
+#define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
+#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
+
+#define mpn_compute_powtab __MPN(compute_powtab)
+__GMP_DECLSPEC size_t mpn_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
+#define   mpn_dc_set_str __MPN(dc_set_str)
+__GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);
+#define   mpn_bc_set_str __MPN(bc_set_str)
+__GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
+
+
+/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
+   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
+   hence giving back the user's size in bits rounded up.  Notice that
+   converting prec->bits->prec gives an unchanged value.  */
+#define __GMPF_BITS_TO_PREC(n)						\
+  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
+#define __GMPF_PREC_TO_BITS(n) \
+  ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
+
+__GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
+
+/* Compute the number of base-b digits corresponding to nlimbs limbs, rounding
+   down.  */
+/* Uses the high limb of a fixed-point multiply by log(2)/log(b).  */
+#define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b)				\
+  do {									\
+    mp_limb_t _ph, _dummy;						\
+    umul_ppmm (_ph, _dummy,						\
+	       mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\
+    res = _ph;								\
+  } while (0)
+
+/* Compute the number of limbs corresponding to ndigits base-b digits, rounding
+   up.  */
+/* Same fixed-point scheme with log(b)/log(2); the +2 covers rounding.  */
+#define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b)			\
+  do {									\
+    mp_limb_t _ph, _dummy;						\
+    umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits));	\
+    res = 8 * _ph / GMP_NUMB_BITS + 2;					\
+  } while (0)
+
+
+/* Set n to the number of significant digits an mpf of the given _mp_prec
+   field, in the given base.  This is a rounded up value, designed to ensure
+   there's enough digits to reproduce all the guaranteed part of the value.
+
+   There are prec many limbs, but the high might be only "1" so forget it
+   and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
+   further +1 is because the limbs usually won't fall on digit boundaries.
+
+   FIXME: If base is a power of 2 and the bits per digit divides
+   GMP_LIMB_BITS then the +2 is unnecessary.  This happens always for
+   base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
+
+#define MPF_SIGNIFICANT_DIGITS(n, base, prec)				\
+  do {									\
+    size_t rawn;							\
+    ASSERT (base >= 2 && base < numberof (mp_bases));			\
+    DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base);			\
+    n = rawn + 2;							\
+  } while (0)
+
+
+/* Decimal point string, from the current C locale.  Needs <langinfo.h> for
+   nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
+   DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
+   their respective #if HAVE_FOO_H.
+
+   GLIBC recommends nl_langinfo because getting only one facet can be
+   faster, apparently.
+
+   The cascade below picks the first available source, falling back to
+   a plain "." when no locale facility is detected.  */
+
+/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
+#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
+#define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))
+#endif
+/* RADIXCHAR is deprecated, still in unix98 or some such. */
+#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
+#define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))
+#endif
+/* localeconv is slower since it returns all locale stuff */
+#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
+#define GMP_DECIMAL_POINT  (localeconv()->decimal_point)
+#endif
+#if ! defined (GMP_DECIMAL_POINT)
+#define GMP_DECIMAL_POINT  (".")
+#endif
+
+
+/* Parsed printf-style conversion parameters for __gmp_doprnt.  */
+#define DOPRNT_CONV_FIXED        1
+#define DOPRNT_CONV_SCIENTIFIC   2
+#define DOPRNT_CONV_GENERAL      3
+
+#define DOPRNT_JUSTIFY_NONE      0
+#define DOPRNT_JUSTIFY_LEFT      1
+#define DOPRNT_JUSTIFY_RIGHT     2
+#define DOPRNT_JUSTIFY_INTERNAL  3
+
+#define DOPRNT_SHOWBASE_YES      1
+#define DOPRNT_SHOWBASE_NO       2
+#define DOPRNT_SHOWBASE_NONZERO  3
+
+struct doprnt_params_t {
+  int         base;          /* negative for upper case */
+  int         conv;          /* choices above */
+  const char  *expfmt;       /* exponent format */
+  int         exptimes4;     /* exponent multiply by 4 */
+  char        fill;          /* character */
+  int         justify;       /* choices above */
+  int         prec;          /* prec field, or -1 for all digits */
+  int         showbase;      /* choices above */
+  int         showpoint;     /* if radix point always shown */
+  int         showtrailing;  /* if trailing zeros wanted */
+  char        sign;          /* '+', ' ', or '\0' */
+  int         width;         /* width field */
+};
+
+#if _GMP_H_HAVE_VA_LIST
+
+/* Output callbacks used by the formatted-output machinery; each
+   returns a char count, or -1 on error.  */
+typedef int (*doprnt_format_t) (void *, const char *, va_list);
+typedef int (*doprnt_memory_t) (void *, const char *, size_t);
+typedef int (*doprnt_reps_t)   (void *, int, int);
+typedef int (*doprnt_final_t)  (void *);
+
+struct doprnt_funs_t {
+  doprnt_format_t  format;
+  doprnt_memory_t  memory;
+  doprnt_reps_t    reps;
+  doprnt_final_t   final;   /* NULL if not required */
+};
+
+/* Predefined callback sets for the various gmp_*printf flavours.  */
+extern const struct doprnt_funs_t  __gmp_fprintf_funs;
+extern const struct doprnt_funs_t  __gmp_sprintf_funs;
+extern const struct doprnt_funs_t  __gmp_snprintf_funs;
+extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
+extern const struct doprnt_funs_t  __gmp_ostream_funs;
+
+/* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
+   "size" of these have been written.  "alloc > size" is maintained, so
+   there's room to store a '\0' at the end.  "result" is where the
+   application wants the final block pointer.  */
+struct gmp_asprintf_t {
+  char    **result;
+  char    *buf;
+  size_t  size;
+  size_t  alloc;
+};
+
+#define GMP_ASPRINTF_T_INIT(d, output)					\
+  do {									\
+    (d).result = (output);						\
+    (d).alloc = 256;							\
+    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);		\
+    (d).size = 0;							\
+  } while (0)
+
+/* If a realloc is necessary, use twice the size actually required, so as to
+   avoid repeated small reallocs.  */
+#define GMP_ASPRINTF_T_NEED(d, n)					\
+  do {									\
+    size_t  alloc, newsize, newalloc;					\
+    ASSERT ((d)->alloc >= (d)->size + 1);				\
+									\
+    alloc = (d)->alloc;							\
+    newsize = (d)->size + (n);						\
+    if (alloc <= newsize)						\
+      {									\
+	newalloc = 2*newsize;						\
+	(d)->alloc = newalloc;						\
+	(d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,		\
+					       alloc, newalloc, char);	\
+      }									\
+  } while (0)
+
+__GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t);
+__GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int);
+__GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *);
+
+/* buf is where to write the next output, and size is how much space is left
+   there.  If the application passed size==0 then that's what we'll have
+   here, and nothing at all should be written.  */
+struct gmp_snprintf_t {
+  char    *buf;
+  size_t  size;
+};
+
+/* Add the bytes printed by the call to the total retval, or bail out on an
+   error.  */
+/* These macros assume local variables "retval", "funs", "data" and an
+   "error" label in the enclosing function.  */
+#define DOPRNT_ACCUMULATE(call)						\
+  do {									\
+    int  __ret;								\
+    __ret = call;							\
+    if (__ret == -1)							\
+      goto error;							\
+    retval += __ret;							\
+  } while (0)
+#define DOPRNT_ACCUMULATE_FUN(fun, params)				\
+  do {									\
+    ASSERT ((fun) != NULL);						\
+    DOPRNT_ACCUMULATE ((*(fun)) params);				\
+  } while (0)
+
+#define DOPRNT_FORMAT(fmt, ap)						\
+  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
+#define DOPRNT_MEMORY(ptr, len)						\
+  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
+#define DOPRNT_REPS(c, n)						\
+  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
+
+#define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))
+
+#define DOPRNT_REPS_MAYBE(c, n)						\
+  do {									\
+    if ((n) != 0)							\
+      DOPRNT_REPS (c, n);						\
+  } while (0)
+#define DOPRNT_MEMORY_MAYBE(ptr, len)					\
+  do {									\
+    if ((len) != 0)							\
+      DOPRNT_MEMORY (ptr, len);						\
+  } while (0)
+
+__GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list);
+__GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *);
+
+#define __gmp_doprnt_mpf __gmp_doprnt_mpf2
+__GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);
+
+__GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list);
+#endif /* _GMP_H_HAVE_VA_LIST */
+
+
+/* Input callbacks for the formatted-input (gmp_*scanf) machinery.  */
+typedef int (*gmp_doscan_scan_t)  (void *, const char *, ...);
+typedef void *(*gmp_doscan_step_t) (void *, int);
+typedef int (*gmp_doscan_get_t)   (void *);
+typedef int (*gmp_doscan_unget_t) (int, void *);
+
+struct gmp_doscan_funs_t {
+  gmp_doscan_scan_t   scan;
+  gmp_doscan_step_t   step;
+  gmp_doscan_get_t    get;
+  gmp_doscan_unget_t  unget;
+};
+extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
+extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
+
+#if _GMP_H_HAVE_VA_LIST
+__GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list);
+#endif
+
+
+/* For testing and debugging.  */
+/* Verify the mpz invariants: normalized (no high zero limb) and
+   allocation covering the absolute size.  */
+#define MPZ_CHECK_FORMAT(z)						\
+  do {									\
+    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);		\
+    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));				\
+    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));				\
+  } while (0)
+
+/* Verify the mpq invariants: canonical zero as 0/1, otherwise
+   numerator and denominator coprime, denominator positive.  */
+#define MPQ_CHECK_FORMAT(q)						\
+  do {									\
+    MPZ_CHECK_FORMAT (mpq_numref (q));					\
+    MPZ_CHECK_FORMAT (mpq_denref (q));					\
+    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);				\
+									\
+    if (SIZ(mpq_numref(q)) == 0)					\
+      {									\
+	/* should have zero as 0/1 */					\
+	ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1				\
+		       && PTR(mpq_denref(q))[0] == 1);			\
+      }									\
+    else								\
+      {									\
+	/* should have no common factors */				\
+	mpz_t  g;							\
+	mpz_init (g);							\
+	mpz_gcd (g, mpq_numref(q), mpq_denref(q));			\
+	ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);				\
+	mpz_clear (g);							\
+      }									\
+  } while (0)
+
+/* Verify the mpf invariants: sane precision, size within precision,
+   zero exponent for zero, and a normalized high limb otherwise.  */
+#define MPF_CHECK_FORMAT(f)						\
+  do {									\
+    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53));			\
+    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);				\
+    if (SIZ(f) == 0)							\
+      ASSERT_ALWAYS (EXP(f) == 0);					\
+    if (SIZ(f) != 0)							\
+      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);			\
+  } while (0)
+
+
+/* Enhancement: The "mod" and "gcd_1" functions below could have
+   __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
+   function pointers, only actual functions.  It probably doesn't make much
+   difference to the gmp code, since hopefully we arrange calls so there's
+   no great need for the compiler to move things around.  */
+
+#if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
+/* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
+   in mpn/x86/x86-defs.m4 and mpn/x86_64/x86_64-defs.m4.  Be sure to update
+   those when changing here.  */
+struct cpuvec_t {
+  DECL_add_n           ((*add_n));
+  DECL_addlsh1_n       ((*addlsh1_n));
+  DECL_addlsh2_n       ((*addlsh2_n));
+  DECL_addmul_1        ((*addmul_1));
+  DECL_addmul_2        ((*addmul_2));
+  DECL_bdiv_dbm1c      ((*bdiv_dbm1c));
+  DECL_cnd_add_n       ((*cnd_add_n));
+  DECL_cnd_sub_n       ((*cnd_sub_n));
+  DECL_com             ((*com));
+  DECL_copyd           ((*copyd));
+  DECL_copyi           ((*copyi));
+  DECL_divexact_1      ((*divexact_1));
+  DECL_divrem_1        ((*divrem_1));
+  DECL_gcd_11          ((*gcd_11));
+  DECL_lshift          ((*lshift));
+  DECL_lshiftc         ((*lshiftc));
+  DECL_mod_1           ((*mod_1));
+  DECL_mod_1_1p        ((*mod_1_1p));
+  DECL_mod_1_1p_cps    ((*mod_1_1p_cps));
+  DECL_mod_1s_2p       ((*mod_1s_2p));
+  DECL_mod_1s_2p_cps   ((*mod_1s_2p_cps));
+  DECL_mod_1s_4p       ((*mod_1s_4p));
+  DECL_mod_1s_4p_cps   ((*mod_1s_4p_cps));
+  DECL_mod_34lsub1     ((*mod_34lsub1));
+  DECL_modexact_1c_odd ((*modexact_1c_odd));
+  DECL_mul_1           ((*mul_1));
+  DECL_mul_basecase    ((*mul_basecase));
+  DECL_mullo_basecase  ((*mullo_basecase));
+  DECL_preinv_divrem_1 ((*preinv_divrem_1));
+  DECL_preinv_mod_1    ((*preinv_mod_1));
+  DECL_redc_1          ((*redc_1));
+  DECL_redc_2          ((*redc_2));
+  DECL_rshift          ((*rshift));
+  DECL_sqr_basecase    ((*sqr_basecase));
+  DECL_sub_n           ((*sub_n));
+  DECL_sublsh1_n       ((*sublsh1_n));
+  DECL_submul_1        ((*submul_1));
+  mp_size_t            mul_toom22_threshold;
+  mp_size_t            mul_toom33_threshold;
+  mp_size_t            sqr_toom2_threshold;
+  mp_size_t            sqr_toom3_threshold;
+  mp_size_t            bmod_1_to_mod_1_threshold;
+};
+__GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
+__GMP_DECLSPEC extern int __gmpn_cpuvec_initialized;
+#endif /* x86 fat binary */
+
+__GMP_DECLSPEC void __gmpn_cpuvec_init (void);
+
+/* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
+   if that hasn't yet been done (to establish the right values).  */
+#define CPUVEC_THRESHOLD(field)						      \
+  ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
+   __gmpn_cpuvec.field)
+
+
+#if HAVE_NATIVE_mpn_add_nc
+#define mpn_add_nc __MPN(add_nc)
+__GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#else
+static inline
+mp_limb_t
+mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
+{
+  /* n-limb add, then fold the incoming carry ci into the result.  */
+  mp_limb_t cy = mpn_add_n (rp, up, vp, n);
+  cy += mpn_add_1 (rp, rp, n, ci);
+  return cy;
+}
+#endif
+
+#if HAVE_NATIVE_mpn_sub_nc
+#define mpn_sub_nc __MPN(sub_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#else
+static inline mp_limb_t
+mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
+{
+  /* n-limb subtract, then fold the incoming borrow ci into the result.  */
+  mp_limb_t bw = mpn_sub_n (rp, up, vp, n);
+  bw += mpn_sub_1 (rp, rp, n, ci);
+  return bw;
+}
+#endif
+
+#if TUNE_PROGRAM_BUILD
+/* Some extras wanted when recompiling some .c files for use by the tune
+   program.  Not part of a normal build.
+
+   It's necessary to keep these thresholds as #defines (just to an
+   identically named variable), since various defaults are established based
+   on #ifdef in the .c files.  For some this is not so (the defaults are
+   instead established above), but all are done this way for consistency. */
+
+#undef	MUL_TOOM22_THRESHOLD
+#define MUL_TOOM22_THRESHOLD		mul_toom22_threshold
+extern mp_size_t			mul_toom22_threshold;
+
+#undef	MUL_TOOM33_THRESHOLD
+#define MUL_TOOM33_THRESHOLD		mul_toom33_threshold
+extern mp_size_t			mul_toom33_threshold;
+
+#undef	MUL_TOOM44_THRESHOLD
+#define MUL_TOOM44_THRESHOLD		mul_toom44_threshold
+extern mp_size_t			mul_toom44_threshold;
+
+#undef	MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD		mul_toom6h_threshold
+extern mp_size_t			mul_toom6h_threshold;
+
+#undef	MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD		mul_toom8h_threshold
+extern mp_size_t			mul_toom8h_threshold;
+
+#undef	MUL_TOOM32_TO_TOOM43_THRESHOLD
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD	mul_toom32_to_toom43_threshold
+extern mp_size_t			mul_toom32_to_toom43_threshold;
+
+#undef	MUL_TOOM32_TO_TOOM53_THRESHOLD
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD	mul_toom32_to_toom53_threshold
+extern mp_size_t			mul_toom32_to_toom53_threshold;
+
+#undef	MUL_TOOM42_TO_TOOM53_THRESHOLD
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD	mul_toom42_to_toom53_threshold
+extern mp_size_t			mul_toom42_to_toom53_threshold;
+
+#undef	MUL_TOOM42_TO_TOOM63_THRESHOLD
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD	mul_toom42_to_toom63_threshold
+extern mp_size_t			mul_toom42_to_toom63_threshold;
+
+#undef  MUL_TOOM43_TO_TOOM54_THRESHOLD
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD	mul_toom43_to_toom54_threshold
+extern mp_size_t			mul_toom43_to_toom54_threshold;
+
+#undef	MUL_FFT_THRESHOLD
+#define MUL_FFT_THRESHOLD		mul_fft_threshold
+extern mp_size_t			mul_fft_threshold;
+
+#undef	MUL_FFT_MODF_THRESHOLD
+#define MUL_FFT_MODF_THRESHOLD		mul_fft_modf_threshold
+extern mp_size_t			mul_fft_modf_threshold;
+
+#undef	MUL_FFT_TABLE
+#define MUL_FFT_TABLE			{ 0 }
+
+#undef	MUL_FFT_TABLE3
+#define MUL_FFT_TABLE3			{ {0,0} }
+
+/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
+   remain as zero (always use it). */
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+#undef	SQR_BASECASE_THRESHOLD
+#define SQR_BASECASE_THRESHOLD		sqr_basecase_threshold
+extern mp_size_t			sqr_basecase_threshold;
+#endif
+
+#if TUNE_PROGRAM_BUILD_SQR
+#undef	SQR_TOOM2_THRESHOLD
+#define SQR_TOOM2_THRESHOLD		SQR_TOOM2_MAX_GENERIC
+#else
+#undef	SQR_TOOM2_THRESHOLD
+#define SQR_TOOM2_THRESHOLD		sqr_toom2_threshold
+extern mp_size_t			sqr_toom2_threshold;
+#endif
+
+#undef	SQR_TOOM3_THRESHOLD
+#define SQR_TOOM3_THRESHOLD		sqr_toom3_threshold
+extern mp_size_t			sqr_toom3_threshold;
+
+#undef	SQR_TOOM4_THRESHOLD
+#define SQR_TOOM4_THRESHOLD		sqr_toom4_threshold
+extern mp_size_t			sqr_toom4_threshold;
+
+#undef	SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD		sqr_toom6_threshold
+extern mp_size_t			sqr_toom6_threshold;
+
+#undef	SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD		sqr_toom8_threshold
+extern mp_size_t			sqr_toom8_threshold;
+
+#undef  SQR_FFT_THRESHOLD
+#define SQR_FFT_THRESHOLD		sqr_fft_threshold
+extern mp_size_t			sqr_fft_threshold;
+
+#undef  SQR_FFT_MODF_THRESHOLD
+#define SQR_FFT_MODF_THRESHOLD		sqr_fft_modf_threshold
+extern mp_size_t			sqr_fft_modf_threshold;
+
+#undef	SQR_FFT_TABLE
+#define SQR_FFT_TABLE			{ 0 }
+
+#undef	SQR_FFT_TABLE3
+#define SQR_FFT_TABLE3			{ {0,0} }
+
+#undef	MULLO_BASECASE_THRESHOLD
+#define MULLO_BASECASE_THRESHOLD	mullo_basecase_threshold
+extern mp_size_t			mullo_basecase_threshold;
+
+#undef	MULLO_DC_THRESHOLD
+#define MULLO_DC_THRESHOLD		mullo_dc_threshold
+extern mp_size_t			mullo_dc_threshold;
+
+#undef	MULLO_MUL_N_THRESHOLD
+#define MULLO_MUL_N_THRESHOLD		mullo_mul_n_threshold
+extern mp_size_t			mullo_mul_n_threshold;
+
+#undef	SQRLO_BASECASE_THRESHOLD
+#define SQRLO_BASECASE_THRESHOLD	sqrlo_basecase_threshold
+extern mp_size_t			sqrlo_basecase_threshold;
+
+#undef	SQRLO_DC_THRESHOLD
+#define SQRLO_DC_THRESHOLD		sqrlo_dc_threshold
+extern mp_size_t			sqrlo_dc_threshold;
+
+#undef	SQRLO_SQR_THRESHOLD
+#define SQRLO_SQR_THRESHOLD		sqrlo_sqr_threshold
+extern mp_size_t			sqrlo_sqr_threshold;
+
+#undef	MULMID_TOOM42_THRESHOLD
+#define MULMID_TOOM42_THRESHOLD		mulmid_toom42_threshold
+extern mp_size_t			mulmid_toom42_threshold;
+
+#undef	DIV_QR_2_PI2_THRESHOLD
+#define DIV_QR_2_PI2_THRESHOLD		div_qr_2_pi2_threshold
+extern mp_size_t			div_qr_2_pi2_threshold;
+
+#undef	DC_DIV_QR_THRESHOLD
+#define DC_DIV_QR_THRESHOLD		dc_div_qr_threshold
+extern mp_size_t			dc_div_qr_threshold;
+
+#undef	DC_DIVAPPR_Q_THRESHOLD
+#define DC_DIVAPPR_Q_THRESHOLD		dc_divappr_q_threshold
+extern mp_size_t			dc_divappr_q_threshold;
+
+#undef	DC_BDIV_Q_THRESHOLD
+#define DC_BDIV_Q_THRESHOLD		dc_bdiv_q_threshold
+extern mp_size_t			dc_bdiv_q_threshold;
+
+#undef	DC_BDIV_QR_THRESHOLD
+#define DC_BDIV_QR_THRESHOLD		dc_bdiv_qr_threshold
+extern mp_size_t			dc_bdiv_qr_threshold;
+
+#undef	MU_DIV_QR_THRESHOLD
+#define MU_DIV_QR_THRESHOLD		mu_div_qr_threshold
+extern mp_size_t			mu_div_qr_threshold;
+
+#undef	MU_DIVAPPR_Q_THRESHOLD
+#define MU_DIVAPPR_Q_THRESHOLD		mu_divappr_q_threshold
+extern mp_size_t			mu_divappr_q_threshold;
+
+#undef	MUPI_DIV_QR_THRESHOLD
+#define MUPI_DIV_QR_THRESHOLD		mupi_div_qr_threshold
+extern mp_size_t			mupi_div_qr_threshold;
+
+#undef	MU_BDIV_QR_THRESHOLD
+#define MU_BDIV_QR_THRESHOLD		mu_bdiv_qr_threshold
+extern mp_size_t			mu_bdiv_qr_threshold;
+
+#undef	MU_BDIV_Q_THRESHOLD
+#define MU_BDIV_Q_THRESHOLD		mu_bdiv_q_threshold
+extern mp_size_t			mu_bdiv_q_threshold;
+
+#undef	INV_MULMOD_BNM1_THRESHOLD
+#define INV_MULMOD_BNM1_THRESHOLD	inv_mulmod_bnm1_threshold
+extern mp_size_t			inv_mulmod_bnm1_threshold;
+
+#undef	INV_NEWTON_THRESHOLD
+#define INV_NEWTON_THRESHOLD		inv_newton_threshold
+extern mp_size_t			inv_newton_threshold;
+
+#undef	INV_APPR_THRESHOLD
+#define INV_APPR_THRESHOLD		inv_appr_threshold
+extern mp_size_t			inv_appr_threshold;
+
+#undef	BINV_NEWTON_THRESHOLD
+#define BINV_NEWTON_THRESHOLD		binv_newton_threshold
+extern mp_size_t			binv_newton_threshold;
+
+#undef	REDC_1_TO_REDC_2_THRESHOLD
+#define REDC_1_TO_REDC_2_THRESHOLD	redc_1_to_redc_2_threshold
+extern mp_size_t			redc_1_to_redc_2_threshold;
+
+#undef	REDC_2_TO_REDC_N_THRESHOLD
+#define REDC_2_TO_REDC_N_THRESHOLD	redc_2_to_redc_n_threshold
+extern mp_size_t			redc_2_to_redc_n_threshold;
+
+#undef	REDC_1_TO_REDC_N_THRESHOLD
+#define REDC_1_TO_REDC_N_THRESHOLD	redc_1_to_redc_n_threshold
+extern mp_size_t			redc_1_to_redc_n_threshold;
+
+#undef	MATRIX22_STRASSEN_THRESHOLD
+#define MATRIX22_STRASSEN_THRESHOLD	matrix22_strassen_threshold
+extern mp_size_t			matrix22_strassen_threshold;
+
+typedef int hgcd2_func_t (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,
+			  struct hgcd_matrix1 *);
+extern hgcd2_func_t *hgcd2_func;
+
+#undef	HGCD_THRESHOLD
+#define HGCD_THRESHOLD			hgcd_threshold
+extern mp_size_t			hgcd_threshold;
+
+#undef	HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD		hgcd_appr_threshold
+extern mp_size_t			hgcd_appr_threshold;
+
+#undef	HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD		hgcd_reduce_threshold
+extern mp_size_t			hgcd_reduce_threshold;
+
+#undef	GCD_DC_THRESHOLD
+#define GCD_DC_THRESHOLD		gcd_dc_threshold
+extern mp_size_t			gcd_dc_threshold;
+
+#undef  GCDEXT_DC_THRESHOLD
+#define GCDEXT_DC_THRESHOLD		gcdext_dc_threshold
+extern mp_size_t			gcdext_dc_threshold;
+
+#undef  DIV_QR_1N_PI1_METHOD
+#define DIV_QR_1N_PI1_METHOD		div_qr_1n_pi1_method
+extern int				div_qr_1n_pi1_method;
+
+#undef  DIV_QR_1_NORM_THRESHOLD
+#define DIV_QR_1_NORM_THRESHOLD		div_qr_1_norm_threshold
+extern mp_size_t			div_qr_1_norm_threshold;
+
+#undef  DIV_QR_1_UNNORM_THRESHOLD
+#define DIV_QR_1_UNNORM_THRESHOLD	div_qr_1_unnorm_threshold
+extern mp_size_t			div_qr_1_unnorm_threshold;
+
+#undef  DIVREM_1_NORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD		divrem_1_norm_threshold
+extern mp_size_t			divrem_1_norm_threshold;
+
+#undef  DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_UNNORM_THRESHOLD	divrem_1_unnorm_threshold
+extern mp_size_t			divrem_1_unnorm_threshold;
+
+#undef	MOD_1_NORM_THRESHOLD
+#define MOD_1_NORM_THRESHOLD		mod_1_norm_threshold
+extern mp_size_t			mod_1_norm_threshold;
+
+#undef	MOD_1_UNNORM_THRESHOLD
+#define MOD_1_UNNORM_THRESHOLD		mod_1_unnorm_threshold
+extern mp_size_t			mod_1_unnorm_threshold;
+
+#undef  MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD			mod_1_1p_method
+extern int				mod_1_1p_method;
+
+#undef	MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD	mod_1n_to_mod_1_1_threshold
+extern mp_size_t			mod_1n_to_mod_1_1_threshold;
+
+#undef	MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD	mod_1u_to_mod_1_1_threshold
+extern mp_size_t			mod_1u_to_mod_1_1_threshold;
+
+#undef	MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD	mod_1_1_to_mod_1_2_threshold
+extern mp_size_t			mod_1_1_to_mod_1_2_threshold;
+
+#undef	MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD	mod_1_2_to_mod_1_4_threshold
+extern mp_size_t			mod_1_2_to_mod_1_4_threshold;
+
+#undef	PREINV_MOD_1_TO_MOD_1_THRESHOLD
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD	preinv_mod_1_to_mod_1_threshold
+extern mp_size_t			preinv_mod_1_to_mod_1_threshold;
+
+#if ! UDIV_PREINV_ALWAYS
+#undef	DIVREM_2_THRESHOLD
+#define DIVREM_2_THRESHOLD		divrem_2_threshold
+extern mp_size_t			divrem_2_threshold;
+#endif
+
+#undef	MULMOD_BNM1_THRESHOLD
+#define MULMOD_BNM1_THRESHOLD		mulmod_bnm1_threshold
+extern mp_size_t			mulmod_bnm1_threshold;
+
+#undef	SQRMOD_BNM1_THRESHOLD
+#define SQRMOD_BNM1_THRESHOLD		sqrmod_bnm1_threshold
+extern mp_size_t			sqrmod_bnm1_threshold;
+
+#undef	GET_STR_DC_THRESHOLD
+#define GET_STR_DC_THRESHOLD		get_str_dc_threshold
+extern mp_size_t			get_str_dc_threshold;
+
+#undef  GET_STR_PRECOMPUTE_THRESHOLD
+#define GET_STR_PRECOMPUTE_THRESHOLD	get_str_precompute_threshold
+extern mp_size_t			get_str_precompute_threshold;
+
+#undef	SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD		set_str_dc_threshold
+extern mp_size_t			set_str_dc_threshold;
+
+#undef  SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD	set_str_precompute_threshold
+extern mp_size_t			set_str_precompute_threshold;
+
+#undef  FAC_ODD_THRESHOLD
+#define FAC_ODD_THRESHOLD		fac_odd_threshold
+extern  mp_size_t			fac_odd_threshold;
+
+#undef  FAC_DSC_THRESHOLD
+#define FAC_DSC_THRESHOLD		fac_dsc_threshold
+extern  mp_size_t			fac_dsc_threshold;
+
+#undef  FFT_TABLE_ATTRS
+#define FFT_TABLE_ATTRS
+extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
+#define FFT_TABLE3_SIZE 2000	/* generous space for tuning */
+extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
+
+/* Sizes the tune program tests up to, used in a couple of recompilations. */
+#undef MUL_TOOM22_THRESHOLD_LIMIT
+#undef MUL_TOOM33_THRESHOLD_LIMIT
+#undef MULLO_BASECASE_THRESHOLD_LIMIT
+#undef SQRLO_BASECASE_THRESHOLD_LIMIT
+#undef SQRLO_DC_THRESHOLD_LIMIT
+#undef SQR_TOOM3_THRESHOLD_LIMIT
+#define SQR_TOOM2_MAX_GENERIC           200
+#define MUL_TOOM22_THRESHOLD_LIMIT      700
+#define MUL_TOOM33_THRESHOLD_LIMIT      700
+#define SQR_TOOM3_THRESHOLD_LIMIT       400
+#define MUL_TOOM44_THRESHOLD_LIMIT     1000
+#define SQR_TOOM4_THRESHOLD_LIMIT      1000
+#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
+#define SQR_TOOM6_THRESHOLD_LIMIT      1100
+#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
+#define SQR_TOOM8_THRESHOLD_LIMIT      1200
+#define MULLO_BASECASE_THRESHOLD_LIMIT  200
+#define SQRLO_BASECASE_THRESHOLD_LIMIT  200
+#define SQRLO_DC_THRESHOLD_LIMIT        400
+#define GET_STR_THRESHOLD_LIMIT         150
+#define FAC_DSC_THRESHOLD_LIMIT        2048
+
+#endif /* TUNE_PROGRAM_BUILD */
+
+#if defined (__cplusplus)
+}
+#endif
+
+/* FIXME: Make these itch functions less conservative.  Also consider making
+   them dependent on just 'an', and compute the allocation directly from 'an'
+   instead of via n.  */
+
+/* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
+   k is the smallest k such that
+     ceil(an/2^k) < MUL_TOOM22_THRESHOLD.
+   which implies that
+     k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
+       = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
+*/
+#define mpn_toom22_mul_itch(an, bn) \
+  (2 * ((an) + GMP_NUMB_BITS))
+#define mpn_toom2_sqr_itch(an) \
+  (2 * ((an) + GMP_NUMB_BITS))
+
+/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
+   We use 3an + C, so that we can use a smaller constant.
+ */
+#define mpn_toom33_mul_itch(an, bn) \
+  (3 * (an) + GMP_NUMB_BITS)
+#define mpn_toom3_sqr_itch(an) \
+  (3 * (an) + GMP_NUMB_BITS)
+
+/* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
+   We use 3an + C, so that we can use a smaller constant.
+ */
+#define mpn_toom44_mul_itch(an, bn) \
+  (3 * (an) + GMP_NUMB_BITS)
+#define mpn_toom4_sqr_itch(an) \
+  (3 * (an) + GMP_NUMB_BITS)
+
+#define mpn_toom6_sqr_itch(n)						\
+  (((n) - SQR_TOOM6_THRESHOLD)*2 +					\
+   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6,				\
+       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))
+
+#define MUL_TOOM6H_MIN							\
+  ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ?			\
+    MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
+#define mpn_toom6_mul_n_itch(n)						\
+  (((n) - MUL_TOOM6H_MIN)*2 +						\
+   MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6,				\
+       mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))
+
+static inline mp_size_t
+mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
+  mp_size_t estimatedN;
+  estimatedN = (an + bn) / (size_t) 10 + 1;
+  return mpn_toom6_mul_n_itch (estimatedN * 6);
+}
+
+#define mpn_toom8_sqr_itch(n)						\
+  ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) +			\
+   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,			\
+       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))
+
+#define MUL_TOOM8H_MIN							\
+  ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ?				\
+    MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
+#define mpn_toom8_mul_n_itch(n)						\
+  ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) +				\
+   MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6,			\
+       mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))
+
+static inline mp_size_t
+mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
+  mp_size_t estimatedN;
+  estimatedN = (an + bn) / (size_t) 14 + 1;
+  return mpn_toom8_mul_n_itch (estimatedN * 8);
+}
+
+static inline mp_size_t
+mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
+  mp_size_t itch = 2 * n + 1;
+
+  return itch;
+}
+
+static inline mp_size_t
+mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
+  return 6 * n + 3;
+}
+
+static inline mp_size_t
+mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
+
+  return 6*n + 4;
+}
+
+static inline mp_size_t
+mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
+  return 6*n + 4;
+}
+
+static inline mp_size_t
+mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
+  return 10 * n + 10;
+}
+
+static inline mp_size_t
+mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
+  return 10 * n + 10;
+}
+
+static inline mp_size_t
+mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
+  return 9 * n + 3;
+}
+
+static inline mp_size_t
+mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
+  return 9 * n + 3;
+}
+
+/* let S(n) = space required for input size n,
+   then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)).   */
+#define mpn_toom42_mulmid_itch(n) \
+  (3 * (n) + GMP_NUMB_BITS)
+
+#if 0
+#define mpn_fft_mul mpn_mul_fft_full
+#else
+#define mpn_fft_mul mpn_nussbaumer_mul
+#endif
+
+#ifdef __cplusplus
+
+/* A little helper for a null-terminated __gmp_allocate_func string.
+   The destructor ensures it's freed even if an exception is thrown.
+   The len field is needed by the destructor, and can be used by anyone else
+   to avoid a second strlen pass over the data.
+
+   Since our input is a C string, using strlen is correct.  Perhaps it'd be
+   more C++-ish style to use std::char_traits<char>::length, but char_traits
+   isn't available in gcc 2.95.4.  */
+
+class gmp_allocated_string {
+ public:
+  char *str;
+  size_t len;
+  gmp_allocated_string(char *arg)
+  {
+    str = arg;
+    len = std::strlen (str);
+  }
+  ~gmp_allocated_string()
+  {
+    (*__gmp_free_func) (str, len+1);
+  }
+};
+
+std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
+int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
+void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
+void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
+std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
+extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;
+
+#endif /* __cplusplus */
+
+#endif /* __GMP_IMPL_H__ */
diff --git a/gmpxx.h b/gmpxx.h
new file mode 100644
index 0000000..62d159c
--- /dev/null
+++ b/gmpxx.h
@@ -0,0 +1,3871 @@
+/* gmpxx.h -- C++ class wrapper for GMP types.  -*- C++ -*-
+
+Copyright 2001-2003, 2006, 2008, 2011-2015, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#ifndef __GMP_PLUSPLUS__
+#define __GMP_PLUSPLUS__
+
+#include <iosfwd>
+
+#include <cstring>  /* for strlen */
+#include <limits>  /* numeric_limits */
+#include <utility>
+#include <algorithm>  /* swap */
+#include <string>
+#ifdef __EXCEPTIONS
+#include <stdexcept>
+#else
+#include <cstdlib>
+#include <iostream>
+#endif
+#include <cfloat>
+#include <gmp.h>
+
+// wrapper for gcc's __builtin_constant_p
+// __builtin_constant_p has been in gcc since forever,
+// but g++-3.4 miscompiles it.
+#if __GMP_GNUC_PREREQ(4, 2)
+#define __GMPXX_CONSTANT(X) __builtin_constant_p(X)
+#else
+#define __GMPXX_CONSTANT(X) false
+#endif
+#define __GMPXX_CONSTANT_TRUE(X) (__GMPXX_CONSTANT(X) && (X))
+
+// Use C++11 features
+#ifndef __GMPXX_USE_CXX11
+#if __cplusplus >= 201103L
+#define __GMPXX_USE_CXX11 1
+#else
+#define __GMPXX_USE_CXX11 0
+#endif
+#endif
+
+#if __GMPXX_USE_CXX11
+#define __GMPXX_NOEXCEPT noexcept
+#include <type_traits> // for common_type
+#else
+#define __GMPXX_NOEXCEPT
+#endif
+
+// Max allocations for plain types when converted to GMP types
+#if GMP_NAIL_BITS != 0 && ! defined _LONG_LONG_LIMB
+#define __GMPZ_ULI_LIMBS 2
+#else
+#define __GMPZ_ULI_LIMBS 1
+#endif
+
+#define __GMPXX_BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
+#define __GMPZ_DBL_LIMBS __GMPXX_BITS_TO_LIMBS(DBL_MAX_EXP)+1
+#define __GMPQ_NUM_DBL_LIMBS __GMPZ_DBL_LIMBS
+#define __GMPQ_DEN_DBL_LIMBS __GMPXX_BITS_TO_LIMBS(DBL_MANT_DIG+1-DBL_MIN_EXP)+1
+// The final +1s are a security margin. The current implementation of
+// mpq_set_d seems to need it for the denominator.
+
+inline void __mpz_set_ui_safe(mpz_ptr p, unsigned long l)
+{
+  p->_mp_size = (l != 0);
+  p->_mp_d[0] = l & GMP_NUMB_MASK;
+#if __GMPZ_ULI_LIMBS > 1
+  l >>= GMP_NUMB_BITS;
+  p->_mp_d[1] = l;
+  p->_mp_size += (l != 0);
+#endif
+}
+
+inline void __mpz_set_si_safe(mpz_ptr p, long l)
+{
+  if (l >= 0)
+    // Note: we know the high bit of l is 0 so we could do slightly better
+    __mpz_set_ui_safe(p, l);
+  else
+  {
+    __mpz_set_ui_safe(p, -static_cast<unsigned long>(l));
+    mpz_neg(p, p);
+  }
+}
+
+// Fake temporary variables
+#define __GMPXX_TMPZ_UI							\
+  mpz_t temp;								\
+  mp_limb_t limbs[__GMPZ_ULI_LIMBS];					\
+  temp->_mp_d = limbs;							\
+  __mpz_set_ui_safe (temp, l)
+#define __GMPXX_TMPZ_SI							\
+  mpz_t temp;								\
+  mp_limb_t limbs[__GMPZ_ULI_LIMBS];					\
+  temp->_mp_d = limbs;							\
+  __mpz_set_si_safe (temp, l)
+#define __GMPXX_TMPZ_D							\
+  mpz_t temp;								\
+  mp_limb_t limbs[__GMPZ_DBL_LIMBS];					\
+  temp->_mp_d = limbs;							\
+  temp->_mp_alloc = __GMPZ_DBL_LIMBS;					\
+  mpz_set_d (temp, d)
+
+#define __GMPXX_TMPQ_UI							\
+  mpq_t temp;								\
+  mp_limb_t limbs[__GMPZ_ULI_LIMBS+1];					\
+  mpq_numref(temp)->_mp_d = limbs;					\
+  __mpz_set_ui_safe (mpq_numref(temp), l);				\
+  mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS;			\
+  mpq_denref(temp)->_mp_size = 1;					\
+  mpq_denref(temp)->_mp_d[0] = 1
+#define __GMPXX_TMPQ_SI							\
+  mpq_t temp;								\
+  mp_limb_t limbs[__GMPZ_ULI_LIMBS+1];					\
+  mpq_numref(temp)->_mp_d = limbs;					\
+  __mpz_set_si_safe (mpq_numref(temp), l);				\
+  mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS;			\
+  mpq_denref(temp)->_mp_size = 1;					\
+  mpq_denref(temp)->_mp_d[0] = 1
+#define __GMPXX_TMPQ_D							\
+  mpq_t temp;								\
+  mp_limb_t limbs[__GMPQ_NUM_DBL_LIMBS + __GMPQ_DEN_DBL_LIMBS];		\
+  mpq_numref(temp)->_mp_d = limbs;					\
+  mpq_numref(temp)->_mp_alloc = __GMPQ_NUM_DBL_LIMBS;			\
+  mpq_denref(temp)->_mp_d = limbs + __GMPQ_NUM_DBL_LIMBS;		\
+  mpq_denref(temp)->_mp_alloc = __GMPQ_DEN_DBL_LIMBS;			\
+  mpq_set_d (temp, d)
+
+inline unsigned long __gmpxx_abs_ui (signed long l)
+{
+  const unsigned long w = static_cast<unsigned long>(l);
+  return l < 0 ? -w : w;  // unsigned negation is well-defined, even for LONG_MIN
+}
+
+/**************** Function objects ****************/
+/* Any evaluation of a __gmp_expr ends up calling one of these functions
+   all intermediate functions being inline, the evaluation should optimize
+   to a direct call to the relevant function, thus yielding no overhead
+   over the C interface. */
+
+struct __gmp_unary_plus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_set(z, w); }
+  static void eval(mpq_ptr q, mpq_srcptr r) { mpq_set(q, r); }
+  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_set(f, g); }
+};
+
+struct __gmp_unary_minus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_neg(z, w); }
+  static void eval(mpq_ptr q, mpq_srcptr r) { mpq_neg(q, r); }
+  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_neg(f, g); }
+};
+
+struct __gmp_unary_com
+{
+  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_com(z, w); }
+};
+
+struct __gmp_binary_plus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_add(z, w, v); }
+
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  {
+    // Ideally, those checks should happen earlier so that the tree
+    // generated for a+0+b would just be sum(a,b).
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      if (z != w) mpz_set(z, w);
+    }
+    else
+      mpz_add_ui(z, w, l);
+  }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  { eval(z, w, l); }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {
+    if (l >= 0)
+      eval(z, w, static_cast<unsigned long>(l));
+    else
+      mpz_sub_ui(z, w, -static_cast<unsigned long>(l));
+  }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  { eval(z, w, l); }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {  __GMPXX_TMPZ_D;    mpz_add (z, w, temp); }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  { eval(z, w, d); }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
+  { mpq_add(q, r, s); }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
+  {
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      if (q != r) mpq_set(q, r);
+    }
+    else if (__GMPXX_CONSTANT(l) && l == 1)
+    {
+      mpz_add (mpq_numref(q), mpq_numref(r), mpq_denref(r));
+      if (q != r) mpz_set(mpq_denref(q), mpq_denref(r));
+    }
+    else
+    {
+      if (q == r)
+        mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+      else
+      {
+        mpz_mul_ui(mpq_numref(q), mpq_denref(r), l);
+        mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r));
+        mpz_set(mpq_denref(q), mpq_denref(r));
+      }
+    }
+  }
+  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+  { eval(q, r, l); }
+  static inline void eval(mpq_ptr q, mpq_srcptr r, signed long int l);
+  // defined after __gmp_binary_minus
+  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+  { eval(q, r, l); }
+  static void eval(mpq_ptr q, mpq_srcptr r, double d)
+  {  __GMPXX_TMPQ_D;    mpq_add (q, r, temp); }
+  static void eval(mpq_ptr q, double d, mpq_srcptr r)
+  { eval(q, r, d); }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)
+  {
+    if (q == r)
+      mpz_addmul(mpq_numref(q), mpq_denref(q), z);
+    else
+    {
+      mpz_mul(mpq_numref(q), mpq_denref(r), z);
+      mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r));
+      mpz_set(mpq_denref(q), mpq_denref(r));
+    }
+  }
+  static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)
+  { eval(q, r, z); }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
+  { mpf_add(f, g, h); }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  { mpf_add_ui(f, g, l); }
+  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
+  { mpf_add_ui(f, g, l); }
+  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
+  {
+    if (l >= 0)
+      mpf_add_ui(f, g, l);
+    else
+      mpf_sub_ui(f, g, -static_cast<unsigned long>(l));
+  }
+  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
+  { eval(f, g, l); }
+  static void eval(mpf_ptr f, mpf_srcptr g, double d)
+  {
+    mpf_t temp;
+    mpf_init2(temp, 8*sizeof(double));
+    mpf_set_d(temp, d);
+    mpf_add(f, g, temp);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, double d, mpf_srcptr g)
+  { eval(f, g, d); }
+};
+
+struct __gmp_binary_minus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_sub(z, w, v); }
+
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  {
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      if (z != w) mpz_set(z, w);
+    }
+    else
+      mpz_sub_ui(z, w, l);
+  }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  {
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      mpz_neg(z, w);
+    }
+    else
+      mpz_ui_sub(z, l, w);
+  }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {
+    if (l >= 0)
+      eval(z, w, static_cast<unsigned long>(l));
+    else
+      mpz_add_ui(z, w, -static_cast<unsigned long>(l));
+  }
  // z = l - w for signed l: delegate to the unsigned overload when l >= 0,
  // otherwise compute -(w + |l|) == l - w.
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  {
    if (l >= 0)
      eval(z, static_cast<unsigned long>(l), w);
    else
      {
        mpz_add_ui(z, w, -static_cast<unsigned long>(l));
        mpz_neg(z, z);
      }
  }
  // Mixed mpz/double: __GMPXX_TMPZ_D materializes the double as a
  // temporary mpz named "temp" before subtracting.
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_sub (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  {  __GMPXX_TMPZ_D;    mpz_sub (z, temp, w); }

  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
  { mpq_sub(q, r, s); }

  // q = r - l.  Compile-time constants 0 and 1 bypass the generic path;
  // otherwise num(q) = num(r) - l*den(r) with the denominator kept, which
  // preserves canonical form (gcd with the denominator is unchanged).
  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
  {
    if (__GMPXX_CONSTANT(l) && l == 0)
    {
      if (q != r) mpq_set(q, r);
    }
    else if (__GMPXX_CONSTANT(l) && l == 1)
    {
      mpz_sub (mpq_numref(q), mpq_numref(r), mpq_denref(r));
      if (q != r) mpz_set(mpq_denref(q), mpq_denref(r));
    }
    else
    {
      if (q == r)
        mpz_submul_ui(mpq_numref(q), mpq_denref(q), l);
      else
      {
        mpz_mul_ui(mpq_numref(q), mpq_denref(r), l);
        mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q));
        mpz_set(mpq_denref(q), mpq_denref(r));
      }
    }
  }
  // l - r == -(r - l).
  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
  { eval(q, r, l); mpq_neg(q, q); }
  // q = r - l for signed l; a negative l becomes an addition of |l|.
  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
  {
    if (l >= 0)
      eval(q, r, static_cast<unsigned long>(l));
    else
      __gmp_binary_plus::eval(q, r, -static_cast<unsigned long>(l));
  }
  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
  { eval(q, r, l); mpq_neg(q, q); }
  static void eval(mpq_ptr q, mpq_srcptr r, double d)
  {  __GMPXX_TMPQ_D;    mpq_sub (q, r, temp); }
  static void eval(mpq_ptr q, double d, mpq_srcptr r)
  {  __GMPXX_TMPQ_D;    mpq_sub (q, temp, r); }

  // q = r - z: num(q) = num(r) - z*den(r), denominator unchanged.
  static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)
  {
    if (q == r)
      mpz_submul(mpq_numref(q), mpq_denref(q), z);
    else
    {
      mpz_mul(mpq_numref(q), mpq_denref(r), z);
      mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q));
      mpz_set(mpq_denref(q), mpq_denref(r));
    }
  }
  static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)
  { eval(q, r, z); mpq_neg(q, q); }

  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
  { mpf_sub(f, g, h); }

  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
  { mpf_sub_ui(f, g, l); }
  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
  { mpf_ui_sub(f, l, g); }
  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
  {
    if (l >= 0)
      mpf_sub_ui(f, g, l);
    else
      mpf_add_ui(f, g, -static_cast<unsigned long>(l));
  }
  // f = l - g, computed as -(g - l).
  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
  {
    if (l >= 0)
      mpf_sub_ui(f, g, l);
    else
      mpf_add_ui(f, g, -static_cast<unsigned long>(l));
    mpf_neg(f, f);
  }
  // 8*sizeof(double) bits of precision is enough to hold any double's
  // mantissa exactly in the temporary.
  static void eval(mpf_ptr f, mpf_srcptr g, double d)
  {
    mpf_t temp;
    mpf_init2(temp, 8*sizeof(double));
    mpf_set_d(temp, d);
    mpf_sub(f, g, temp);
    mpf_clear(temp);
  }
  static void eval(mpf_ptr f, double d, mpf_srcptr g)
  {
    mpf_t temp;
    mpf_init2(temp, 8*sizeof(double));
    mpf_set_d(temp, d);
    mpf_sub(f, temp, g);
    mpf_clear(temp);
  }
};
+
+// defined here so it can reference __gmp_binary_minus
+inline void
+__gmp_binary_plus::eval(mpq_ptr q, mpq_srcptr r, signed long int l)
+{
+  if (l >= 0)
+    eval(q, r, static_cast<unsigned long>(l));
+  else
+    __gmp_binary_minus::eval(q, r, -static_cast<unsigned long>(l));
+}
+
+struct __gmp_binary_lshift
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l)
+  {
+    if (__GMPXX_CONSTANT(l) && (l == 0))
+    {
+      if (z != w) mpz_set(z, w);
+    }
+    else
+      mpz_mul_2exp(z, w, l);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l)
+  {
+    if (__GMPXX_CONSTANT(l) && (l == 0))
+    {
+      if (q != r) mpq_set(q, r);
+    }
+    else
+      mpq_mul_2exp(q, r, l);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l)
+  { mpf_mul_2exp(f, g, l); }
+};
+
+struct __gmp_binary_rshift
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l)
+  {
+    if (__GMPXX_CONSTANT(l) && (l == 0))
+    {
+      if (z != w) mpz_set(z, w);
+    }
+    else
+      mpz_fdiv_q_2exp(z, w, l);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l)
+  {
+    if (__GMPXX_CONSTANT(l) && (l == 0))
+    {
+      if (q != r) mpq_set(q, r);
+    }
+    else
+      mpq_div_2exp(q, r, l);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l)
+  { mpf_div_2exp(f, g, l); }
+};
+
// Evaluator for operator*.  Literal power-of-two factors are turned into
// shifts; literal-sign knowledge (__GMPXX_CONSTANT_TRUE) avoids the
// slower signed entry points.
struct __gmp_binary_multiplies
{
  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
  { mpz_mul(z, w, v); }

  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
  {
// gcc-3.3 doesn't have __builtin_ctzl. Don't bother optimizing for old gcc.
#if __GMP_GNUC_PREREQ(3, 4)
    // (l & (l-1)) == 0 detects 0 and powers of two at compile time.
    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0)
    {
      if (l == 0)
      {
        // Multiplying by 0: set the result to zero directly.
        z->_mp_size = 0;
      }
      else
      {
        // Power of two: multiply via a left shift by log2(l).
        __gmp_binary_lshift::eval(z, w, __builtin_ctzl(l));
      }
    }
    else
#endif
      mpz_mul_ui(z, w, l);
  }
  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
  { eval(z, w, l); }
  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
  {
    if (__GMPXX_CONSTANT_TRUE(l >= 0))
      eval(z, w, static_cast<unsigned long>(l));
    else if (__GMPXX_CONSTANT_TRUE(l <= 0))
      {
        eval(z, w, -static_cast<unsigned long>(l));
	mpz_neg(z, z);
      }
    else
      mpz_mul_si (z, w, l);
  }
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  { eval(z, w, l); }
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_mul (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  { eval(z, w, d); }

  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
  { mpq_mul(q, r, s); }

  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
  {
#if __GMP_GNUC_PREREQ(3, 4)
    // Same power-of-two shortcut as the mpz overload above.
    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0)
    {
      if (l == 0)
      {
	mpq_set_ui(q, 0, 1);
      }
      else
      {
        __gmp_binary_lshift::eval(q, r, __builtin_ctzl(l));
      }
    }
    else
#endif
    {
      // __GMPXX_TMPQ_UI declares a temporary mpq "temp" holding l.
      __GMPXX_TMPQ_UI;
      mpq_mul (q, r, temp);
    }
  }
  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
  { eval(q, r, l); }
  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
  {
    if (__GMPXX_CONSTANT_TRUE(l >= 0))
      eval(q, r, static_cast<unsigned long>(l));
    else if (__GMPXX_CONSTANT_TRUE(l <= 0))
      {
        eval(q, r, -static_cast<unsigned long>(l));
	mpq_neg(q, q);
      }
    else
      {
	__GMPXX_TMPQ_SI;
	mpq_mul (q, r, temp);
      }
  }
  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
  { eval(q, r, l); }
  static void eval(mpq_ptr q, mpq_srcptr r, double d)
  {  __GMPXX_TMPQ_D;    mpq_mul (q, r, temp); }
  static void eval(mpq_ptr q, double d, mpq_srcptr r)
  { eval(q, r, d); }

  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
  { mpf_mul(f, g, h); }

  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
  { mpf_mul_ui(f, g, l); }
  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
  { mpf_mul_ui(f, g, l); }
  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
  {
    if (l >= 0)
      mpf_mul_ui(f, g, l);
    else
      {
	mpf_mul_ui(f, g, -static_cast<unsigned long>(l));
	mpf_neg(f, f);
      }
  }
  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
  { eval(f, g, l); }
  static void eval(mpf_ptr f, mpf_srcptr g, double d)
  {
    mpf_t temp;
    mpf_init2(temp, 8*sizeof(double));
    mpf_set_d(temp, d);
    mpf_mul(f, g, temp);
    mpf_clear(temp);
  }
  static void eval(mpf_ptr f, double d, mpf_srcptr g)
  { eval(f, g, d); }
};
+
// Evaluator for operator/.  Integer division truncates toward zero (tdiv),
// matching built-in C++ integer division semantics.
struct __gmp_binary_divides
{
  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
  { mpz_tdiv_q(z, w, v); }

  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
  {
#if __GMP_GNUC_PREREQ(3, 4)
    // Don't optimize division by 0...
    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0)
    {
      if (l == 1)
      {
        if (z != w) mpz_set(z, w);
      }
      else
        mpz_tdiv_q_2exp(z, w, __builtin_ctzl(l));
        // warning: do not use rshift (fdiv)
    }
    else
#endif
      mpz_tdiv_q_ui(z, w, l);
  }
  // z = l / w.  If |w| doesn't fit an unsigned long then |w| > l, so the
  // truncated quotient is 0.
  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
  {
    if (mpz_sgn(w) >= 0)
      {
	if (mpz_fits_ulong_p(w))
	  mpz_set_ui(z, l / mpz_get_ui(w));
	else
	  mpz_set_ui(z, 0);
      }
    else
      {
	// Negative divisor: divide by |w|, then negate the quotient.
	mpz_neg(z, w);
	if (mpz_fits_ulong_p(z))
	  {
	    mpz_set_ui(z, l / mpz_get_ui(z));
	    mpz_neg(z, z);
	  }
	else
	  mpz_set_ui(z, 0);
      }
  }
  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
  {
    if (l >= 0)
      eval(z, w, static_cast<unsigned long>(l));
    else
      {
	eval(z, w, -static_cast<unsigned long>(l));
	mpz_neg(z, z);
      }
  }
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  {
    if (mpz_fits_slong_p(w))
      mpz_set_si(z, l / mpz_get_si(w));
    else
      {
        /* if w is bigger than a long then the quotient must be zero, unless
           l==LONG_MIN and w==-LONG_MIN in which case the quotient is -1 */
        mpz_set_si (z, (mpz_cmpabs_ui (w, __gmpxx_abs_ui(l)) == 0 ? -1 : 0));
      }
  }
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_tdiv_q (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  {  __GMPXX_TMPZ_D;    mpz_tdiv_q (z, temp, w); }

  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
  { mpq_div(q, r, s); }

  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
  {
#if __GMP_GNUC_PREREQ(3, 4)
    // Rational division by a literal power of two is an exact right shift.
    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0)
      __gmp_binary_rshift::eval(q, r, __builtin_ctzl(l));
    else
#endif
    {
      __GMPXX_TMPQ_UI;
      mpq_div (q, r, temp);
    }
  }
  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
  {
    if (__GMPXX_CONSTANT_TRUE(l == 0))
      mpq_set_ui(q, 0, 1);
    else if (__GMPXX_CONSTANT_TRUE(l == 1))
      mpq_inv(q, r);
    else
      {
	__GMPXX_TMPQ_UI;
	mpq_div (q, temp, r);
      }
  }
  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
  {
    if (__GMPXX_CONSTANT_TRUE(l >= 0))
      eval(q, r, static_cast<unsigned long>(l));
    else if (__GMPXX_CONSTANT_TRUE(l <= 0))
      {
        eval(q, r, -static_cast<unsigned long>(l));
	mpq_neg(q, q);
      }
    else
      {
	__GMPXX_TMPQ_SI;
	mpq_div (q, r, temp);
      }
  }
  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
  {
    // Literal 0, 1 and -1 numerators get dedicated fast paths.
    if (__GMPXX_CONSTANT_TRUE(l == 0))
      mpq_set_ui(q, 0, 1);
    else if (__GMPXX_CONSTANT_TRUE(l == 1))
      mpq_inv(q, r);
    else if (__GMPXX_CONSTANT_TRUE(l == -1))
      {
	mpq_inv(q, r);
	mpq_neg(q, q);
      }
    else
      {
	__GMPXX_TMPQ_SI;
	mpq_div (q, temp, r);
      }
  }
  static void eval(mpq_ptr q, mpq_srcptr r, double d)
  {  __GMPXX_TMPQ_D;    mpq_div (q, r, temp); }
  static void eval(mpq_ptr q, double d, mpq_srcptr r)
  {  __GMPXX_TMPQ_D;    mpq_div (q, temp, r); }

  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
  { mpf_div(f, g, h); }

  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
  { mpf_div_ui(f, g, l); }
  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
  { mpf_ui_div(f, l, g); }
  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
  {
    if (l >= 0)
      mpf_div_ui(f, g, l);
    else
      {
	mpf_div_ui(f, g, -static_cast<unsigned long>(l));
	mpf_neg(f, f);
      }
  }
  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
  {
    if (l >= 0)
      mpf_ui_div(f, l, g);
    else
      {
	mpf_ui_div(f, -static_cast<unsigned long>(l), g);
	mpf_neg(f, f);
      }
  }
  static void eval(mpf_ptr f, mpf_srcptr g, double d)
  {
    mpf_t temp;
    mpf_init2(temp, 8*sizeof(double));
    mpf_set_d(temp, d);
    mpf_div(f, g, temp);
    mpf_clear(temp);
  }
  static void eval(mpf_ptr f, double d, mpf_srcptr g)
  {
    mpf_t temp;
    mpf_init2(temp, 8*sizeof(double));
    mpf_set_d(temp, d);
    mpf_div(f, temp, g);
    mpf_clear(temp);
  }
};
+
// Evaluator for operator%.  Uses truncating remainder (tdiv), so the
// result has the sign of the dividend, like built-in C++ %.
struct __gmp_binary_modulus
{
  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
  { mpz_tdiv_r(z, w, v); }

  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
  { mpz_tdiv_r_ui(z, w, l); }
  // z = l % w.  If |w| doesn't fit an unsigned long then |w| > l and the
  // remainder is l itself.  The sign of w doesn't affect l % |w| here.
  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
  {
    if (mpz_sgn(w) >= 0)
      {
	if (mpz_fits_ulong_p(w))
	  mpz_set_ui(z, l % mpz_get_ui(w));
	else
	  mpz_set_ui(z, l);
      }
    else
      {
	mpz_neg(z, w);
	if (mpz_fits_ulong_p(z))
	  mpz_set_ui(z, l % mpz_get_ui(z));
	else
	  mpz_set_ui(z, l);
      }
  }
  // The divisor's sign is irrelevant for a truncating remainder, so |l|
  // suffices (and safely handles l == LONG_MIN).
  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
  {
    mpz_tdiv_r_ui (z, w, __gmpxx_abs_ui(l));
  }
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  {
    if (mpz_fits_slong_p(w))
      mpz_set_si(z, l % mpz_get_si(w));
    else
      {
        /* if w is bigger than a long then the remainder is l unchanged,
           unless l==LONG_MIN and w==-LONG_MIN in which case it's 0 */
        mpz_set_si (z, mpz_cmpabs_ui (w, __gmpxx_abs_ui(l)) == 0 ? 0 : l);
      }
  }
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_tdiv_r (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  {  __GMPXX_TMPZ_D;    mpz_tdiv_r (z, temp, w); }
};
+
// Evaluators for the bitwise operators & | ^ (mpz only).  Scalar operands
// are converted to a temporary mpz named "temp" by the __GMPXX_TMPZ_*
// macros; all three operations are commutative, so the swapped-argument
// overloads simply delegate.
struct __gmp_binary_and
{
  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
  { mpz_and(z, w, v); }

  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
  {  __GMPXX_TMPZ_UI;   mpz_and (z, w, temp);  }
  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
  { eval(z, w, l);  }
  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
  {  __GMPXX_TMPZ_SI;   mpz_and (z, w, temp);  }
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  { eval(z, w, l);  }
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_and (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  { eval(z, w, d);  }
};

// Bitwise inclusive or.
struct __gmp_binary_ior
{
  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
  { mpz_ior(z, w, v); }
  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
  {  __GMPXX_TMPZ_UI;   mpz_ior (z, w, temp);  }
  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
  { eval(z, w, l);  }
  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
  {  __GMPXX_TMPZ_SI;   mpz_ior (z, w, temp);  }
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  { eval(z, w, l);  }
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_ior (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  { eval(z, w, d);  }
};

// Bitwise exclusive or.
struct __gmp_binary_xor
{
  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
  { mpz_xor(z, w, v); }
  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
  {  __GMPXX_TMPZ_UI;   mpz_xor (z, w, temp);  }
  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
  { eval(z, w, l);  }
  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
  {  __GMPXX_TMPZ_SI;   mpz_xor (z, w, temp);  }
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  { eval(z, w, l);  }
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_xor (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  { eval(z, w, d);  }
};
+
// Three-way comparison: returns negative/zero/positive like the underlying
// GMP cmp functions.  Swapped-argument overloads negate the result.
struct __gmp_cmp_function
{
  static int eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w); }

  static int eval(mpz_srcptr z, unsigned long int l)
  { return mpz_cmp_ui(z, l); }
  static int eval(unsigned long int l, mpz_srcptr z)
  { return -mpz_cmp_ui(z, l); }
  static int eval(mpz_srcptr z, signed long int l)
  { return mpz_cmp_si(z, l); }
  static int eval(signed long int l, mpz_srcptr z)
  { return -mpz_cmp_si(z, l); }
  static int eval(mpz_srcptr z, double d)
  { return mpz_cmp_d(z, d); }
  static int eval(double d, mpz_srcptr z)
  { return -mpz_cmp_d(z, d); }

  static int eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r); }

  // Scalars compare as the rational l/1.
  static int eval(mpq_srcptr q, unsigned long int l)
  { return mpq_cmp_ui(q, l, 1); }
  static int eval(unsigned long int l, mpq_srcptr q)
  { return -mpq_cmp_ui(q, l, 1); }
  static int eval(mpq_srcptr q, signed long int l)
  { return mpq_cmp_si(q, l, 1); }
  static int eval(signed long int l, mpq_srcptr q)
  { return -mpq_cmp_si(q, l, 1); }
  static int eval(mpq_srcptr q, double d)
  {  __GMPXX_TMPQ_D;    return mpq_cmp (q, temp); }
  static int eval(double d, mpq_srcptr q)
  {  __GMPXX_TMPQ_D;    return mpq_cmp (temp, q); }
  static int eval(mpq_srcptr q, mpz_srcptr z)
  { return mpq_cmp_z(q, z); }
  static int eval(mpz_srcptr z, mpq_srcptr q)
  { return -mpq_cmp_z(q, z); }

  static int eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g); }

  static int eval(mpf_srcptr f, unsigned long int l)
  { return mpf_cmp_ui(f, l); }
  static int eval(unsigned long int l, mpf_srcptr f)
  { return -mpf_cmp_ui(f, l); }
  static int eval(mpf_srcptr f, signed long int l)
  { return mpf_cmp_si(f, l); }
  static int eval(signed long int l, mpf_srcptr f)
  { return -mpf_cmp_si(f, l); }
  static int eval(mpf_srcptr f, double d)
  { return mpf_cmp_d(f, d); }
  static int eval(double d, mpf_srcptr f)
  { return -mpf_cmp_d(f, d); }
  static int eval(mpf_srcptr f, mpz_srcptr z)
  { return mpf_cmp_z(f, z); }
  static int eval(mpz_srcptr z, mpf_srcptr f)
  { return -mpf_cmp_z(f, z); }
  // mpf vs mpq: the rational is converted to mpf at default precision,
  // so this comparison may round.
  static int eval(mpf_srcptr f, mpq_srcptr q)
  {
    mpf_t qf;
    mpf_init(qf); /* Should we use the precision of f?  */
    mpf_set_q(qf, q);
    int ret = eval(f, qf);
    mpf_clear(qf);
    return ret;
  }
  static int eval(mpq_srcptr q, mpf_srcptr f)
  { return -eval(f, q); }
};
+
// Evaluator for operator==.  Equality never depends on argument order, so
// swapped overloads delegate directly.
struct __gmp_binary_equal
{
  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) == 0; }

  static bool eval(mpz_srcptr z, unsigned long int l)
  { return mpz_cmp_ui(z, l) == 0; }
  static bool eval(unsigned long int l, mpz_srcptr z)
  { return eval(z, l); }
  static bool eval(mpz_srcptr z, signed long int l)
  { return mpz_cmp_si(z, l) == 0; }
  static bool eval(signed long int l, mpz_srcptr z)
  { return eval(z, l); }
  static bool eval(mpz_srcptr z, double d)
  { return mpz_cmp_d(z, d) == 0; }
  static bool eval(double d, mpz_srcptr z)
  { return eval(z, d); }

  static bool eval(mpq_srcptr q, mpq_srcptr r)
  { return mpq_equal(q, r) != 0; }

  // q == l iff q is the integer l, i.e. den(q) == 1 and num(q) == l.
  // For a literal l == 0 the denominator test is skipped: a canonical
  // rational with numerator 0 always has denominator 1.
  static bool eval(mpq_srcptr q, unsigned long int l)
  { return ((__GMPXX_CONSTANT(l) && l == 0) ||
	    mpz_cmp_ui(mpq_denref(q), 1) == 0) &&
      mpz_cmp_ui(mpq_numref(q), l) == 0; }
  static bool eval(unsigned long int l, mpq_srcptr q)
  { return eval(q, l); }
  static bool eval(mpq_srcptr q, signed long int l)
  { return ((__GMPXX_CONSTANT(l) && l == 0) ||
	    mpz_cmp_ui(mpq_denref(q), 1) == 0) &&
      mpz_cmp_si(mpq_numref(q), l) == 0; }
  static bool eval(signed long int l, mpq_srcptr q)
  { return eval(q, l); }
  static bool eval(mpq_srcptr q, double d)
  {  __GMPXX_TMPQ_D;    return mpq_equal (q, temp) != 0; }
  static bool eval(double d, mpq_srcptr q)
  { return eval(q, d); }
  static bool eval(mpq_srcptr q, mpz_srcptr z)
  { return mpz_cmp_ui(mpq_denref(q), 1) == 0 && mpz_cmp(mpq_numref(q), z) == 0; }
  static bool eval(mpz_srcptr z, mpq_srcptr q)
  { return eval(q, z); }

  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) == 0; }

  static bool eval(mpf_srcptr f, unsigned long int l)
  { return mpf_cmp_ui(f, l) == 0; }
  static bool eval(unsigned long int l, mpf_srcptr f)
  { return eval(f, l); }
  static bool eval(mpf_srcptr f, signed long int l)
  { return mpf_cmp_si(f, l) == 0; }
  static bool eval(signed long int l, mpf_srcptr f)
  { return eval(f, l); }
  static bool eval(mpf_srcptr f, double d)
  { return mpf_cmp_d(f, d) == 0; }
  static bool eval(double d, mpf_srcptr f)
  { return eval(f, d); }
  static bool eval(mpf_srcptr f, mpz_srcptr z)
  { return mpf_cmp_z(f, z) == 0; }
  static bool eval(mpz_srcptr z, mpf_srcptr f)
  { return eval(f, z); }
  static bool eval(mpf_srcptr f, mpq_srcptr q)
  { return __gmp_cmp_function::eval(f, q) == 0; }
  static bool eval(mpq_srcptr q, mpf_srcptr f)
  { return eval(f, q); }
};
+
// Evaluator for operator<.  Swapped-argument overloads flip the comparison
// direction (a < b  <=>  cmp(b, a) > 0).
struct __gmp_binary_less
{
  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) < 0; }

  static bool eval(mpz_srcptr z, unsigned long int l)
  { return mpz_cmp_ui(z, l) < 0; }
  static bool eval(unsigned long int l, mpz_srcptr z)
  { return mpz_cmp_ui(z, l) > 0; }
  static bool eval(mpz_srcptr z, signed long int l)
  { return mpz_cmp_si(z, l) < 0; }
  static bool eval(signed long int l, mpz_srcptr z)
  { return mpz_cmp_si(z, l) > 0; }
  static bool eval(mpz_srcptr z, double d)
  { return mpz_cmp_d(z, d) < 0; }
  static bool eval(double d, mpz_srcptr z)
  { return mpz_cmp_d(z, d) > 0; }

  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) < 0; }

  static bool eval(mpq_srcptr q, unsigned long int l)
  { return mpq_cmp_ui(q, l, 1) < 0; }
  static bool eval(unsigned long int l, mpq_srcptr q)
  { return mpq_cmp_ui(q, l, 1) > 0; }
  static bool eval(mpq_srcptr q, signed long int l)
  { return mpq_cmp_si(q, l, 1) < 0; }
  static bool eval(signed long int l, mpq_srcptr q)
  { return mpq_cmp_si(q, l, 1) > 0; }
  static bool eval(mpq_srcptr q, double d)
  {  __GMPXX_TMPQ_D;    return mpq_cmp (q, temp) < 0; }
  static bool eval(double d, mpq_srcptr q)
  {  __GMPXX_TMPQ_D;    return mpq_cmp (temp, q) < 0; }
  static bool eval(mpq_srcptr q, mpz_srcptr z)
  { return mpq_cmp_z(q, z) < 0; }
  static bool eval(mpz_srcptr z, mpq_srcptr q)
  { return mpq_cmp_z(q, z) > 0; }

  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) < 0; }

  static bool eval(mpf_srcptr f, unsigned long int l)
  { return mpf_cmp_ui(f, l) < 0; }
  static bool eval(unsigned long int l, mpf_srcptr f)
  { return mpf_cmp_ui(f, l) > 0; }
  static bool eval(mpf_srcptr f, signed long int l)
  { return mpf_cmp_si(f, l) < 0; }
  static bool eval(signed long int l, mpf_srcptr f)
  { return mpf_cmp_si(f, l) > 0; }
  static bool eval(mpf_srcptr f, double d)
  { return mpf_cmp_d(f, d) < 0; }
  static bool eval(double d, mpf_srcptr f)
  { return mpf_cmp_d(f, d) > 0; }
  static bool eval(mpf_srcptr f, mpz_srcptr z)
  { return mpf_cmp_z(f, z) < 0; }
  static bool eval(mpz_srcptr z, mpf_srcptr f)
  { return mpf_cmp_z(f, z) > 0; }
  static bool eval(mpf_srcptr f, mpq_srcptr q)
  { return __gmp_cmp_function::eval(f, q) < 0; }
  static bool eval(mpq_srcptr q, mpf_srcptr f)
  { return __gmp_cmp_function::eval(q, f) < 0; }
};

// operator> is just operator< with the operands exchanged.
struct __gmp_binary_greater
{
  template <class T, class U>
  static inline bool eval(T t, U u) { return __gmp_binary_less::eval(u, t); }
};
+
// Evaluator for ++ (prefix and postfix share it).
struct __gmp_unary_increment
{
  static void eval(mpz_ptr z) { mpz_add_ui(z, z, 1); }
  // q + 1: add the denominator to the numerator; canonical form is kept
  // since gcd(num + den, den) == gcd(num, den).
  static void eval(mpq_ptr q)
  { mpz_add(mpq_numref(q), mpq_numref(q), mpq_denref(q)); }
  static void eval(mpf_ptr f) { mpf_add_ui(f, f, 1); }
};

// Evaluator for --.
struct __gmp_unary_decrement
{
  static void eval(mpz_ptr z) { mpz_sub_ui(z, z, 1); }
  static void eval(mpq_ptr q)
  { mpz_sub(mpq_numref(q), mpq_numref(q), mpq_denref(q)); }
  static void eval(mpf_ptr f) { mpf_sub_ui(f, f, 1); }
};

// abs(x) for each of the three GMP types.
struct __gmp_abs_function
{
  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_abs(z, w); }
  static void eval(mpq_ptr q, mpq_srcptr r) { mpq_abs(q, r); }
  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_abs(f, g); }
};

// trunc/floor/ceil are only meaningful for mpf.
struct __gmp_trunc_function
{
  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_trunc(f, g); }
};

struct __gmp_floor_function
{
  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_floor(f, g); }
};

struct __gmp_ceil_function
{
  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_ceil(f, g); }
};

// sqrt: integer square root for mpz, floating-point for mpf.
struct __gmp_sqrt_function
{
  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_sqrt(z, w); }
  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_sqrt(f, g); }
};
+
+struct __gmp_hypot_function
+{
+  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_mul(f, h, h);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_set_ui(f, l);
+    mpf_mul_ui(f, f, l);
+    mpf_add(f, f, temp);
+    mpf_clear(temp);
+    mpf_sqrt(f, f);
+  }
+  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
+  { eval(f, g, l); }
+  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
+  { eval(f, g, __gmpxx_abs_ui(l)); }
+  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
+  { eval(f, g, l); }
+  static void eval(mpf_ptr f, mpf_srcptr g, double d)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_set_d(f, d);
+    mpf_mul(f, f, f);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, double d, mpf_srcptr g)
+  { eval(f, g, d); }
+};
+
// sgn(x): -1, 0 or +1.
struct __gmp_sgn_function
{
  static int eval(mpz_srcptr z) { return mpz_sgn(z); }
  static int eval(mpq_srcptr q) { return mpq_sgn(q); }
  static int eval(mpf_srcptr f) { return mpf_sgn(f); }
};

// gcd(w, v) for mpz; gcd is symmetric and depends only on |l|, hence the
// delegating swapped/signed overloads.
struct __gmp_gcd_function
{
  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
  { mpz_gcd(z, w, v); }
  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
  { mpz_gcd_ui(z, w, l); }
  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
  { eval(z, w, l); }
  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
  { eval(z, w, __gmpxx_abs_ui(l)); }
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  { eval(z, w, l); }
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_gcd (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  { eval(z, w, d); }
};

// lcm(w, v) for mpz; same symmetry/sign structure as gcd above.
struct __gmp_lcm_function
{
  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
  { mpz_lcm(z, w, v); }
  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
  { mpz_lcm_ui(z, w, l); }
  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
  { eval(z, w, l); }
  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
  { eval(z, w, __gmpxx_abs_ui(l)); }
  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
  { eval(z, w, l); }
  static void eval(mpz_ptr z, mpz_srcptr w, double d)
  {  __GMPXX_TMPZ_D;    mpz_lcm (z, w, temp); }
  static void eval(mpz_ptr z, double d, mpz_srcptr w)
  { eval(z, w, d); }
};
+
// Uniform random generation from a gmp_randstate_t: an l-bit integer,
// an integer below w, or an mpf in [0,1) with prec bits.
struct __gmp_rand_function
{
  static void eval(mpz_ptr z, gmp_randstate_t s, mp_bitcnt_t l)
  { mpz_urandomb(z, s, l); }
  static void eval(mpz_ptr z, gmp_randstate_t s, mpz_srcptr w)
  { mpz_urandomm(z, s, w); }
  static void eval(mpf_ptr f, gmp_randstate_t s, mp_bitcnt_t prec)
  { mpf_urandomb(f, s, prec); }
};
+
// factorial(n).  Negative arguments are a domain error; arguments that do
// not fit an unsigned long would exhaust memory, reported as bad_alloc.
// When exceptions are disabled (__EXCEPTIONS undefined) the error paths
// print a diagnostic and abort instead.
struct __gmp_fac_function
{
  static void eval(mpz_ptr z, unsigned long l) { mpz_fac_ui(z, l); }
  static void eval(mpz_ptr z, signed long l)
  {
    if (l < 0)
#ifdef __EXCEPTIONS
      throw std::domain_error ("factorial(negative)");
#else
      {
        std::cerr << "std::domain_error: factorial(negative)\n";
        std::abort();
      }
#endif
    eval(z, static_cast<unsigned long>(l));
  }
  static void eval(mpz_ptr z, mpz_srcptr w)
  {
    if (!mpz_fits_ulong_p(w))
      {
	if (mpz_sgn(w) < 0)
#ifdef __EXCEPTIONS
	  throw std::domain_error ("factorial(negative)");
#else
         {
	    std::cerr << "std::domain_error: factorial(negative)\n";
           std::abort();
         }
#endif
	else
#ifdef __EXCEPTIONS
	  throw std::bad_alloc(); // or std::overflow_error ("factorial")?
#else
         {
	    std::cerr << "std::bad_alloc\n";
           std::abort();
         }
#endif
      }
    eval(z, mpz_get_ui(w));
  }
  static void eval(mpz_ptr z, double d)
  {  __GMPXX_TMPZ_D;    eval (z, temp); }
};

// primorial(n): same argument validation strategy as factorial above.
struct __gmp_primorial_function
{
  static void eval(mpz_ptr z, unsigned long l) { mpz_primorial_ui(z, l); }
  static void eval(mpz_ptr z, signed long l)
  {
    if (l < 0)
#ifdef __EXCEPTIONS
      throw std::domain_error ("primorial(negative)");
#else
      {
        std::cerr << "std::domain_error: primorial(negative)\n";
        std::abort();
      }
#endif
    eval(z, static_cast<unsigned long>(l));
  }
  static void eval(mpz_ptr z, mpz_srcptr w)
  {
    if (!mpz_fits_ulong_p(w))
      {
	if (mpz_sgn(w) < 0)
#ifdef __EXCEPTIONS
	  throw std::domain_error ("primorial(negative)");
#else
         {
	    std::cerr << "std::domain_error: primorial(negative)\n";
           std::abort();
         }
#endif
	else
#ifdef __EXCEPTIONS
	  throw std::bad_alloc(); // or std::overflow_error ("primorial")?
#else
         {
	    std::cerr << "std::bad_alloc\n";
           std::abort();
         }
#endif
      }
    eval(z, mpz_get_ui(w));
  }
  static void eval(mpz_ptr z, double d)
  {  __GMPXX_TMPZ_D;    eval (z, temp); }
};

// fibonacci(n), extended to negative n via F(-n) = (-1)^(n+1) * F(n).
struct __gmp_fib_function
{
  static void eval(mpz_ptr z, unsigned long l) { mpz_fib_ui(z, l); }
  static void eval(mpz_ptr z, signed long l)
  {
    if (l < 0)
      {
	eval(z, -static_cast<unsigned long>(l));
	// F(l) = -F(-l) when l is negative and even.
	if ((l & 1) == 0)
	  mpz_neg(z, z);
      }
    else
      eval(z, static_cast<unsigned long>(l));
  }
  static void eval(mpz_ptr z, mpz_srcptr w)
  {
    if (!mpz_fits_slong_p(w))
#ifdef __EXCEPTIONS
      throw std::bad_alloc(); // or std::overflow_error ("fibonacci")?
#else
      {
        std::cerr << "std::bad_alloc\n";
        std::abort();
      }
#endif
    eval(z, mpz_get_si(w));
  }
  static void eval(mpz_ptr z, double d)
  {  __GMPXX_TMPZ_D;    eval (z, temp); }
};
+
+
+/**************** Auxiliary classes ****************/
+
+/* this is much the same as gmp_allocated_string in gmp-impl.h
+   since gmp-impl.h is not publicly available, I redefine it here
+   I use a different name to avoid possible clashes */
+
+extern "C" {
+  typedef void (*__gmp_freefunc_t) (void *, size_t);
+}
+struct __gmp_alloc_cstring
+{
+  char *str;
+  __gmp_alloc_cstring(char *s) { str = s; }
+  ~__gmp_alloc_cstring()
+  {
+    __gmp_freefunc_t freefunc;
+    mp_get_memory_functions (NULL, NULL, &freefunc);
+    (*freefunc) (str, std::strlen(str)+1);
+  }
+};
+
+
+// general expression template class
+template <class T, class U>
+class __gmp_expr;
+
+
+// templates for resolving expression types
// How an operand is stored inside an expression node: plain values (e.g.
// built-in scalars) by value, ...
template <class T>
struct __gmp_resolve_ref
{
  typedef T ref_type;
};

// ... but nested expression templates by const reference, so building a
// compound expression never copies sub-expressions.
template <class T, class U>
struct __gmp_resolve_ref<__gmp_expr<T, U> >
{
  typedef const __gmp_expr<T, U> & ref_type;
};
+
+
// Maps a pair of operand types to the type of the result, implementing
// the promotion lattice mpz_t -> mpq_t -> mpf_t.  The single-type
// specializations also expose the matching pointer typedefs.
template <class T, class U = T>
struct __gmp_resolve_expr;

template <>
struct __gmp_resolve_expr<mpz_t>
{
  typedef mpz_t value_type;
  typedef mpz_ptr ptr_type;
  typedef mpz_srcptr srcptr_type;
};

template <>
struct __gmp_resolve_expr<mpq_t>
{
  typedef mpq_t value_type;
  typedef mpq_ptr ptr_type;
  typedef mpq_srcptr srcptr_type;
};

template <>
struct __gmp_resolve_expr<mpf_t>
{
  typedef mpf_t value_type;
  typedef mpf_ptr ptr_type;
  typedef mpf_srcptr srcptr_type;
};

// Mixed integer/rational expressions yield a rational.
template <>
struct __gmp_resolve_expr<mpz_t, mpq_t>
{
  typedef mpq_t value_type;
};

template <>
struct __gmp_resolve_expr<mpq_t, mpz_t>
{
  typedef mpq_t value_type;
};

// Anything mixed with a float yields a float.
template <>
struct __gmp_resolve_expr<mpz_t, mpf_t>
{
  typedef mpf_t value_type;
};

template <>
struct __gmp_resolve_expr<mpf_t, mpz_t>
{
  typedef mpf_t value_type;
};

template <>
struct __gmp_resolve_expr<mpq_t, mpf_t>
{
  typedef mpf_t value_type;
};

template <>
struct __gmp_resolve_expr<mpf_t, mpq_t>
{
  typedef mpf_t value_type;
};
+
#if __GMPXX_USE_CXX11
// C++11: teach std::common_type about expression templates, so generic
// code (and std functions using common_type) resolves two __gmp_expr
// operands to the promoted concrete type via __gmp_resolve_expr.
namespace std {
  template <class T, class U, class V, class W>
  struct common_type <__gmp_expr<T, U>, __gmp_expr<V, W> >
  {
  private:
    typedef typename __gmp_resolve_expr<T, V>::value_type X;
  public:
    typedef __gmp_expr<X, X> type;
  };

  template <class T, class U>
  struct common_type <__gmp_expr<T, U> >
  {
    typedef __gmp_expr<T, T> type;
  };

// Pairing an expression with any native numeric type resolves to the
// expression's own concrete GMP type.
#define __GMPXX_DECLARE_COMMON_TYPE(typ)	\
  template <class T, class U>			\
  struct common_type <__gmp_expr<T, U>, typ >	\
  {						\
    typedef __gmp_expr<T, T> type;		\
  };						\
						\
  template <class T, class U>			\
  struct common_type <typ, __gmp_expr<T, U> >	\
  {						\
    typedef __gmp_expr<T, T> type;		\
  }

  __GMPXX_DECLARE_COMMON_TYPE(signed char);
  __GMPXX_DECLARE_COMMON_TYPE(unsigned char);
  __GMPXX_DECLARE_COMMON_TYPE(signed int);
  __GMPXX_DECLARE_COMMON_TYPE(unsigned int);
  __GMPXX_DECLARE_COMMON_TYPE(signed short int);
  __GMPXX_DECLARE_COMMON_TYPE(unsigned short int);
  __GMPXX_DECLARE_COMMON_TYPE(signed long int);
  __GMPXX_DECLARE_COMMON_TYPE(unsigned long int);
  __GMPXX_DECLARE_COMMON_TYPE(float);
  __GMPXX_DECLARE_COMMON_TYPE(double);
#undef __GMPXX_DECLARE_COMMON_TYPE
}
#endif
+
+// classes for evaluating unary and binary expressions
+// __gmp_unary_expr holds the single operand of a unary operation.  The
+// operand is stored by value or by const reference, whichever
+// __gmp_resolve_ref<T> selects for T.
+template <class T, class Op>
+struct __gmp_unary_expr
+{
+  typename __gmp_resolve_ref<T>::ref_type val;
+
+  __gmp_unary_expr(const T &v) : val(v) { }
+private:
+  // Not constructible without an operand.
+  __gmp_unary_expr();
+};
+
+// __gmp_binary_expr holds the two operands of a binary operation; each is
+// stored by value or by const reference as chosen by __gmp_resolve_ref.
+template <class T, class U, class Op>
+struct __gmp_binary_expr
+{
+  typename __gmp_resolve_ref<T>::ref_type val1;
+  typename __gmp_resolve_ref<U>::ref_type val2;
+
+  __gmp_binary_expr(const T &v1, const U &v2) : val1(v1), val2(v2) { }
+private:
+  // Not constructible without operands.
+  __gmp_binary_expr();
+};
+
+
+
+/**************** Macros for in-class declarations ****************/
+/* This is just repetitive code that is easier to maintain if it's written
+   only once */
+
+// Declares a compound-assignment operator taking any GMP expression.
+#define __GMPP_DECLARE_COMPOUND_OPERATOR(fun)                         \
+  template <class T, class U>                                         \
+  __gmp_expr<value_type, value_type> & fun(const __gmp_expr<T, U> &);
+
+// Declares compound-assignment overloads for every built-in arithmetic type.
+#define __GMPN_DECLARE_COMPOUND_OPERATOR(fun) \
+  __gmp_expr & fun(signed char);              \
+  __gmp_expr & fun(unsigned char);            \
+  __gmp_expr & fun(signed int);               \
+  __gmp_expr & fun(unsigned int);             \
+  __gmp_expr & fun(signed short int);         \
+  __gmp_expr & fun(unsigned short int);       \
+  __gmp_expr & fun(signed long int);          \
+  __gmp_expr & fun(unsigned long int);        \
+  __gmp_expr & fun(float);                    \
+  __gmp_expr & fun(double);                   \
+  /* __gmp_expr & fun(long double); */
+
+#define __GMP_DECLARE_COMPOUND_OPERATOR(fun) \
+__GMPP_DECLARE_COMPOUND_OPERATOR(fun)        \
+__GMPN_DECLARE_COMPOUND_OPERATOR(fun)
+
+// Shift-assignment operators take a bit count, not a GMP expression.
+#define __GMP_DECLARE_COMPOUND_OPERATOR_UI(fun) \
+  __gmp_expr & fun(mp_bitcnt_t);
+
+// Declares both the prefix (no-arg) and postfix (dummy int) forms.
+#define __GMP_DECLARE_INCREMENT_OPERATOR(fun) \
+  inline __gmp_expr & fun();                  \
+  inline __gmp_expr fun(int);
+
+// Converting constructors from every built-in arithmetic type; they defer
+// to the class's private init_si/init_ui/init_d helpers.
+#define __GMPXX_DEFINE_ARITHMETIC_CONSTRUCTORS		\
+  __gmp_expr(signed char c) { init_si(c); }		\
+  __gmp_expr(unsigned char c) { init_ui(c); }		\
+  __gmp_expr(signed int i) { init_si(i); }		\
+  __gmp_expr(unsigned int i) { init_ui(i); }		\
+  __gmp_expr(signed short int s) { init_si(s); }	\
+  __gmp_expr(unsigned short int s) { init_ui(s); }	\
+  __gmp_expr(signed long int l) { init_si(l); }		\
+  __gmp_expr(unsigned long int l) { init_ui(l); }	\
+  __gmp_expr(float f) { init_d(f); }			\
+  __gmp_expr(double d) { init_d(d); }
+
+// Assignment operators from every built-in arithmetic type; they defer to
+// the class's private assign_si/assign_ui/assign_d helpers.
+#define __GMPXX_DEFINE_ARITHMETIC_ASSIGNMENTS		\
+  __gmp_expr & operator=(signed char c) { assign_si(c); return *this; } \
+  __gmp_expr & operator=(unsigned char c) { assign_ui(c); return *this; } \
+  __gmp_expr & operator=(signed int i) { assign_si(i); return *this; } \
+  __gmp_expr & operator=(unsigned int i) { assign_ui(i); return *this; } \
+  __gmp_expr & operator=(signed short int s) { assign_si(s); return *this; } \
+  __gmp_expr & operator=(unsigned short int s) { assign_ui(s); return *this; } \
+  __gmp_expr & operator=(signed long int l) { assign_si(l); return *this; } \
+  __gmp_expr & operator=(unsigned long int l) { assign_ui(l); return *this; } \
+  __gmp_expr & operator=(float f) { assign_d(f); return *this; } \
+  __gmp_expr & operator=(double d) { assign_d(d); return *this; }
+
+// Static member functions (e.g. mpz_class::factorial) returning a lazy
+// unary expression; this variant takes a GMP expression argument.
+#define __GMPP_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                 \
+template <class U>                                                           \
+static __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >          \
+fun(const __gmp_expr<T, U> &expr);
+
+// Same, for a built-in argument widened to bigtype (long/unsigned long/double).
+#define __GMPNN_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type, bigtype) \
+static inline __gmp_expr<T, __gmp_unary_expr<bigtype, eval_fun> >            \
+fun(type expr);
+
+#define __GMPNS_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type)  \
+__GMPNN_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type, signed long)
+#define __GMPNU_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type)  \
+__GMPNN_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type, unsigned long)
+#define __GMPND_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type)  \
+__GMPNN_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type, double)
+
+#define __GMPN_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                 \
+__GMPNS_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, signed char)           \
+__GMPNU_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, unsigned char)         \
+__GMPNS_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, signed int)            \
+__GMPNU_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, unsigned int)          \
+__GMPNS_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, signed short int)      \
+__GMPNU_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, unsigned short int)    \
+__GMPNS_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, signed long int)       \
+__GMPNU_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, unsigned long int)     \
+__GMPND_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, float)                 \
+__GMPND_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, double)
+
+#define __GMP_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                  \
+__GMPP_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                         \
+__GMPN_DECLARE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)
+
+/**************** mpz_class -- wrapper for mpz_t ****************/
+
+// mpz_class: the terminal (fully-evaluated) expression type wrapping a raw
+// mpz_t integer.  Owns its mpz_t for its whole lifetime (RAII).
+template <>
+class __gmp_expr<mpz_t, mpz_t>
+{
+private:
+  typedef mpz_t value_type;
+  value_type mp;
+
+  // Helper functions used for all arithmetic types
+  void assign_ui(unsigned long l)
+  {
+    // When the compiler can prove l == 0, zero the value by clearing the
+    // limb count directly instead of calling into the library.
+    if (__GMPXX_CONSTANT_TRUE(l == 0))
+      __get_mp()->_mp_size = 0;
+    else
+      mpz_set_ui(mp, l);
+  }
+  void assign_si(signed long l)
+  {
+    // Compile-time sign dispatch; runtime-signed values fall through to
+    // mpz_set_si.
+    if (__GMPXX_CONSTANT_TRUE(l >= 0))
+      assign_ui(l);
+    else if (__GMPXX_CONSTANT_TRUE(l <= 0))
+      {
+	assign_ui(-static_cast<unsigned long>(l));
+	mpz_neg(mp, mp);
+      }
+    else
+      mpz_set_si(mp, l);
+  }
+  void assign_d (double d)
+  {
+    mpz_set_d (mp, d);
+  }
+
+  // init_* mirror assign_* but initialize the mpz_t in the same call.
+  void init_ui(unsigned long l)
+  {
+    if (__GMPXX_CONSTANT_TRUE(l == 0))
+      mpz_init(mp);
+    else
+      mpz_init_set_ui(mp, l);
+  }
+  void init_si(signed long l)
+  {
+    if (__GMPXX_CONSTANT_TRUE(l >= 0))
+      init_ui(l);
+    else if (__GMPXX_CONSTANT_TRUE(l <= 0))
+      {
+	init_ui(-static_cast<unsigned long>(l));
+	mpz_neg(mp, mp);
+      }
+    else
+      mpz_init_set_si(mp, l);
+  }
+  void init_d (double d)
+  {
+    mpz_init_set_d (mp, d);
+  }
+
+public:
+  // Integers have no precision of their own; report the default mpf
+  // precision so mixed expressions with mpf operands can size temporaries.
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
+
+  // constructors and destructor
+  __gmp_expr() __GMPXX_NOEXCEPT { mpz_init(mp); }
+
+  __gmp_expr(const __gmp_expr &z) { mpz_init_set(mp, z.mp); }
+#if __GMPXX_USE_CXX11
+  // Move: steal z's representation wholesale, then re-init z to a valid
+  // empty state so its destructor remains safe.
+  __gmp_expr(__gmp_expr &&z) noexcept
+  { *__get_mp() = *z.__get_mp(); mpz_init(z.mp); }
+#endif
+  // Implicit from any mpz expression; explicit from mpq/mpf expressions
+  // (those conversions truncate).
+  template <class T>
+  __gmp_expr(const __gmp_expr<mpz_t, T> &expr)
+  { mpz_init(mp); __gmp_set_expr(mp, expr); }
+  template <class T, class U>
+  explicit __gmp_expr(const __gmp_expr<T, U> &expr)
+  { mpz_init(mp); __gmp_set_expr(mp, expr); }
+
+  __GMPXX_DEFINE_ARITHMETIC_CONSTRUCTORS
+
+  // String constructors: throw std::invalid_argument on a parse failure
+  // (or print and abort when exceptions are disabled).
+  explicit __gmp_expr(const char *s, int base = 0)
+  {
+    if (mpz_init_set_str (mp, s, base) != 0)
+      {
+        mpz_clear (mp);
+#ifdef __EXCEPTIONS
+        throw std::invalid_argument ("mpz_set_str");
+#else
+        {
+          std::cerr << "std::invalid_argument: mpz_set_str\n";
+          std::abort();
+        }
+#endif
+      }
+  }
+  explicit __gmp_expr(const std::string &s, int base = 0)
+  {
+    if (mpz_init_set_str(mp, s.c_str(), base) != 0)
+      {
+        mpz_clear (mp);
+#ifdef __EXCEPTIONS
+        throw std::invalid_argument ("mpz_set_str");
+#else
+        {
+          std::cerr << "std::invalid_argument: mpz_set_str\n";
+          std::abort();
+        }
+#endif
+      }
+  }
+
+  explicit __gmp_expr(mpz_srcptr z) { mpz_init_set(mp, z); }
+
+  ~__gmp_expr() { mpz_clear(mp); }
+
+  // O(1): swaps the mpz_t structs themselves, not the limb data.
+  void swap(__gmp_expr& z) __GMPXX_NOEXCEPT
+  { std::swap(*__get_mp(), *z.__get_mp()); }
+
+  // assignment operators
+  __gmp_expr & operator=(const __gmp_expr &z)
+  { mpz_set(mp, z.mp); return *this; }
+#if __GMPXX_USE_CXX11
+  __gmp_expr & operator=(__gmp_expr &&z) noexcept
+  { swap(z); return *this; }
+#endif
+  template <class T, class U>
+  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
+  { __gmp_set_expr(mp, expr); return *this; }
+
+  __GMPXX_DEFINE_ARITHMETIC_ASSIGNMENTS
+
+  // String assignment (base auto-detected); same failure behavior as the
+  // string constructors.
+  __gmp_expr & operator=(const char *s)
+  {
+    if (mpz_set_str (mp, s, 0) != 0)
+#ifdef __EXCEPTIONS
+      throw std::invalid_argument ("mpz_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpz_set_str\n";
+        std::abort();
+      }
+#endif
+    return *this;
+  }
+  __gmp_expr & operator=(const std::string &s)
+  {
+    if (mpz_set_str(mp, s.c_str(), 0) != 0)
+#ifdef __EXCEPTIONS
+      throw std::invalid_argument ("mpz_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpz_set_str\n";
+        std::abort();
+      }
+#endif
+    return *this;
+  }
+
+  // string input/output functions
+  int set_str(const char *s, int base)
+  { return mpz_set_str(mp, s, base); }
+  int set_str(const std::string &s, int base)
+  { return mpz_set_str(mp, s.c_str(), base); }
+  std::string get_str(int base = 10) const
+  {
+    // __gmp_alloc_cstring frees the mpz_get_str buffer on scope exit.
+    __gmp_alloc_cstring temp(mpz_get_str(0, base, mp));
+    return std::string(temp.str);
+  }
+
+  // conversion functions
+  mpz_srcptr __get_mp() const { return mp; }
+  mpz_ptr __get_mp() { return mp; }
+  mpz_srcptr get_mpz_t() const { return mp; }
+  mpz_ptr get_mpz_t() { return mp; }
+
+  signed long int get_si() const { return mpz_get_si(mp); }
+  unsigned long int get_ui() const { return mpz_get_ui(mp); }
+  double get_d() const { return mpz_get_d(mp); }
+
+  // bool fits_schar_p() const { return mpz_fits_schar_p(mp); }
+  // bool fits_uchar_p() const { return mpz_fits_uchar_p(mp); }
+  bool fits_sint_p() const { return mpz_fits_sint_p(mp); }
+  bool fits_uint_p() const { return mpz_fits_uint_p(mp); }
+  bool fits_sshort_p() const { return mpz_fits_sshort_p(mp); }
+  bool fits_ushort_p() const { return mpz_fits_ushort_p(mp); }
+  bool fits_slong_p() const { return mpz_fits_slong_p(mp); }
+  bool fits_ulong_p() const { return mpz_fits_ulong_p(mp); }
+  // bool fits_float_p() const { return mpz_fits_float_p(mp); }
+  // bool fits_double_p() const { return mpz_fits_double_p(mp); }
+  // bool fits_ldouble_p() const { return mpz_fits_ldouble_p(mp); }
+
+#if __GMPXX_USE_CXX11
+  // True iff the value is nonzero (nonzero limb count).
+  explicit operator bool() const { return __get_mp()->_mp_size != 0; }
+#endif
+
+  // member operators
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator%=)
+
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator&=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator|=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator^=)
+
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)
+
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)
+
+  // Lazy static member functions: mpz_class::factorial(n), etc.
+  __GMP_DECLARE_UNARY_STATIC_MEMFUN(mpz_t, factorial, __gmp_fac_function)
+  __GMP_DECLARE_UNARY_STATIC_MEMFUN(mpz_t, primorial, __gmp_primorial_function)
+  __GMP_DECLARE_UNARY_STATIC_MEMFUN(mpz_t, fibonacci, __gmp_fib_function)
+};
+
+typedef __gmp_expr<mpz_t, mpz_t> mpz_class;
+
+
+/**************** mpq_class -- wrapper for mpq_t ****************/
+
+// mpq_class: the terminal expression type wrapping a raw mpq_t rational.
+// Note: arithmetic does not automatically canonicalize; call
+// canonicalize() after setting num/den directly.
+template <>
+class __gmp_expr<mpq_t, mpq_t>
+{
+private:
+  typedef mpq_t value_type;
+  value_type mp;
+
+  // Helper functions used for all arithmetic types
+  void assign_ui(unsigned long l) { mpq_set_ui(mp, l, 1); }
+  void assign_si(signed long l)
+  {
+    if (__GMPXX_CONSTANT_TRUE(l >= 0))
+      assign_ui(l);
+    else
+      mpq_set_si(mp, l, 1);
+  }
+  void assign_d (double d)        { mpq_set_d (mp, d); }
+
+  // Integer init: mpq_init gives 0/1, then only the numerator is set (the
+  // denominator stays 1, so the result is already canonical).
+  void init_ui(unsigned long l)	{ mpq_init(mp); get_num() = l; }
+  void init_si(signed long l)	{ mpq_init(mp); get_num() = l; }
+  void init_d (double d)	{ mpq_init(mp); assign_d (d); }
+
+public:
+  // Rationals have no precision; report the default mpf precision for
+  // mixed expressions with mpf operands.
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
+  void canonicalize() { mpq_canonicalize(mp); }
+
+  // constructors and destructor
+  __gmp_expr() { mpq_init(mp); }
+
+  __gmp_expr(const __gmp_expr &q)
+  {
+    mpz_init_set(mpq_numref(mp), mpq_numref(q.mp));
+    mpz_init_set(mpq_denref(mp), mpq_denref(q.mp));
+  }
+#if __GMPXX_USE_CXX11
+  // Move: steal q's representation, then re-init q to a valid 0/1 state.
+  __gmp_expr(__gmp_expr &&q)
+  { *mp = *q.mp; mpq_init(q.mp); }
+  // Move from an integer: steal it into the numerator; denominator is 1.
+  __gmp_expr(mpz_class &&z)
+  {
+    *mpq_numref(mp) = *z.get_mpz_t();
+    mpz_init_set_ui(mpq_denref(mp), 1);
+    mpz_init(z.get_mpz_t());
+  }
+#endif
+  template <class T>
+  __gmp_expr(const __gmp_expr<mpz_t, T> &expr)
+  { mpq_init(mp); __gmp_set_expr(mp, expr); }
+  template <class T>
+  __gmp_expr(const __gmp_expr<mpq_t, T> &expr)
+  { mpq_init(mp); __gmp_set_expr(mp, expr); }
+  template <class T, class U>
+  explicit __gmp_expr(const __gmp_expr<T, U> &expr)
+  { mpq_init(mp); __gmp_set_expr(mp, expr); }
+
+  __GMPXX_DEFINE_ARITHMETIC_CONSTRUCTORS
+
+  explicit __gmp_expr(const char *s, int base = 0)
+  {
+    mpq_init (mp);
+    // If s is the literal 0, we meant to call another constructor.
+    // If s just happens to evaluate to 0, we would crash, so whatever.
+    if (s == 0)
+      {
+	// Don't turn mpq_class(0,0) into 0
+	mpz_set_si(mpq_denref(mp), base);
+      }
+    else if (mpq_set_str(mp, s, base) != 0)
+      {
+        mpq_clear (mp);
+#ifdef __EXCEPTIONS
+        throw std::invalid_argument ("mpq_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpq_set_str\n";
+        std::abort();
+      }
+#endif
+      }
+  }
+  explicit __gmp_expr(const std::string &s, int base = 0)
+  {
+    mpq_init(mp);
+    if (mpq_set_str (mp, s.c_str(), base) != 0)
+      {
+        mpq_clear (mp);
+#ifdef __EXCEPTIONS
+        throw std::invalid_argument ("mpq_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpq_set_str\n";
+        std::abort();
+      }
+#endif
+      }
+  }
+  explicit __gmp_expr(mpq_srcptr q)
+  {
+    mpz_init_set(mpq_numref(mp), mpq_numref(q));
+    mpz_init_set(mpq_denref(mp), mpq_denref(q));
+  }
+
+  // Not canonicalized; caller must call canonicalize() if num/den may
+  // share a factor.
+  __gmp_expr(const mpz_class &num, const mpz_class &den)
+  {
+    mpz_init_set(mpq_numref(mp), num.get_mpz_t());
+    mpz_init_set(mpq_denref(mp), den.get_mpz_t());
+  }
+
+  ~__gmp_expr() { mpq_clear(mp); }
+
+  // O(1): swaps the mpq_t structs, not the limb data.
+  void swap(__gmp_expr& q) __GMPXX_NOEXCEPT { std::swap(*mp, *q.mp); }
+
+  // assignment operators
+  __gmp_expr & operator=(const __gmp_expr &q)
+  { mpq_set(mp, q.mp); return *this; }
+#if __GMPXX_USE_CXX11
+  __gmp_expr & operator=(__gmp_expr &&q) noexcept
+  { swap(q); return *this; }
+  __gmp_expr & operator=(mpz_class &&z) noexcept
+  { get_num() = std::move(z); get_den() = 1u; return *this; }
+#endif
+  template <class T, class U>
+  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
+  { __gmp_set_expr(mp, expr); return *this; }
+
+  __GMPXX_DEFINE_ARITHMETIC_ASSIGNMENTS
+
+  // String assignment; throws std::invalid_argument on parse failure (or
+  // prints and aborts when exceptions are disabled).
+  __gmp_expr & operator=(const char *s)
+  {
+    if (mpq_set_str (mp, s, 0) != 0)
+#ifdef __EXCEPTIONS
+      throw std::invalid_argument ("mpq_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpq_set_str\n";
+        std::abort();
+      }
+#endif
+    return *this;
+  }
+  __gmp_expr & operator=(const std::string &s)
+  {
+    if (mpq_set_str(mp, s.c_str(), 0) != 0)
+#ifdef __EXCEPTIONS
+      throw std::invalid_argument ("mpq_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpq_set_str\n";
+        std::abort();
+      }
+#endif
+    return *this;
+  }
+
+  // string input/output functions
+  int set_str(const char *s, int base)
+  { return mpq_set_str(mp, s, base); }
+  int set_str(const std::string &s, int base)
+  { return mpq_set_str(mp, s.c_str(), base); }
+  std::string get_str(int base = 10) const
+  {
+    __gmp_alloc_cstring temp(mpq_get_str(0, base, mp));
+    return std::string(temp.str);
+  }
+
+  // conversion functions
+
+  // casting a reference to an mpz_t to mpz_class & is a dirty hack.
+  // It kind of works because the internal representation of mpz_class is
+  // exactly an mpz_t, but compilers are allowed to assume that mpq_class
+  // and mpz_class do not alias... In mpz_class, we avoid using mp directly,
+  // to reduce the risks of such problematic optimizations.
+  const mpz_class & get_num() const
+  { return reinterpret_cast<const mpz_class &>(*mpq_numref(mp)); }
+  mpz_class & get_num()
+  { return reinterpret_cast<mpz_class &>(*mpq_numref(mp)); }
+  const mpz_class & get_den() const
+  { return reinterpret_cast<const mpz_class &>(*mpq_denref(mp)); }
+  mpz_class & get_den()
+  { return reinterpret_cast<mpz_class &>(*mpq_denref(mp)); }
+
+  mpq_srcptr __get_mp() const { return mp; }
+  mpq_ptr __get_mp() { return mp; }
+  mpq_srcptr get_mpq_t() const { return mp; }
+  mpq_ptr get_mpq_t() { return mp; }
+
+  mpz_srcptr get_num_mpz_t() const { return mpq_numref(mp); }
+  mpz_ptr get_num_mpz_t() { return mpq_numref(mp); }
+  mpz_srcptr get_den_mpz_t() const { return mpq_denref(mp); }
+  mpz_ptr get_den_mpz_t() { return mpq_denref(mp); }
+
+  double get_d() const { return mpq_get_d(mp); }
+
+#if __GMPXX_USE_CXX11
+  // True iff the numerator is nonzero.
+  explicit operator bool() const { return mpq_numref(mp)->_mp_size != 0; }
+#endif
+
+  // compound assignments
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)
+
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)
+
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)
+};
+
+typedef __gmp_expr<mpq_t, mpq_t> mpq_class;
+
+
+/**************** mpf_class -- wrapper for mpf_t ****************/
+
+// mpf_class: the terminal expression type wrapping a raw mpf_t float.
+// Unlike mpz/mpq, each object carries its own precision, so most
+// constructors have an overload taking an explicit mp_bitcnt_t prec.
+template <>
+class __gmp_expr<mpf_t, mpf_t>
+{
+private:
+  typedef mpf_t value_type;
+  value_type mp;
+
+  // Helper functions used for all arithmetic types
+  void assign_ui(unsigned long l) { mpf_set_ui(mp, l); }
+  void assign_si(signed long l)
+  {
+    if (__GMPXX_CONSTANT_TRUE(l >= 0))
+      assign_ui(l);
+    else
+      mpf_set_si(mp, l);
+  }
+  void assign_d (double d)        { mpf_set_d (mp, d); }
+
+  // init_* variants initialize at the default precision.
+  void init_ui(unsigned long l)
+  {
+    if (__GMPXX_CONSTANT_TRUE(l == 0))
+      mpf_init(mp);
+    else
+      mpf_init_set_ui(mp, l);
+  }
+  void init_si(signed long l)
+  {
+    if (__GMPXX_CONSTANT_TRUE(l >= 0))
+      init_ui(l);
+    else
+      mpf_init_set_si(mp, l);
+  }
+  void init_d (double d)	{ mpf_init_set_d (mp, d); }
+
+public:
+  mp_bitcnt_t get_prec() const { return mpf_get_prec(mp); }
+
+  void set_prec(mp_bitcnt_t prec) { mpf_set_prec(mp, prec); }
+  void set_prec_raw(mp_bitcnt_t prec) { mpf_set_prec_raw(mp, prec); }
+
+  // constructors and destructor
+  __gmp_expr() { mpf_init(mp); }
+
+  // Copy preserves the source's precision.
+  __gmp_expr(const __gmp_expr &f)
+  { mpf_init2(mp, f.get_prec()); mpf_set(mp, f.mp); }
+#if __GMPXX_USE_CXX11
+  // Move: steal f's representation, then re-init f (same precision) so
+  // its destructor remains safe.
+  __gmp_expr(__gmp_expr &&f)
+  { *mp = *f.mp; mpf_init2(f.mp, get_prec()); }
+#endif
+  __gmp_expr(const __gmp_expr &f, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set(mp, f.mp); }
+  // Converting from any expression adopts the expression's precision
+  // unless one is given explicitly.
+  template <class T, class U>
+  __gmp_expr(const __gmp_expr<T, U> &expr)
+  { mpf_init2(mp, expr.get_prec()); __gmp_set_expr(mp, expr); }
+  template <class T, class U>
+  __gmp_expr(const __gmp_expr<T, U> &expr, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); __gmp_set_expr(mp, expr); }
+
+  __GMPXX_DEFINE_ARITHMETIC_CONSTRUCTORS
+
+  // Built-in-type constructors with an explicit precision.
+  __gmp_expr(signed char c, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_si(mp, c); }
+  __gmp_expr(unsigned char c, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_ui(mp, c); }
+
+  __gmp_expr(signed int i, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_si(mp, i); }
+  __gmp_expr(unsigned int i, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_ui(mp, i); }
+
+  __gmp_expr(signed short int s, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_si(mp, s); }
+  __gmp_expr(unsigned short int s, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_ui(mp, s); }
+
+  __gmp_expr(signed long int l, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_si(mp, l); }
+  __gmp_expr(unsigned long int l, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_ui(mp, l); }
+
+  __gmp_expr(float f, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_d(mp, f); }
+  __gmp_expr(double d, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_d(mp, d); }
+  // __gmp_expr(long double ld) { mpf_init_set_d(mp, ld); }
+  // __gmp_expr(long double ld, mp_bitcnt_t prec)
+  // { mpf_init2(mp, prec); mpf_set_d(mp, ld); }
+
+  // String constructors: throw std::invalid_argument on a parse failure
+  // (or print and abort when exceptions are disabled).
+  explicit __gmp_expr(const char *s)
+  {
+    if (mpf_init_set_str (mp, s, 0) != 0)
+      {
+        mpf_clear (mp);
+#ifdef __EXCEPTIONS
+        throw std::invalid_argument ("mpf_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpf_set_str\n";
+        std::abort();
+      }
+#endif
+      }
+  }
+  __gmp_expr(const char *s, mp_bitcnt_t prec, int base = 0)
+  {
+    mpf_init2(mp, prec);
+    if (mpf_set_str(mp, s, base) != 0)
+      {
+        mpf_clear (mp);
+#ifdef __EXCEPTIONS
+        throw std::invalid_argument ("mpf_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpf_set_str\n";
+        std::abort();
+      }
+#endif
+      }
+  }
+  explicit __gmp_expr(const std::string &s)
+  {
+    if (mpf_init_set_str(mp, s.c_str(), 0) != 0)
+      {
+        mpf_clear (mp);
+#ifdef __EXCEPTIONS
+        throw std::invalid_argument ("mpf_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpf_set_str\n";
+        std::abort();
+      }
+#endif
+      }
+  }
+  __gmp_expr(const std::string &s, mp_bitcnt_t prec, int base = 0)
+  {
+    mpf_init2(mp, prec);
+    if (mpf_set_str(mp, s.c_str(), base) != 0)
+      {
+        mpf_clear (mp);
+#ifdef __EXCEPTIONS
+        throw std::invalid_argument ("mpf_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpf_set_str\n";
+        std::abort();
+      }
+#endif
+      }
+  }
+
+  explicit __gmp_expr(mpf_srcptr f)
+  { mpf_init2(mp, mpf_get_prec(f)); mpf_set(mp, f); }
+  __gmp_expr(mpf_srcptr f, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set(mp, f); }
+
+  ~__gmp_expr() { mpf_clear(mp); }
+
+  // O(1): swaps the mpf_t structs (including their precisions).
+  void swap(__gmp_expr& f) __GMPXX_NOEXCEPT { std::swap(*mp, *f.mp); }
+
+  // assignment operators
+  // Note: assignment keeps the destination's precision (mpf_set rounds).
+  __gmp_expr & operator=(const __gmp_expr &f)
+  { mpf_set(mp, f.mp); return *this; }
+#if __GMPXX_USE_CXX11
+  __gmp_expr & operator=(__gmp_expr &&f) noexcept
+  { swap(f); return *this; }
+#endif
+  template <class T, class U>
+  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
+  { __gmp_set_expr(mp, expr); return *this; }
+
+  __GMPXX_DEFINE_ARITHMETIC_ASSIGNMENTS
+
+  __gmp_expr & operator=(const char *s)
+  {
+    if (mpf_set_str (mp, s, 0) != 0)
+#ifdef __EXCEPTIONS
+      throw std::invalid_argument ("mpf_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpf_set_str\n";
+        std::abort();
+      }
+#endif
+    return *this;
+  }
+  __gmp_expr & operator=(const std::string &s)
+  {
+    if (mpf_set_str(mp, s.c_str(), 0) != 0)
+#ifdef __EXCEPTIONS
+      throw std::invalid_argument ("mpf_set_str");
+#else
+      {
+        std::cerr << "std::invalid_argument: mpf_set_str\n";
+        std::abort();
+      }
+#endif
+    return *this;
+  }
+
+  // string input/output functions
+  int set_str(const char *s, int base)
+  { return mpf_set_str(mp, s, base); }
+  int set_str(const std::string &s, int base)
+  { return mpf_set_str(mp, s.c_str(), base); }
+  // Returns the mantissa digits; the decimal exponent is stored in expo.
+  std::string get_str(mp_exp_t &expo, int base = 10, size_t size = 0) const
+  {
+    __gmp_alloc_cstring temp(mpf_get_str(0, &expo, base, size, mp));
+    return std::string(temp.str);
+  }
+
+  // conversion functions
+  mpf_srcptr __get_mp() const { return mp; }
+  mpf_ptr __get_mp() { return mp; }
+  mpf_srcptr get_mpf_t() const { return mp; }
+  mpf_ptr get_mpf_t() { return mp; }
+
+  signed long int get_si() const { return mpf_get_si(mp); }
+  unsigned long int get_ui() const { return mpf_get_ui(mp); }
+  double get_d() const { return mpf_get_d(mp); }
+
+  // bool fits_schar_p() const { return mpf_fits_schar_p(mp); }
+  // bool fits_uchar_p() const { return mpf_fits_uchar_p(mp); }
+  bool fits_sint_p() const { return mpf_fits_sint_p(mp); }
+  bool fits_uint_p() const { return mpf_fits_uint_p(mp); }
+  bool fits_sshort_p() const { return mpf_fits_sshort_p(mp); }
+  bool fits_ushort_p() const { return mpf_fits_ushort_p(mp); }
+  bool fits_slong_p() const { return mpf_fits_slong_p(mp); }
+  bool fits_ulong_p() const { return mpf_fits_ulong_p(mp); }
+  // bool fits_float_p() const { return mpf_fits_float_p(mp); }
+  // bool fits_double_p() const { return mpf_fits_double_p(mp); }
+  // bool fits_ldouble_p() const { return mpf_fits_ldouble_p(mp); }
+
+#if __GMPXX_USE_CXX11
+  // True iff the value is nonzero.
+  explicit operator bool() const { return mpf_sgn(mp) != 0; }
+#endif
+
+  // compound assignments
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)
+
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)
+
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)
+};
+
+typedef __gmp_expr<mpf_t, mpf_t> mpf_class;
+
+
+
+/**************** User-defined literals ****************/
+
+#if __GMPXX_USE_CXX11
+// Raw literal operators: receive the literal token as a string, so e.g.
+// 123456789012345678901234567890_mpz parses without overflow.
+inline mpz_class operator"" _mpz(const char* s)
+{
+  return mpz_class(s);
+}
+
+inline mpq_class operator"" _mpq(const char* s)
+{
+  // Parse the token into the numerator; the denominator stays at its
+  // default-constructed value of 1.
+  mpq_class q;
+  q.get_num() = s;
+  return q;
+}
+
+inline mpf_class operator"" _mpf(const char* s)
+{
+  return mpf_class(s);
+}
+#endif
+
+/**************** I/O operators ****************/
+
+// these should (and will) be provided separately
+
+// Output: first force evaluation of a compound expression into a
+// temporary mp*_class (no copy when expr is already one), then stream the
+// underlying GMP pointer.
+template <class T, class U>
+inline std::ostream & operator<<
+(std::ostream &o, const __gmp_expr<T, U> &expr)
+{
+  __gmp_expr<T, T> const& temp(expr);
+  return o << temp.__get_mp();
+}
+
+// Input is only defined for terminal (assignable) expression types.
+template <class T>
+inline std::istream & operator>>(std::istream &i, __gmp_expr<T, T> &expr)
+{
+  return i >> expr.__get_mp();
+}
+
+/*
+// you might want to uncomment this
+inline std::istream & operator>>(std::istream &i, mpq_class &q)
+{
+  i >> q.get_mpq_t();
+  q.canonicalize();
+  return i;
+}
+*/
+
+
+/**************** Functions for type conversion ****************/
+
+// __gmp_set_expr(dst, expr) assigns any expression (of any GMP category)
+// to a raw mpz_t/mpq_t/mpf_t.  Same-category expressions evaluate directly
+// into dst; cross-category ones first evaluate into a temporary of their
+// own category, then convert.
+inline void __gmp_set_expr(mpz_ptr z, const mpz_class &w)
+{
+  mpz_set(z, w.get_mpz_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpz_t, T> &expr)
+{
+  expr.eval(z);
+}
+
+// mpq -> mpz truncates (mpz_set_q).
+template <class T>
+inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpq_t, T> &expr)
+{
+  mpq_class const& temp(expr);
+  mpz_set_q(z, temp.get_mpq_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpf_t, T> &expr)
+{
+  mpf_class const& temp(expr);
+  mpz_set_f(z, temp.get_mpf_t());
+}
+
+inline void __gmp_set_expr(mpq_ptr q, const mpz_class &z)
+{
+  mpq_set_z(q, z.get_mpz_t());
+}
+
+// mpz -> mpq: evaluate into the numerator, set the denominator to 1.
+template <class T>
+inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpz_t, T> &expr)
+{
+  __gmp_set_expr(mpq_numref(q), expr);
+  mpz_set_ui(mpq_denref(q), 1);
+}
+
+inline void __gmp_set_expr(mpq_ptr q, const mpq_class &r)
+{
+  mpq_set(q, r.get_mpq_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpq_t, T> &expr)
+{
+  expr.eval(q);
+}
+
+template <class T>
+inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpf_t, T> &expr)
+{
+  mpf_class const& temp(expr);
+  mpq_set_f(q, temp.get_mpf_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpz_t, T> &expr)
+{
+  mpz_class const& temp(expr);
+  mpf_set_z(f, temp.get_mpz_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpq_t, T> &expr)
+{
+  mpq_class const& temp(expr);
+  mpf_set_q(f, temp.get_mpq_t());
+}
+
+inline void __gmp_set_expr(mpf_ptr f, const mpf_class &g)
+{
+  mpf_set(f, g.get_mpf_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpf_t, T> &expr)
+{
+  expr.eval(f);
+}
+
+
+/* Temporary objects */
+
+// __gmp_temp<T>: evaluates a subexpression into a temporary mp*_class so a
+// raw pointer can be passed to a GMP function.  The unused second
+// constructor argument carries the destination pointer; only the mpf
+// specialization needs it (to size the temporary's precision).
+template <class T>
+class __gmp_temp
+{
+  __gmp_expr<T, T> val;
+  public:
+  template<class U, class V>
+  __gmp_temp(U const& u, V) : val (u) {}
+  typename __gmp_resolve_expr<T>::srcptr_type
+  __get_mp() const { return val.__get_mp(); }
+};
+
+template <>
+class __gmp_temp <mpf_t>
+{
+  mpf_class val;
+  public:
+  // The temporary adopts the precision of the result it will be
+  // combined into, not the default precision.
+  template<class U>
+  __gmp_temp(U const& u, mpf_ptr res) : val (u, mpf_get_prec(res)) {}
+  mpf_srcptr __get_mp() const { return val.__get_mp(); }
+};
+
+/**************** Specializations of __gmp_expr ****************/
+/* The eval() method of __gmp_expr<T, U> evaluates the corresponding
+   expression and assigns the result to its argument, which is either an
+   mpz_t, mpq_t, or mpf_t as specified by the T argument.
+   Compound expressions are evaluated recursively (temporaries are created
+   to hold intermediate values), while for simple expressions the eval()
+   method of the appropriate function object (available as the Op argument
+   of either __gmp_unary_expr<T, Op> or __gmp_binary_expr<T, U, Op>) is
+   called. */
+
+
+/**************** Unary expressions ****************/
+/* cases:
+   - simple:   argument is mp*_class, that is, __gmp_expr<T, T>
+   - compound: argument is __gmp_expr<T, U> (with U not equal to T) */
+
+
+// simple expressions
+
+// Unary expression whose operand is already a terminal mp*_class:
+// eval() applies Op directly to the operand's raw pointer.
+template <class T, class Op>
+class __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val_type;
+
+  __gmp_unary_expr<val_type, Op> expr;
+public:
+  explicit __gmp_expr(const val_type &val) : expr(val) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  { Op::eval(p, expr.val.__get_mp()); }
+  const val_type & get_val() const { return expr.val; }
+  mp_bitcnt_t get_prec() const { return expr.val.get_prec(); }
+};
+
+
+// simple expressions, U is a built-in numerical type
+
+// Unary expression whose operand is a built-in numerical type: Op receives
+// the value itself.  With no GMP operand to consult, get_prec() falls back
+// to the default mpf precision.
+template <class T, class U, class Op>
+class __gmp_expr<T, __gmp_unary_expr<U, Op> >
+{
+private:
+  typedef U val_type;
+
+  __gmp_unary_expr<val_type, Op> expr;
+public:
+  explicit __gmp_expr(const val_type &val) : expr(val) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  { Op::eval(p, expr.val); }
+  const val_type & get_val() const { return expr.val; }
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
+};
+
+
+// compound expressions
+
+// Unary expression over a compound subexpression: eval() first evaluates
+// the subexpression into the destination p, then applies Op in place
+// (Op::eval(p, p)), avoiding a separate temporary.
+template <class T, class U, class Op>
+class __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val_type;
+
+  __gmp_unary_expr<val_type, Op> expr;
+public:
+  explicit __gmp_expr(const val_type &val) : expr(val) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  { expr.val.eval(p); Op::eval(p, p); }
+  const val_type & get_val() const { return expr.val; }
+  mp_bitcnt_t get_prec() const { return expr.val.get_prec(); }
+};
+
+
+/**************** Binary expressions ****************/
+/* simple:
+   - arguments are both mp*_class
+   - one argument is mp*_class, one is a built-in type
+   compound:
+   - one is mp*_class, one is __gmp_expr<T, U>
+   - one is __gmp_expr<T, U>, one is built-in
+   - both arguments are __gmp_expr<...> */
+
+
+// simple expressions
+
+// Binary expression with two terminal mp*_class operands: eval() applies
+// Op directly to both raw pointers.  get_prec() reports the larger of the
+// operands' precisions (relevant only for mpf).
+template <class T, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val1_type;
+  typedef __gmp_expr<T, T> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  { Op::eval(p, expr.val1.__get_mp(), expr.val2.__get_mp()); }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  mp_bitcnt_t get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+
+// simple expressions, U is a built-in numerical type
+
+// Binary expression: simple mp*_class on the left, built-in numeric type on
+// the right; the built-in value is forwarded to Op unchanged.
+template <class T, class U, class Op>
+class __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val1_type;
+  typedef U val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  { Op::eval(dst, expr_.val1.__get_mp(), expr_.val2); }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Only the mp*_class operand carries a precision.
+  mp_bitcnt_t get_prec() const { return expr_.val1.get_prec(); }
+};
+
+// Binary expression: built-in numeric type on the left, simple mp*_class on
+// the right (mirror of the specialization above).
+template <class T, class U, class Op>
+class __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >
+{
+private:
+  typedef U val1_type;
+  typedef __gmp_expr<T, T> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  { Op::eval(dst, expr_.val1, expr_.val2.__get_mp()); }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Only the mp*_class operand carries a precision.
+  mp_bitcnt_t get_prec() const { return expr_.val2.get_prec(); }
+};
+
+
+// compound expressions, one argument is a subexpression
+
+// Compound: simple mp*_class on the left, subexpression of a possibly
+// different numeric type on the right.
+template <class T, class U, class V, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val1_type;
+  typedef __gmp_expr<U, V> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    if (dst == expr_.val1.__get_mp())
+      {
+        // dst aliases the left operand: evaluate the right operand into a
+        // temporary so the left operand is not clobbered before Op reads it.
+        __gmp_temp<T> tmp(expr_.val2, dst);
+        Op::eval(dst, expr_.val1.__get_mp(), tmp.__get_mp());
+      }
+    else
+      {
+        // No aliasing: materialize the right operand directly in dst.
+        __gmp_set_expr(dst, expr_.val2);
+        Op::eval(dst, expr_.val1.__get_mp(), dst);
+      }
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Report the wider of the two operand precisions.
+  mp_bitcnt_t get_prec() const
+  {
+    const mp_bitcnt_t p1 = expr_.val1.get_prec();
+    const mp_bitcnt_t p2 = expr_.val2.get_prec();
+    return (p1 < p2) ? p2 : p1;
+  }
+};
+
+// Compound: subexpression of a possibly different numeric type on the left,
+// simple mp*_class on the right.
+template <class T, class U, class V, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >
+{
+private:
+  typedef __gmp_expr<U, V> val1_type;
+  typedef __gmp_expr<T, T> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    if (dst == expr_.val2.__get_mp())
+      {
+        // dst aliases the right operand: evaluate the left operand into a
+        // temporary so the right operand is not clobbered before Op reads it.
+        __gmp_temp<T> tmp(expr_.val1, dst);
+        Op::eval(dst, tmp.__get_mp(), expr_.val2.__get_mp());
+      }
+    else
+      {
+        // No aliasing: materialize the left operand directly in dst.
+        __gmp_set_expr(dst, expr_.val1);
+        Op::eval(dst, dst, expr_.val2.__get_mp());
+      }
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Report the wider of the two operand precisions.
+  mp_bitcnt_t get_prec() const
+  {
+    const mp_bitcnt_t p1 = expr_.val1.get_prec();
+    const mp_bitcnt_t p2 = expr_.val2.get_prec();
+    return (p1 < p2) ? p2 : p1;
+  }
+};
+
+// Compound: simple mp*_class on the left, same-type subexpression on the
+// right.
+template <class T, class U, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val1_type;
+  typedef __gmp_expr<T, U> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    if (dst == expr_.val1.__get_mp())
+      {
+        // dst aliases the left operand: evaluate the right operand into a
+        // temporary so the left operand is not clobbered before Op reads it.
+        __gmp_temp<T> tmp(expr_.val2, dst);
+        Op::eval(dst, expr_.val1.__get_mp(), tmp.__get_mp());
+      }
+    else
+      {
+        // No aliasing: materialize the right operand directly in dst.
+        __gmp_set_expr(dst, expr_.val2);
+        Op::eval(dst, expr_.val1.__get_mp(), dst);
+      }
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Report the wider of the two operand precisions.
+  mp_bitcnt_t get_prec() const
+  {
+    const mp_bitcnt_t p1 = expr_.val1.get_prec();
+    const mp_bitcnt_t p2 = expr_.val2.get_prec();
+    return (p1 < p2) ? p2 : p1;
+  }
+};
+
+// Compound: same-type subexpression on the left, simple mp*_class on the
+// right.
+template <class T, class U, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val1_type;
+  typedef __gmp_expr<T, T> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    if (dst == expr_.val2.__get_mp())
+      {
+        // dst aliases the right operand: evaluate the left operand into a
+        // temporary so the right operand is not clobbered before Op reads it.
+        __gmp_temp<T> tmp(expr_.val1, dst);
+        Op::eval(dst, tmp.__get_mp(), expr_.val2.__get_mp());
+      }
+    else
+      {
+        // No aliasing: materialize the left operand directly in dst.
+        __gmp_set_expr(dst, expr_.val1);
+        Op::eval(dst, dst, expr_.val2.__get_mp());
+      }
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Report the wider of the two operand precisions.
+  mp_bitcnt_t get_prec() const
+  {
+    const mp_bitcnt_t p1 = expr_.val1.get_prec();
+    const mp_bitcnt_t p2 = expr_.val2.get_prec();
+    return (p1 < p2) ? p2 : p1;
+  }
+};
+
+
+// one argument is a subexpression, one is a built-in
+
+// Compound: subexpression on the left, built-in numeric on the right;
+// evaluate the subexpression into dst, then fold in the built-in value.
+template <class T, class U, class V, class Op>
+class __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val1_type;
+  typedef V val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    expr_.val1.eval(dst);
+    Op::eval(dst, dst, expr_.val2);
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Only the expression operand carries a precision.
+  mp_bitcnt_t get_prec() const { return expr_.val1.get_prec(); }
+};
+
+// Compound: built-in numeric on the left, subexpression on the right;
+// evaluate the subexpression into dst, then apply Op with the built-in.
+template <class T, class U, class V, class Op>
+class __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >
+{
+private:
+  typedef U val1_type;
+  typedef __gmp_expr<T, V> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    expr_.val2.eval(dst);
+    Op::eval(dst, expr_.val1, dst);
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Only the expression operand carries a precision.
+  mp_bitcnt_t get_prec() const { return expr_.val2.get_prec(); }
+};
+
+
+// both arguments are subexpressions
+
+// Both operands are subexpressions (right one of a possibly different
+// numeric type).  The right operand must be captured in a temporary BEFORE
+// the left one is evaluated into dst, since that evaluation may clobber
+// values the right operand still depends on.
+template <class T, class U, class V, class W, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val1_type;
+  typedef __gmp_expr<V, W> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    __gmp_temp<T> rhs(expr_.val2, dst);
+    expr_.val1.eval(dst);
+    Op::eval(dst, dst, rhs.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Report the wider of the two operand precisions.
+  mp_bitcnt_t get_prec() const
+  {
+    const mp_bitcnt_t p1 = expr_.val1.get_prec();
+    const mp_bitcnt_t p2 = expr_.val2.get_prec();
+    return (p1 < p2) ? p2 : p1;
+  }
+};
+
+// Both operands are subexpressions (left one of a possibly different
+// numeric type).  The left operand is captured in a temporary BEFORE the
+// right one is evaluated into dst, for the same aliasing reason as above.
+template <class T, class U, class V, class W, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+{
+private:
+  typedef __gmp_expr<U, V> val1_type;
+  typedef __gmp_expr<T, W> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    __gmp_temp<T> lhs(expr_.val1, dst);
+    expr_.val2.eval(dst);
+    Op::eval(dst, lhs.__get_mp(), dst);
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Report the wider of the two operand precisions.
+  mp_bitcnt_t get_prec() const
+  {
+    const mp_bitcnt_t p1 = expr_.val1.get_prec();
+    const mp_bitcnt_t p2 = expr_.val2.get_prec();
+    return (p1 < p2) ? p2 : p1;
+  }
+};
+
+// Both operands are subexpressions of the same numeric type.  Capture the
+// right operand in a temporary first, then evaluate the left into dst.
+template <class T, class U, class V, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val1_type;
+  typedef __gmp_expr<T, V> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr_;
+public:
+  __gmp_expr(const val1_type &a, const val2_type &b) : expr_(a, b) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type dst) const
+  {
+    __gmp_temp<T> rhs(expr_.val2, dst);
+    expr_.val1.eval(dst);
+    Op::eval(dst, dst, rhs.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr_.val1; }
+  const val2_type & get_val2() const { return expr_.val2; }
+  // Report the wider of the two operand precisions.
+  mp_bitcnt_t get_prec() const
+  {
+    const mp_bitcnt_t p1 = expr_.val1.get_prec();
+    const mp_bitcnt_t p2 = expr_.val2.get_prec();
+    return (p1 < p2) ? p2 : p1;
+  }
+};
+
+
+/**************** Special cases ****************/
+
+/* Some operations (i.e., add and subtract) with mixed mpz/mpq arguments
+   can be done directly without first converting the mpz to mpq.
+   Appropriate specializations of __gmp_expr are required. */
+
+
+/* __GMPZQ_DEFINE_EXPR(eval_fun) defines the __gmp_expr specializations that
+   let eval_fun combine an mpz operand with an mpq operand directly, without
+   first converting the mpz to an mpq.  Subexpression operands are first
+   evaluated into a local temporary of the matching class type.
+   (Fix: realigned the continuation backslashes of the last eval() body,
+   which had drifted out of column with the rest of the macro.)  */
+#define __GMPZQ_DEFINE_EXPR(eval_fun)                                       \
+                                                                            \
+template <>                                                                 \
+class __gmp_expr<mpq_t, __gmp_binary_expr<mpz_class, mpq_class, eval_fun> > \
+{                                                                           \
+private:                                                                    \
+  typedef mpz_class val1_type;                                              \
+  typedef mpq_class val2_type;                                              \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  { eval_fun::eval(q, expr.val1.get_mpz_t(), expr.val2.get_mpq_t()); }      \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
+};                                                                          \
+                                                                            \
+template <>                                                                 \
+class __gmp_expr<mpq_t, __gmp_binary_expr<mpq_class, mpz_class, eval_fun> > \
+{                                                                           \
+private:                                                                    \
+  typedef mpq_class val1_type;                                              \
+  typedef mpz_class val2_type;                                              \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  { eval_fun::eval(q, expr.val1.get_mpq_t(), expr.val2.get_mpz_t()); }      \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
+};                                                                          \
+                                                                            \
+template <class T>                                                          \
+class __gmp_expr                                                            \
+<mpq_t, __gmp_binary_expr<mpz_class, __gmp_expr<mpq_t, T>, eval_fun> >      \
+{                                                                           \
+private:                                                                    \
+  typedef mpz_class val1_type;                                              \
+  typedef __gmp_expr<mpq_t, T> val2_type;                                   \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpq_class temp(expr.val2);                                              \
+    eval_fun::eval(q, expr.val1.get_mpz_t(), temp.get_mpq_t());             \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
+};                                                                          \
+                                                                            \
+template <class T>                                                          \
+class __gmp_expr                                                            \
+<mpq_t, __gmp_binary_expr<mpq_class, __gmp_expr<mpz_t, T>, eval_fun> >      \
+{                                                                           \
+private:                                                                    \
+  typedef mpq_class val1_type;                                              \
+  typedef __gmp_expr<mpz_t, T> val2_type;                                   \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpz_class temp(expr.val2);                                              \
+    eval_fun::eval(q, expr.val1.get_mpq_t(), temp.get_mpz_t());             \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
+};                                                                          \
+                                                                            \
+template <class T>                                                          \
+class __gmp_expr                                                            \
+<mpq_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, mpq_class, eval_fun> >      \
+{                                                                           \
+private:                                                                    \
+  typedef __gmp_expr<mpz_t, T> val1_type;                                   \
+  typedef mpq_class val2_type;                                              \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpz_class temp(expr.val1);                                              \
+    eval_fun::eval(q, temp.get_mpz_t(), expr.val2.get_mpq_t());             \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
+};                                                                          \
+                                                                            \
+template <class T>                                                          \
+class __gmp_expr                                                            \
+<mpq_t, __gmp_binary_expr<__gmp_expr<mpq_t, T>, mpz_class, eval_fun> >      \
+{                                                                           \
+private:                                                                    \
+  typedef __gmp_expr<mpq_t, T> val1_type;                                   \
+  typedef mpz_class val2_type;                                              \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpq_class temp(expr.val1);                                              \
+    eval_fun::eval(q, temp.get_mpq_t(), expr.val2.get_mpz_t());             \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
+};                                                                          \
+                                                                            \
+template <class T, class U>                                                 \
+class __gmp_expr<mpq_t, __gmp_binary_expr                                   \
+<__gmp_expr<mpz_t, T>, __gmp_expr<mpq_t, U>, eval_fun> >                    \
+{                                                                           \
+private:                                                                    \
+  typedef __gmp_expr<mpz_t, T> val1_type;                                   \
+  typedef __gmp_expr<mpq_t, U> val2_type;                                   \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpz_class temp1(expr.val1);                                             \
+    expr.val2.eval(q);                                                      \
+    eval_fun::eval(q, temp1.get_mpz_t(), q);                                \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
+};                                                                          \
+                                                                            \
+template <class T, class U>                                                 \
+class __gmp_expr<mpq_t, __gmp_binary_expr                                   \
+<__gmp_expr<mpq_t, T>, __gmp_expr<mpz_t, U>, eval_fun> >                    \
+{                                                                           \
+private:                                                                    \
+  typedef __gmp_expr<mpq_t, T> val1_type;                                   \
+  typedef __gmp_expr<mpz_t, U> val2_type;                                   \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpz_class temp2(expr.val2);                                             \
+    expr.val1.eval(q);                                                      \
+    eval_fun::eval(q, q, temp2.get_mpz_t());                                \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
+};
+
+
+// Instantiate the direct mixed mpz/mpq specializations for addition and
+// subtraction (see the comment above __GMPZQ_DEFINE_EXPR).
+__GMPZQ_DEFINE_EXPR(__gmp_binary_plus)
+__GMPZQ_DEFINE_EXPR(__gmp_binary_minus)
+
+
+
+/**************** Macros for defining functions ****************/
+/* Results of operators and functions are instances of __gmp_expr<T, U>.
+   T determines the numerical type of the expression: it can be either
+   mpz_t, mpq_t, or mpf_t.  When the arguments of a binary
+   expression have different numerical types, __gmp_resolve_expr is used
+   to determine the "larger" type.
+   U is either __gmp_unary_expr<V, Op> or __gmp_binary_expr<V, W, Op>,
+   where V and W are the arguments' types -- they can in turn be
+   expressions, which makes it possible to build compound expressions to
+   any degree of complexity.
+   Op is a function object that must have an eval() method accepting
+   appropriate arguments.
+   Actual evaluation of a __gmp_expr<T, U> object is done when it gets
+   assigned to an mp*_class ("lazy" evaluation): this is done by calling
+   its eval() method. */
+
+
+// non-member unary operators and functions
+
+// Defines fun(expr): builds a lazy unary __gmp_expr node; eval_fun runs
+// only when the node is later assigned to an mp*_class.
+#define __GMP_DEFINE_UNARY_FUNCTION(fun, eval_fun)                           \
+                                                                             \
+template <class T, class U>                                                  \
+inline __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >          \
+fun(const __gmp_expr<T, U> &expr)                                            \
+{                                                                            \
+  return __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >(expr); \
+}
+
+// variant that only works for one of { mpz, mpq, mpf }
+
+// T is fixed to a single mp*_t type, so only expressions of that type
+// match this overload of fun.
+#define __GMP_DEFINE_UNARY_FUNCTION_1(T, fun, eval_fun)                      \
+                                                                             \
+template <class U>                                                           \
+inline __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >          \
+fun(const __gmp_expr<T, U> &expr)                                            \
+{                                                                            \
+  return __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >(expr); \
+}
+
+// Defines fun(expr) returning a plain built-in `type` (eager, not lazy):
+// the argument is first bound/converted to an __gmp_expr<T, T>.
+// (Fix: realigned the continuation backslash of the temp-binding line.)
+#define __GMP_DEFINE_UNARY_TYPE_FUNCTION(type, fun, eval_fun) \
+                                                              \
+template <class T, class U>                                   \
+inline type fun(const __gmp_expr<T, U> &expr)                 \
+{                                                             \
+  __gmp_expr<T, T> const& temp(expr);                         \
+  return eval_fun::eval(temp.__get_mp());                     \
+}
+
+
+// non-member binary operators and functions
+
+// fun(expr1, expr2) for two expression operands, possibly of different
+// numeric types; __gmp_resolve_expr picks the result's value type.
+#define __GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun)                   \
+                                                                       \
+template <class T, class U, class V, class W>                          \
+inline __gmp_expr<typename __gmp_resolve_expr<T, V>::value_type,       \
+__gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, eval_fun> >      \
+fun(const __gmp_expr<T, U> &expr1, const __gmp_expr<V, W> &expr2)      \
+{                                                                      \
+  return __gmp_expr<typename __gmp_resolve_expr<T, V>::value_type,     \
+     __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, eval_fun> > \
+    (expr1, expr2);                                                    \
+}
+
+// fun(expr, t) and fun(t, expr) where t is a built-in `type`; the value is
+// stored in the expression node widened to `bigtype`.
+#define __GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, bigtype)       \
+                                                                           \
+template <class T, class U>                                                \
+inline __gmp_expr                                                          \
+<T, __gmp_binary_expr<__gmp_expr<T, U>, bigtype, eval_fun> >               \
+fun(const __gmp_expr<T, U> &expr, type t)                                  \
+{                                                                          \
+  return __gmp_expr                                                        \
+    <T, __gmp_binary_expr<__gmp_expr<T, U>, bigtype, eval_fun> >(expr, t); \
+}                                                                          \
+                                                                           \
+template <class T, class U>                                                \
+inline __gmp_expr                                                          \
+<T, __gmp_binary_expr<bigtype, __gmp_expr<T, U>, eval_fun> >               \
+fun(type t, const __gmp_expr<T, U> &expr)                                  \
+{                                                                          \
+  return __gmp_expr                                                        \
+    <T, __gmp_binary_expr<bigtype, __gmp_expr<T, U>, eval_fun> >(t, expr); \
+}
+
+// Shorthands choosing the widened storage type for a built-in operand:
+// signed integrals -> signed long, unsigned integrals -> unsigned long,
+// floating point -> double (a long double variant exists but is unused).
+#define __GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)          \
+__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, signed long int)
+
+#define __GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)            \
+__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, unsigned long int)
+
+#define __GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, type) \
+__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, double)
+
+#define __GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)     \
+__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, long double)
+
+// Expands fun for every supported built-in operand type.  Note: float and
+// double both go through the double variant, and the long double expansion
+// is deliberately left commented out.
+#define __GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun)              \
+__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed char)        \
+__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned char)      \
+__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed int)         \
+__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned int)       \
+__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed short int)   \
+__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned short int) \
+__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long int)    \
+__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long int)  \
+__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, float)              \
+__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, double)             \
+/* __GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double) */
+
+// Full set of overloads for fun: expr/expr plus expr/built-in in both
+// argument orders.
+#define __GMP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \
+__GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun)        \
+__GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun)
+
+// variant that only works for one of { mpz, mpq, mpf }
+
+// As __GMPP_DEFINE_BINARY_FUNCTION, but both operands are fixed to the
+// single expression type T.
+#define __GMPP_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun)              \
+                                                                       \
+template <class U, class W>                                            \
+inline __gmp_expr<T,                                                   \
+__gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, W>, eval_fun> >      \
+fun(const __gmp_expr<T, U> &expr1, const __gmp_expr<T, W> &expr2)      \
+{                                                                      \
+  return __gmp_expr<T,                                                 \
+     __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, W>, eval_fun> > \
+    (expr1, expr2);                                                    \
+}
+
+// As __GMPNN_DEFINE_BINARY_FUNCTION, but the expression operand is fixed
+// to the single type T; the built-in `type` is widened to `bigtype`.
+#define __GMPNN_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type, bigtype)  \
+                                                                           \
+template <class U>                                                         \
+inline __gmp_expr                                                          \
+<T, __gmp_binary_expr<__gmp_expr<T, U>, bigtype, eval_fun> >               \
+fun(const __gmp_expr<T, U> &expr, type t)                                  \
+{                                                                          \
+  return __gmp_expr                                                        \
+    <T, __gmp_binary_expr<__gmp_expr<T, U>, bigtype, eval_fun> >(expr, t); \
+}                                                                          \
+                                                                           \
+template <class U>                                                         \
+inline __gmp_expr                                                          \
+<T, __gmp_binary_expr<bigtype, __gmp_expr<T, U>, eval_fun> >               \
+fun(type t, const __gmp_expr<T, U> &expr)                                  \
+{                                                                          \
+  return __gmp_expr                                                        \
+    <T, __gmp_binary_expr<bigtype, __gmp_expr<T, U>, eval_fun> >(t, expr); \
+}
+
+// Single-type shorthands mirroring the __GMPNx_DEFINE_BINARY_FUNCTION
+// widening rules (signed long / unsigned long / double / long double).
+#define __GMPNS_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type)          \
+__GMPNN_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type, signed long int)
+
+#define __GMPNU_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type)          \
+__GMPNN_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type, unsigned long int)
+
+#define __GMPND_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type) \
+__GMPNN_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type, double)
+
+#define __GMPNLD_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type)     \
+__GMPNN_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, type, long double)
+
+#define __GMPN_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun)              \
+__GMPNS_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, signed char)        \
+__GMPNU_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, unsigned char)      \
+__GMPNS_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, signed int)         \
+__GMPNU_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, unsigned int)       \
+__GMPNS_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, signed short int)   \
+__GMPNU_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, unsigned short int) \
+__GMPNS_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, signed long int)    \
+__GMPNU_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, unsigned long int)  \
+__GMPND_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, float)              \
+__GMPND_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, double)             \
+/* __GMPNLD_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun, long double) */
+
+#define __GMP_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun) \
+__GMPP_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun)        \
+__GMPN_DEFINE_BINARY_FUNCTION_1(T, fun, eval_fun)
+
+/* fun(expr, l): binary op whose second operand is an mp_bitcnt_t count.  */
+#define __GMP_DEFINE_BINARY_FUNCTION_UI(fun, eval_fun)                 \
+                                                                       \
+template <class T, class U>                                            \
+inline __gmp_expr                                                      \
+<T, __gmp_binary_expr<__gmp_expr<T, U>, mp_bitcnt_t, eval_fun> > \
+fun(const __gmp_expr<T, U> &expr, mp_bitcnt_t l)                 \
+{                                                                      \
+  return __gmp_expr<T, __gmp_binary_expr                               \
+    <__gmp_expr<T, U>, mp_bitcnt_t, eval_fun> >(expr, l);        \
+}
+
+/* fun returning a plain "type" (e.g. bool, int): both sides are evaluated eagerly.  */
+#define __GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)         \
+                                                                        \
+template <class T, class U, class V, class W>                           \
+inline type fun(const __gmp_expr<T, U> &expr1,                          \
+		const __gmp_expr<V, W> &expr2)                          \
+{                                                                       \
+  __gmp_expr<T, T> const& temp1(expr1);                                 \
+  __gmp_expr<V, V> const& temp2(expr2);                                 \
+  return eval_fun::eval(temp1.__get_mp(), temp2.__get_mp());            \
+}
+/* Mixed expr/builtin forms: the builtin operand is widened to bigtype.  */
+#define __GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,   \
+					    type2, bigtype)        \
+                                                                   \
+template <class T, class U>                                        \
+inline type fun(const __gmp_expr<T, U> &expr, type2 t)             \
+{                                                                  \
+  __gmp_expr<T, T> const& temp(expr);      \
+  return eval_fun::eval(temp.__get_mp(), static_cast<bigtype>(t)); \
+}                                                                  \
+                                                                   \
+template <class T, class U>                                        \
+inline type fun(type2 t, const __gmp_expr<T, U> &expr)             \
+{                                                                  \
+  __gmp_expr<T, T> const& temp(expr);      \
+  return eval_fun::eval(static_cast<bigtype>(t), temp.__get_mp()); \
+}
+
+#define __GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,                \
+				    type2, signed long int)
+
+#define __GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,                \
+				    type2, unsigned long int)
+
+#define __GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2, double)
+
+#define __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2)     \
+__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2, long double)
+/* One overload pair per supported builtin type.  */
+#define __GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)              \
+__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed char)        \
+__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned char)      \
+__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed int)         \
+__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned int)       \
+__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed short int)   \
+__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned short int) \
+__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long int)    \
+__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long int)  \
+__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, float)              \
+__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, double)             \
+/* __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double) */
+/* Entry point: expr/expr overload plus all expr/builtin combinations.  */
+#define __GMP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \
+__GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)        \
+__GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)
+
+
+// member compound-assignment operators (defined out of class for mpz/mpq/mpf)
+
+#define __GMPP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)                 \
+                                                                             \
+template <class T, class U>                                                  \
+inline type##_class & type##_class::fun(const __gmp_expr<T, U> &expr)        \
+{                                                                            \
+  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr                  \
+		 <type##_class, __gmp_expr<T, U>, eval_fun> >(*this, expr)); \
+  return *this;                                                              \
+}
+/* Compound assignment from a builtin operand, widened to bigtype.  */
+#define __GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,    \
+					 type2, bigtype)         \
+                                                                 \
+inline type##_class & type##_class::fun(type2 t)                 \
+{                                                                \
+  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr      \
+		 <type##_class, bigtype, eval_fun> >(*this, t)); \
+  return *this;                                                  \
+}
+/* bigtype selections parallel the BINARY_FUNCTION_1 family above.  */
+#define __GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,                \
+				 type2, signed long int)
+
+#define __GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,                \
+				 type2, unsigned long int)
+
+#define __GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2, double)
+
+#define __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2)     \
+__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2, long double)
+
+#define __GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)              \
+__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed char)        \
+__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned char)      \
+__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed int)         \
+__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned int)       \
+__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed short int)   \
+__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned short int) \
+__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed long int)    \
+__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned long int)  \
+__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, float)              \
+__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, double)             \
+/* __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, long double) */
+/* op=(expr) plus op=(builtin) for every supported builtin type.  */
+#define __GMP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) \
+__GMPP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)        \
+__GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)
+
+#define __GMPZ_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR(mpz, fun, eval_fun)
+
+#define __GMPQ_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR(mpq, fun, eval_fun)
+
+#define __GMPF_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR(mpf, fun, eval_fun)
+
+
+/* Compound assignment (<<=, >>=) whose operand is an mp_bitcnt_t count.  */
+#define __GMP_DEFINE_COMPOUND_OPERATOR_UI(type, fun, eval_fun)  \
+                                                                \
+inline type##_class & type##_class::fun(mp_bitcnt_t l)    \
+{                                                               \
+  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr     \
+    <type##_class, mp_bitcnt_t, eval_fun> >(*this, l));   \
+  return *this;                                                 \
+}
+
+#define __GMPZ_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpz, fun, eval_fun)
+
+#define __GMPQ_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpq, fun, eval_fun)
+
+#define __GMPF_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpf, fun, eval_fun)
+
+
+/* Prefix form mutates in place and returns *this; postfix copies first and returns the copy.  */
+#define __GMP_DEFINE_INCREMENT_OPERATOR(type, fun, eval_fun) \
+                                                             \
+inline type##_class & type##_class::fun()                    \
+{                                                            \
+  eval_fun::eval(mp);                                        \
+  return *this;                                              \
+}                                                            \
+                                                             \
+inline type##_class type##_class::fun(int)                   \
+{                                                            \
+  type##_class temp(*this);                                  \
+  eval_fun::eval(mp);                                        \
+  return temp;                                               \
+}
+
+#define __GMPZ_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_INCREMENT_OPERATOR(mpz, fun, eval_fun)
+
+#define __GMPQ_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_INCREMENT_OPERATOR(mpq, fun, eval_fun)
+
+#define __GMPF_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_INCREMENT_OPERATOR(mpf, fun, eval_fun)
+
+/* Static member factories (used below for mpz_class::factorial etc.) returning lazy expressions.  */
+#define __GMPP_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                  \
+template <class U>                                                           \
+__gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >          \
+fun(const __gmp_expr<T, U> &expr)                                            \
+{                                                                            \
+  return __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >(expr); \
+}
+/* Builtin-operand overloads wrap the widened value in a unary expression node.  */
+#define __GMPNN_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type, bigtype)  \
+inline __gmp_expr<T, __gmp_unary_expr<bigtype, eval_fun> >                   \
+fun(type expr)                                                               \
+{                                                                            \
+  return __gmp_expr<T, __gmp_unary_expr<bigtype, eval_fun> >(expr);          \
+}
+
+#define __GMPNS_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type)  \
+__GMPNN_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type, signed long)
+#define __GMPNU_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type)  \
+__GMPNN_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type, unsigned long)
+#define __GMPND_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type)  \
+__GMPNN_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, type, double)
+/* One overload per supported builtin type.  */
+#define __GMPN_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                 \
+__GMPNS_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, signed char)           \
+__GMPNU_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, unsigned char)         \
+__GMPNS_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, signed int)            \
+__GMPNU_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, unsigned int)          \
+__GMPNS_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, signed short int)      \
+__GMPNU_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, unsigned short int)    \
+__GMPNS_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, signed long int)       \
+__GMPNU_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, unsigned long int)     \
+__GMPND_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, float)                 \
+__GMPND_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun, double)                \
+
+#define __GMP_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                  \
+__GMPP_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                         \
+__GMPN_DEFINE_UNARY_STATIC_MEMFUN(T, fun, eval_fun)                         \
+
+
+/**************** Arithmetic operators and functions ****************/
+
+// non-member operators and functions
+
+__GMP_DEFINE_UNARY_FUNCTION(operator+, __gmp_unary_plus)
+__GMP_DEFINE_UNARY_FUNCTION(operator-, __gmp_unary_minus)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpz_t, operator~, __gmp_unary_com)
+
+__GMP_DEFINE_BINARY_FUNCTION(operator+, __gmp_binary_plus)
+__GMP_DEFINE_BINARY_FUNCTION(operator-, __gmp_binary_minus)
+__GMP_DEFINE_BINARY_FUNCTION(operator*, __gmp_binary_multiplies)
+__GMP_DEFINE_BINARY_FUNCTION(operator/, __gmp_binary_divides)
+__GMP_DEFINE_BINARY_FUNCTION_1(mpz_t, operator%, __gmp_binary_modulus)
+__GMP_DEFINE_BINARY_FUNCTION_1(mpz_t, operator&, __gmp_binary_and)
+__GMP_DEFINE_BINARY_FUNCTION_1(mpz_t, operator|, __gmp_binary_ior)
+__GMP_DEFINE_BINARY_FUNCTION_1(mpz_t, operator^, __gmp_binary_xor)
+// shift operators take an mp_bitcnt_t count
+__GMP_DEFINE_BINARY_FUNCTION_UI(operator<<, __gmp_binary_lshift)
+__GMP_DEFINE_BINARY_FUNCTION_UI(operator>>, __gmp_binary_rshift)
+// relational operators evaluate both sides and return bool
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator==, __gmp_binary_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator!=, ! __gmp_binary_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<, __gmp_binary_less)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<=, ! __gmp_binary_greater)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>, __gmp_binary_greater)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>=, ! __gmp_binary_less)
+// mathematical functions; results are lazy expression objects
+__GMP_DEFINE_UNARY_FUNCTION(abs, __gmp_abs_function)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpf_t, trunc, __gmp_trunc_function)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpf_t, floor, __gmp_floor_function)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpf_t, ceil, __gmp_ceil_function)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpf_t, sqrt, __gmp_sqrt_function)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpz_t, sqrt, __gmp_sqrt_function)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpz_t, factorial, __gmp_fac_function)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpz_t, primorial, __gmp_primorial_function)
+__GMP_DEFINE_UNARY_FUNCTION_1(mpz_t, fibonacci, __gmp_fib_function)
+__GMP_DEFINE_BINARY_FUNCTION_1(mpf_t, hypot, __gmp_hypot_function)
+__GMP_DEFINE_BINARY_FUNCTION_1(mpz_t, gcd, __gmp_gcd_function)
+__GMP_DEFINE_BINARY_FUNCTION_1(mpz_t, lcm, __gmp_lcm_function)
+// sgn and cmp evaluate immediately to int
+__GMP_DEFINE_UNARY_TYPE_FUNCTION(int, sgn, __gmp_sgn_function)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(int, cmp, __gmp_cmp_function)
+// ADL-visible swap forwarding to the member swap
+template <class T>
+void swap(__gmp_expr<T, T>& x, __gmp_expr<T, T>& y) __GMPXX_NOEXCEPT
+{ x.swap(y); }
+
+// member operators for mpz_class
+
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator%=, __gmp_binary_modulus)
+// bitwise compound assignment exists only for the integer class
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator&=, __gmp_binary_and)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator|=, __gmp_binary_ior)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator^=, __gmp_binary_xor)
+// shift-assign by an mp_bitcnt_t count
+__GMPZ_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)
+__GMPZ_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)
+// prefix and postfix ++/--
+__GMPZ_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)
+__GMPZ_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)
+// static factory members declared in mpz_class
+__GMP_DEFINE_UNARY_STATIC_MEMFUN(mpz_t, mpz_class::factorial, __gmp_fac_function)
+__GMP_DEFINE_UNARY_STATIC_MEMFUN(mpz_t, mpz_class::primorial, __gmp_primorial_function)
+__GMP_DEFINE_UNARY_STATIC_MEMFUN(mpz_t, mpz_class::fibonacci, __gmp_fib_function)
+
+// member operators for mpq_class
+
+__GMPQ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)
+__GMPQ_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)
+__GMPQ_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)
+__GMPQ_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)
+
+__GMPQ_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)
+__GMPQ_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)
+
+__GMPQ_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)
+__GMPQ_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)
+
+// member operators for mpf_class
+
+__GMPF_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)
+__GMPF_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)
+__GMPF_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)
+__GMPF_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)
+
+__GMPF_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)
+__GMPF_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)
+
+__GMPF_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)
+__GMPF_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)
+
+
+
+/**************** Class wrapper for gmp_randstate_t ****************/
+
+class __gmp_urandomb_value { };
+class __gmp_urandomm_value { };
+/* Lazy expression: uniform random mpz with the given number of bits.  */
+template <>
+class __gmp_expr<mpz_t, __gmp_urandomb_value>
+{
+private:
+  __gmp_randstate_struct *state;
+  mp_bitcnt_t bits;
+public:
+  __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { }
+  void eval(mpz_ptr z) const { __gmp_rand_function::eval(z, state, bits); }
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
+};
+/* Lazy expression: uniform random mpz in [0, range).  */
+template <>
+class __gmp_expr<mpz_t, __gmp_urandomm_value>
+{
+private:
+  __gmp_randstate_struct *state;
+  mpz_class range;
+public:
+  __gmp_expr(gmp_randstate_t s, const mpz_class &z) : state(s), range(z) { }
+  void eval(mpz_ptr z) const
+  { __gmp_rand_function::eval(z, state, range.get_mpz_t()); }
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
+};
+/* Lazy expression: uniform random mpf; bits == 0 means use the target's precision.  */
+template <>
+class __gmp_expr<mpf_t, __gmp_urandomb_value>
+{
+private:
+  __gmp_randstate_struct *state;
+  mp_bitcnt_t bits;
+public:
+  __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { }
+  void eval(mpf_ptr f) const
+  {
+    __gmp_rand_function::eval(f, state,
+	(bits>0) ? bits : mpf_get_prec(f));
+  }
+  mp_bitcnt_t get_prec() const
+  {
+    if (bits == 0)
+      return mpf_get_default_prec();
+    else
+      return bits;
+  }
+};
+/* C-linkage typedefs used to overload the gmp_randclass constructors below.  */
+extern "C" {
+  typedef void __gmp_randinit_default_t (gmp_randstate_t);
+  typedef void __gmp_randinit_lc_2exp_t (gmp_randstate_t, mpz_srcptr, unsigned long int, mp_bitcnt_t);
+  typedef int __gmp_randinit_lc_2exp_size_t (gmp_randstate_t, mp_bitcnt_t);
+}
+
+class gmp_randclass
+{
+private:
+  gmp_randstate_t state;
+
+  // copy construction and assignment not allowed
+  gmp_randclass(const gmp_randclass &);
+  void operator=(const gmp_randclass &);
+public:
+  // constructors and destructor
+  gmp_randclass(gmp_randalg_t alg, unsigned long int size)
+  {
+    switch (alg)
+      {
+      case GMP_RAND_ALG_LC: // no other cases for now
+      default:
+	gmp_randinit(state, alg, size);
+	break;
+      }
+  }
+  // The constructors below select the algorithm via a gmp_randinit_* function pointer.
+  // gmp_randinit_default
+  gmp_randclass(__gmp_randinit_default_t* f) { f(state); }
+
+  // gmp_randinit_lc_2exp
+  gmp_randclass(__gmp_randinit_lc_2exp_t* f,
+		mpz_class z, unsigned long int l1, mp_bitcnt_t l2)
+  { f(state, z.get_mpz_t(), l1, l2); }
+
+  // gmp_randinit_lc_2exp_size
+  gmp_randclass(__gmp_randinit_lc_2exp_size_t* f,
+		mp_bitcnt_t size)
+  {
+    if (f (state, size) == 0)
+#ifdef __EXCEPTIONS
+      throw std::length_error ("gmp_randinit_lc_2exp_size");
+#else
+      {
+        std::cerr << "std::length_error: gmp_randinit_lc_2exp_size\n";
+        std::abort();
+      }
+#endif
+  }
+
+  ~gmp_randclass() { gmp_randclear(state); }
+
+  // initialize
+  void seed(); // choose a random seed some way (?)
+  void seed(unsigned long int s) { gmp_randseed_ui(state, s); }
+  void seed(const mpz_class &z) { gmp_randseed(state, z.get_mpz_t()); }
+
+  // get random number
+  __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(mp_bitcnt_t l)
+  { return __gmp_expr<mpz_t, __gmp_urandomb_value>(state, l); }
+  __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(const mpz_class &z)
+  { return get_z_bits(z.get_ui()); }
+  // FIXME: z.get_bitcnt_t() ?
+
+  __gmp_expr<mpz_t, __gmp_urandomm_value> get_z_range(const mpz_class &z)
+  { return __gmp_expr<mpz_t, __gmp_urandomm_value>(state, z); }
+
+  __gmp_expr<mpf_t, __gmp_urandomb_value> get_f(mp_bitcnt_t prec = 0)
+  { return __gmp_expr<mpf_t, __gmp_urandomb_value>(state, prec); }
+};
+
+
+/**************** Specialize std::numeric_limits ****************/
+/* GMP classes are arbitrary precision, so most limits are left 0/false.  */
+namespace std {
+  template <> class numeric_limits<mpz_class>
+  {
+  public:
+    static const bool is_specialized = true;
+    static mpz_class min() { return mpz_class(); }
+    static mpz_class max() { return mpz_class(); }
+    static mpz_class lowest() { return mpz_class(); }
+    static const int digits = 0;
+    static const int digits10 = 0;
+    static const int max_digits10 = 0;
+    static const bool is_signed = true;
+    static const bool is_integer = true;
+    static const bool is_exact = true;
+    static const int radix = 2;
+    static mpz_class epsilon() { return mpz_class(); }
+    static mpz_class round_error() { return mpz_class(); }
+    static const int min_exponent = 0;
+    static const int min_exponent10 = 0;
+    static const int max_exponent = 0;
+    static const int max_exponent10 = 0;
+    static const bool has_infinity = false;
+    static const bool has_quiet_NaN = false;
+    static const bool has_signaling_NaN = false;
+    static const float_denorm_style has_denorm = denorm_absent;
+    static const bool has_denorm_loss = false;
+    static mpz_class infinity() { return mpz_class(); }
+    static mpz_class quiet_NaN() { return mpz_class(); }
+    static mpz_class signaling_NaN() { return mpz_class(); }
+    static mpz_class denorm_min() { return mpz_class(); }
+    static const bool is_iec559 = false;
+    static const bool is_bounded = false;
+    static const bool is_modulo = false;
+    static const bool traps = false;
+    static const bool tinyness_before = false;
+    static const float_round_style round_style = round_toward_zero;
+  };
+  /* mpq_class: exact rational type (is_integer is false).  */
+  template <> class numeric_limits<mpq_class>
+  {
+  public:
+    static const bool is_specialized = true;
+    static mpq_class min() { return mpq_class(); }
+    static mpq_class max() { return mpq_class(); }
+    static mpq_class lowest() { return mpq_class(); }
+    static const int digits = 0;
+    static const int digits10 = 0;
+    static const int max_digits10 = 0;
+    static const bool is_signed = true;
+    static const bool is_integer = false;
+    static const bool is_exact = true;
+    static const int radix = 2;
+    static mpq_class epsilon() { return mpq_class(); }
+    static mpq_class round_error() { return mpq_class(); }
+    static const int min_exponent = 0;
+    static const int min_exponent10 = 0;
+    static const int max_exponent = 0;
+    static const int max_exponent10 = 0;
+    static const bool has_infinity = false;
+    static const bool has_quiet_NaN = false;
+    static const bool has_signaling_NaN = false;
+    static const float_denorm_style has_denorm = denorm_absent;
+    static const bool has_denorm_loss = false;
+    static mpq_class infinity() { return mpq_class(); }
+    static mpq_class quiet_NaN() { return mpq_class(); }
+    static mpq_class signaling_NaN() { return mpq_class(); }
+    static mpq_class denorm_min() { return mpq_class(); }
+    static const bool is_iec559 = false;
+    static const bool is_bounded = false;
+    static const bool is_modulo = false;
+    static const bool traps = false;
+    static const bool tinyness_before = false;
+    static const float_round_style round_style = round_toward_zero;
+  };
+  /* mpf_class: inexact arbitrary-precision float.  */
+  template <> class numeric_limits<mpf_class>
+  {
+  public:
+    static const bool is_specialized = true;
+    static mpf_class min() { return mpf_class(); }
+    static mpf_class max() { return mpf_class(); }
+    static mpf_class lowest() { return mpf_class(); }
+    static const int digits = 0;
+    static const int digits10 = 0;
+    static const int max_digits10 = 0;
+    static const bool is_signed = true;
+    static const bool is_integer = false;
+    static const bool is_exact = false;
+    static const int radix = 2;
+    static mpf_class epsilon() { return mpf_class(); }
+    static mpf_class round_error() { return mpf_class(); }
+    static const int min_exponent = 0;
+    static const int min_exponent10 = 0;
+    static const int max_exponent = 0;
+    static const int max_exponent10 = 0;
+    static const bool has_infinity = false;
+    static const bool has_quiet_NaN = false;
+    static const bool has_signaling_NaN = false;
+    static const float_denorm_style has_denorm = denorm_absent;
+    static const bool has_denorm_loss = false;
+    static mpf_class infinity() { return mpf_class(); }
+    static mpf_class quiet_NaN() { return mpf_class(); }
+    static mpf_class signaling_NaN() { return mpf_class(); }
+    static mpf_class denorm_min() { return mpf_class(); }
+    static const bool is_iec559 = false;
+    static const bool is_bounded = false;
+    static const bool is_modulo = false;
+    static const bool traps = false;
+    static const bool tinyness_before = false;
+    static const float_round_style round_style = round_indeterminate;
+  };
+}
+
+
+/**************** #undef all private macros ****************/
+
+#undef __GMPP_DECLARE_COMPOUND_OPERATOR
+#undef __GMPN_DECLARE_COMPOUND_OPERATOR
+#undef __GMP_DECLARE_COMPOUND_OPERATOR
+#undef __GMP_DECLARE_COMPOUND_OPERATOR_UI
+#undef __GMP_DECLARE_INCREMENT_OPERATOR
+#undef __GMPXX_DEFINE_ARITHMETIC_CONSTRUCTORS
+#undef __GMPXX_DEFINE_ARITHMETIC_ASSIGNMENTS
+
+#undef __GMPZQ_DEFINE_EXPR
+
+#undef __GMP_DEFINE_UNARY_FUNCTION_1
+#undef __GMP_DEFINE_UNARY_FUNCTION
+#undef __GMP_DEFINE_UNARY_TYPE_FUNCTION
+
+#undef __GMPP_DEFINE_BINARY_FUNCTION
+#undef __GMPNN_DEFINE_BINARY_FUNCTION
+#undef __GMPNS_DEFINE_BINARY_FUNCTION
+#undef __GMPNU_DEFINE_BINARY_FUNCTION
+#undef __GMPND_DEFINE_BINARY_FUNCTION
+#undef __GMPNLD_DEFINE_BINARY_FUNCTION
+#undef __GMPN_DEFINE_BINARY_FUNCTION
+#undef __GMP_DEFINE_BINARY_FUNCTION
+
+#undef __GMP_DEFINE_BINARY_FUNCTION_UI
+
+#undef __GMPP_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPNN_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPNS_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPNU_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPND_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPN_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMP_DEFINE_BINARY_TYPE_FUNCTION
+
+#undef __GMPZ_DEFINE_COMPOUND_OPERATOR
+
+#undef __GMPP_DEFINE_COMPOUND_OPERATOR
+#undef __GMPNN_DEFINE_COMPOUND_OPERATOR
+#undef __GMPNS_DEFINE_COMPOUND_OPERATOR
+#undef __GMPNU_DEFINE_COMPOUND_OPERATOR
+#undef __GMPND_DEFINE_COMPOUND_OPERATOR
+#undef __GMPNLD_DEFINE_COMPOUND_OPERATOR
+#undef __GMPN_DEFINE_COMPOUND_OPERATOR
+#undef __GMP_DEFINE_COMPOUND_OPERATOR
+
+#undef __GMPQ_DEFINE_COMPOUND_OPERATOR
+#undef __GMPF_DEFINE_COMPOUND_OPERATOR
+
+#undef __GMP_DEFINE_COMPOUND_OPERATOR_UI
+#undef __GMPZ_DEFINE_COMPOUND_OPERATOR_UI
+#undef __GMPQ_DEFINE_COMPOUND_OPERATOR_UI
+#undef __GMPF_DEFINE_COMPOUND_OPERATOR_UI
+
+#undef __GMP_DEFINE_INCREMENT_OPERATOR
+#undef __GMPZ_DEFINE_INCREMENT_OPERATOR
+#undef __GMPQ_DEFINE_INCREMENT_OPERATOR
+#undef __GMPF_DEFINE_INCREMENT_OPERATOR
+
+#undef __GMPXX_CONSTANT_TRUE
+#undef __GMPXX_CONSTANT
+
+#endif /* __GMP_PLUSPLUS__ */
diff --git a/invalid.c b/invalid.c
new file mode 100644
index 0000000..e09eab2
--- /dev/null
+++ b/invalid.c
@@ -0,0 +1,82 @@
+/* __gmp_invalid_operation -- invalid floating point operation.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <signal.h>
+#include <stdlib.h>
+
+#if HAVE_UNISTD_H
+#include <unistd.h>  /* for getpid */
+#endif
+
+#include "gmp-impl.h"
+
+
+/* Incidentally, kill is not available on mingw, but that's ok, it has raise
+   and we'll be using that.  */
+#if ! HAVE_RAISE
+#define raise(sig)   kill (getpid(), sig)
+#endif
+
+
+/* __gmp_invalid_operation is for an invalid floating point operation, like
+   mpz_set_d on a NaN or Inf.  It's done as a subroutine to minimize code in
+   places raising an exception.
+
+   feraiseexcept(FE_INVALID) is not used here, since unfortunately on most
+   systems it would require libm.
+
+   Alternatives:
+
+   It might be possible to check whether a hardware "invalid operation" trap
+   is enabled or not before raising a signal.  This would require all
+   callers to be prepared to continue with some bogus result.  Bogus returns
+   are bad, but presumably an application disabling the trap is prepared for
+   that.
+
+   On some systems (eg. BSD) the signal handler can find out the reason for
+   a SIGFPE (overflow, invalid, div-by-zero, etc).  Perhaps we could get
+   that into our raise too.
+
+   i386 GLIBC implements feraiseexcept(FE_INVALID) with an asm fdiv 0/0.
+   That would both respect the exceptions mask and give a reason code in a
+   BSD signal.  */
+
+void
+__gmp_invalid_operation (void)
+{
+  raise (SIGFPE);  /* signal the invalid floating point operation */
+  abort ();        /* reached only if SIGFPE was ignored or its handler returned */
+}
diff --git a/longlong.h b/longlong.h
new file mode 100644
index 0000000..be1c3cb
--- /dev/null
+++ b/longlong.h
@@ -0,0 +1,2289 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+
+Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* You have to define the following before including this file:
+
+   UWtype -- An unsigned type, default type for operations (typically a "word")
+   UHWtype -- An unsigned type, at least half the size of UWtype
+   UDWtype -- An unsigned type, at least twice as large as UWtype
+   W_TYPE_SIZE -- size in bits of UWtype
+
+   SItype, USItype -- Signed and unsigned 32 bit types
+   DItype, UDItype -- Signed and unsigned 64 bit types
+
+   On a 32 bit machine UWtype should typically be USItype;
+   on a 64 bit machine, UWtype should typically be UDItype.
+
+   Optionally, define:
+
+   LONGLONG_STANDALONE -- Avoid code that needs machine-dependent support files
+   NO_ASM -- Disable inline asm
+
+
+   CAUTION!  Using this version of longlong.h outside of GMP is not safe.  You
+   need to include gmp.h and gmp-impl.h, or certain things might not work as
+   expected.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* This is used to make sure no undesirable sharing between different libraries
+   that use this file takes place.  */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+/* Define auxiliary asm macros.
+
+   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+   word product in HIGH_PROD and LOW_PROD.
+
+   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+   UDWtype product.  This is just a variant of umul_ppmm.
+
+   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator) divides a UDWtype, composed by the UWtype integers
+   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+   than DENOMINATOR for correct operation.  If, in addition, the most
+   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+   UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+   is rounded towards 0.
+
+   5) count_leading_zeros(count, x) counts the number of zero-bits from the
+   msb to the first non-zero bit in the UWtype X.  This is the number of
+   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+
+   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+   from the least significant end.
+
+   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+   high_addend_2, low_addend_2) adds two UWtype integers, composed by
+   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+   (i.e. carry out) is not stored anywhere, and is lost.
+
+   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
+   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+   and is lost.
+
+   If any of these macros are left undefined for a particular CPU,
+   C macros are used.
+
+
+   Notes:
+
+   For add_ssaaaa the two high and two low addends can both commute, but
+   unfortunately gcc only supports one "%" commutative in each asm block.
+   This has always been so but is only documented in recent versions
+   (eg. pre-release 3.3).  Having two or more "%"s can cause an internal
+   compiler error in certain rare circumstances.
+
+   Apparently it was only the last "%" that was ever actually respected, so
+   the code has been updated to leave just that.  Clearly there's a free
+   choice whether high or low should get it, if there's a reason to favour
+   one over the other.  Also obviously when the constraints on the two
+   operands are identical there's no benefit to the reloader in any "%" at
+   all.
+
+   */
+
+/* The CPUs come in alphabetical order below.
+
+   Please add support for more CPUs here, or improve the current support
+   for the CPUs below!  */
+
+
+/* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc
+   3.4 __builtin_clzl or __builtin_clzll, according to our limb size.
+   Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or
+   __builtin_ctzll.
+
+   These builtins are only used when we check what code comes out, on some
+   chips they're merely libgcc calls, where we will instead want an inline
+   in that case (either asm or generic C).
+
+   These builtins are better than an asm block of the same insn, since an
+   asm block doesn't give gcc any information about scheduling or resource
+   usage.  We keep an asm block for use on prior versions of gcc though.
+
+   For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but
+   it's not used (for count_leading_zeros) because it generally gives extra
+   code to ensure the result is 0 when the input is 0, which we don't need
+   or want.  */
+
+#ifdef _LONG_LONG_LIMB
+#define count_leading_zeros_gcc_clz(count,x)	\
+  do {						\
+    ASSERT ((x) != 0);				\
+    (count) = __builtin_clzll (x);		\
+  } while (0)
+#else
+#define count_leading_zeros_gcc_clz(count,x)	\
+  do {						\
+    ASSERT ((x) != 0);				\
+    (count) = __builtin_clzl (x);		\
+  } while (0)
+#endif
+
+#ifdef _LONG_LONG_LIMB
+#define count_trailing_zeros_gcc_ctz(count,x)	\
+  do {						\
+    ASSERT ((x) != 0);				\
+    (count) = __builtin_ctzll (x);		\
+  } while (0)
+#else
+#define count_trailing_zeros_gcc_ctz(count,x)	\
+  do {						\
+    ASSERT ((x) != 0);				\
+    (count) = __builtin_ctzl (x);		\
+  } while (0)
+#endif
+
+
+/* FIXME: The macros using external routines like __MPN(count_leading_zeros)
+   don't need to be under !NO_ASM */
+#if ! defined (NO_ASM)
+
+#if defined (__alpha) && W_TYPE_SIZE == 64
+/* Most alpha-based machines, except Cray systems. */
+#if defined (__GNUC__)
+#if __GMP_GNUC_PREREQ (3,3)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    (ph) = __builtin_alpha_umulh (__m0, __m1);				\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#else
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("umulh %r1,%2,%0"						\
+	     : "=r" (ph)						\
+	     : "%rJ" (__m0), "rI" (__m1));				\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#endif
+#else /* ! __GNUC__ */
+#include <machine/builtins.h>
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    (ph) = __UMULH (__m0, __m1);					\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __di;							\
+    __di = __MPN(invert_limb) (d);					\
+    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);				\
+  } while (0)
+#define UDIV_PREINV_ALWAYS  1
+#define UDIV_NEEDS_NORMALIZATION 1
+#endif /* LONGLONG_STANDALONE */
+
+/* clz_tab is required in all configurations, since mpn/alpha/cntlz.asm
+   always goes into libgmp.so, even when not actually used.  */
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+
+#if defined (__GNUC__) && HAVE_HOST_CPU_alpha_CIX
+#define count_leading_zeros(COUNT,X) \
+  __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X))
+#define count_trailing_zeros(COUNT,X) \
+  __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
+#endif /* clz/ctz using cix */
+
+#if ! defined (count_leading_zeros)				\
+  && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE)
+/* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0.
+   "$31" is written explicitly in the asm, since an "r" constraint won't
+   select reg 31.  There seems no need to worry about "r31" syntax for cray,
+   since gcc itself (pre-release 3.4) emits just $31 in various places.	 */
+#define ALPHA_CMPBGE_0(dst, src)					\
+  do { asm ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0)
+/* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts
+   them, locating the highest non-zero byte.  A second __clz_tab lookup
+   counts the leading zero bits in that byte, giving the result.  */
+#define count_leading_zeros(count, x)					\
+  do {									\
+    UWtype  __clz__b, __clz__c, __clz__x = (x);				\
+    ALPHA_CMPBGE_0 (__clz__b,  __clz__x);	    /* zero bytes */	\
+    __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F];  /* 8 to 1 byte */	\
+    __clz__b = __clz__b * 8 - 7;		    /* 57 to 1 shift */ \
+    __clz__x >>= __clz__b;						\
+    __clz__c = __clz_tab [__clz__x];		    /* 8 to 1 bit */	\
+    __clz__b = 65 - __clz__b;						\
+    (count) = __clz__b - __clz__c;					\
+  } while (0)
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif /* clz using cmpbge */
+
+#if ! defined (count_leading_zeros) && ! defined (LONGLONG_STANDALONE)
+#if HAVE_ATTRIBUTE_CONST
+long __MPN(count_leading_zeros) (UDItype) __attribute__ ((const));
+#else
+long __MPN(count_leading_zeros) (UDItype);
+#endif
+#define count_leading_zeros(count, x) \
+  ((count) = __MPN(count_leading_zeros) (x))
+#endif /* clz using mpn */
+#endif /* __alpha */
+
+#if defined (__AVR) && W_TYPE_SIZE == 8
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    unsigned short __p = (unsigned short) (m0) * (m1);			\
+    (ph) = __p >> 8;							\
+    (pl) = __p;								\
+  } while (0)
+#endif /* AVR */
+
+#if defined (_CRAY) && W_TYPE_SIZE == 64
+#include <intrinsics.h>
+#define UDIV_PREINV_ALWAYS  1
+#define UDIV_NEEDS_NORMALIZATION 1
+long __MPN(count_leading_zeros) (UDItype);
+#define count_leading_zeros(count, x) \
+  ((count) = _leadz ((UWtype) (x)))
+#if defined (_CRAYIEEE)		/* I.e., Cray T90/ieee, T3D, and T3E */
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    (ph) = _int_mult_upper (__m0, __m1);				\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __di;							\
+    __di = __MPN(invert_limb) (d);					\
+    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);				\
+  } while (0)
+#endif /* LONGLONG_STANDALONE */
+#endif /* _CRAYIEEE */
+#endif /* _CRAY */
+
+#if defined (__ia64) && W_TYPE_SIZE == 64
+/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
+   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
+   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
+   register, which takes an extra cycle.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)      \
+  do {						\
+    UWtype __x;					\
+    __x = (al) - (bl);				\
+    if ((al) < (bl))				\
+      (sh) = (ah) - (bh) - 1;			\
+    else					\
+      (sh) = (ah) - (bh);			\
+    (sl) = __x;					\
+  } while (0)
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
+/* Do both product parts in assembly, since that gives better code with
+   all gcc versions.  Some callers will just use the upper part, and in
+   that situation we waste an instruction, but not any cycles.  */
+#define umul_ppmm(ph, pl, m0, m1) \
+    __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"		\
+	     : "=&f" (ph), "=f" (pl)					\
+	     : "f" (m0), "f" (m1))
+#define count_leading_zeros(count, x) \
+  do {									\
+    UWtype _x = (x), _y, _a, _c;					\
+    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));		\
+    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));		\
+    _c = (_a - 1) << 3;							\
+    _x >>= _c;								\
+    if (_x >= 1 << 4)							\
+      _x >>= 4, _c += 4;						\
+    if (_x >= 1 << 2)							\
+      _x >>= 2, _c += 2;						\
+    _c += _x >> 1;							\
+    (count) =  W_TYPE_SIZE - 1 - _c;					\
+  } while (0)
+/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
+   based, and we don't need a special case for x==0 here */
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    UWtype __ctz_x = (x);						\
+    __asm__ ("popcnt %0 = %1"						\
+	     : "=r" (count)						\
+	     : "r" ((__ctz_x-1) & ~__ctz_x));				\
+  } while (0)
+#endif
+#if defined (__INTEL_COMPILER)
+#include <ia64intrin.h>
+#define umul_ppmm(ph, pl, m0, m1)					\
+  do {									\
+    UWtype __m0 = (m0), __m1 = (m1);					\
+    ph = _m64_xmahu (__m0, __m1, 0);					\
+    pl = __m0 * __m1;							\
+  } while (0)
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __di;							\
+    __di = __MPN(invert_limb) (d);					\
+    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);				\
+  } while (0)
+#define UDIV_PREINV_ALWAYS  1
+#define UDIV_NEEDS_NORMALIZATION 1
+#endif
+#endif
+
+
+#if defined (__GNUC__)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+   understood by gcc1.  Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3"				\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3"				\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
+#define umul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("multiplu %0,%1,%2"					\
+	     : "=r" (xl)						\
+	     : "r" (__m0), "r" (__m1));					\
+    __asm__ ("multmu %0,%1,%2"						\
+	     : "=r" (xh)						\
+	     : "r" (__m0), "r" (__m1));					\
+  } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("dividu %0,%3,%4"						\
+	   : "=r" (q), "=q" (r)						\
+	   : "1" (n1), "r" (n0), "r" (d))
+#define count_leading_zeros(count, x) \
+    __asm__ ("clz %0,%1"						\
+	     : "=r" (count)						\
+	     : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined (__arc__)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
+	   : "=r" (sh),							\
+	     "=&r" (sl)							\
+	   : "r"  ((USItype) (ah)),					\
+	     "rICal" ((USItype) (bh)),					\
+	     "%r" ((USItype) (al)),					\
+	     "rICal" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
+	   : "=r" (sh),							\
+	     "=&r" (sl)							\
+	   : "r" ((USItype) (ah)),					\
+	     "rICal" ((USItype) (bh)),					\
+	     "r" ((USItype) (al)),					\
+	     "rICal" ((USItype) (bl)))
+#endif
+
+#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
+    && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bl) && -(USItype)(bl) < (USItype)(bl))	\
+      __asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "rI" (bh),					\
+		 "%r" (al), "rI" (-(USItype)(bl)) __CLOBBER_CC);	\
+    else								\
+      __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC);	\
+  } while (0)
+/* FIXME: Extend the immediate range for the low word by using both ADDS and
+   SUBS, since they set carry in the same way.  We need separate definitions
+   for thumb and non-thumb since thumb lacks RSC.  */
+#if defined (__thumb__)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && __builtin_constant_p (bh)		\
+	&& (ah) == (bh))						\
+      __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0"			\
+	       : "=r" (sh), "=r" (sl)					\
+	       : "r" (al), "rI" (bl) __CLOBBER_CC);			\
+    else if (__builtin_constant_p (al))					\
+      __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+    else								\
+      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+    } while (0)
+#else
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && __builtin_constant_p (bh)		\
+	&& (ah) == (bh))						\
+      __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0"			\
+	       : "=r" (sh), "=r" (sl)					\
+	       : "r" (al), "rI" (bl) __CLOBBER_CC);			\
+    else if (__builtin_constant_p (al))					\
+      {									\
+	if (__builtin_constant_p (ah))					\
+	  __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2"		\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+	else								\
+	  __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3"		\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+      }									\
+    else if (__builtin_constant_p (ah))					\
+      {									\
+	if (__builtin_constant_p (bl))					\
+	  __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2"		\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+	else								\
+	  __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2"		\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+      }									\
+    else								\
+      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+    } while (0)
+#endif
+#if defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_2A__) \
+    || defined (__ARM_ARCH_3__)
+#define umul_ppmm(xh, xl, a, b)						\
+  do {									\
+    register USItype __t0, __t1, __t2;					\
+    __asm__ ("%@ Inlined umul_ppmm\n"					\
+	   "	mov	%2, %5, lsr #16\n"				\
+	   "	mov	%0, %6, lsr #16\n"				\
+	   "	bic	%3, %5, %2, lsl #16\n"				\
+	   "	bic	%4, %6, %0, lsl #16\n"				\
+	   "	mul	%1, %3, %4\n"					\
+	   "	mul	%4, %2, %4\n"					\
+	   "	mul	%3, %0, %3\n"					\
+	   "	mul	%0, %2, %0\n"					\
+	   "	adds	%3, %4, %3\n"					\
+	   "	addcs	%0, %0, #65536\n"				\
+	   "	adds	%1, %1, %3, lsl #16\n"				\
+	   "	adc	%0, %0, %3, lsr #16"				\
+	   : "=&r" ((USItype) (xh)), "=r" ((USItype) (xl)),		\
+	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
+	   : "r" ((USItype) (a)), "r" ((USItype) (b)) __CLOBBER_CC);	\
+  } while (0)
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __r;							\
+    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));			\
+    (r) = __r;								\
+  } while (0)
+extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
+#endif /* LONGLONG_STANDALONE */
+#else /* ARMv4 or newer */
+#define umul_ppmm(xh, xl, a, b) \
+  __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
+#define smul_ppmm(xh, xl, a, b) \
+  __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __di;							\
+    __di = __MPN(invert_limb) (d);					\
+    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);				\
+  } while (0)
+#define UDIV_PREINV_ALWAYS  1
+#define UDIV_NEEDS_NORMALIZATION 1
+#endif /* LONGLONG_STANDALONE */
+#endif /* defined(__ARM_ARCH_2__) ... */
+#define count_leading_zeros(count, x)  count_leading_zeros_gcc_clz(count, x)
+#define count_trailing_zeros(count, x)  count_trailing_zeros_gcc_ctz(count, x)
+#endif /* __arm__ */
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bl) && ~(UDItype)(bl) <= (UDItype)(bl))	\
+      __asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),		\
+		 "%r" ((UDItype)(al)), "rI" (-(UDItype)(bl)) __CLOBBER_CC);\
+    else								\
+      __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),		\
+		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC);\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bl) && ~(UDItype)(bl) <= (UDItype)(bl))	\
+      __asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"			\
+	       : "=r,r" (sh), "=&r,&r" (sl)				\
+	       : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),	\
+		 "r,Z"   ((UDItype)(al)), "rI,r" (-(UDItype)(bl)) __CLOBBER_CC);\
+    else								\
+      __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"			\
+	       : "=r,r" (sh), "=&r,&r" (sl)				\
+	       : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),	\
+		 "r,Z"   ((UDItype)(al)), "rI,r"  ((UDItype)(bl)) __CLOBBER_CC);\
+  } while(0);
+#if __GMP_GNUC_PREREQ (4,9)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
+    __ll_UTItype __ll = (__ll_UTItype)(u) * (v);			\
+    w1 = __ll >> 64;							\
+    w0 = __ll;								\
+  } while (0)
+#endif
+#if !defined (umul_ppmm)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("umulh\t%0, %1, %2" : "=r" (ph) : "r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#endif
+#define count_leading_zeros(count, x)  count_leading_zeros_gcc_clz(count, x)
+#define count_trailing_zeros(count, x)  count_trailing_zeros_gcc_ctz(count, x)
+#endif /* __aarch64__ */
+
+#if defined (__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __x;							\
+  __asm__ ("mulwux %2,%0"						\
+	   : "=r" (__x.__ll)						\
+	   : "%0" ((USItype)(u)), "r" ((USItype)(v)));			\
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define smul_ppmm(w1, w0, u, v) \
+  ({union {DItype __ll;							\
+	   struct {SItype __l, __h;} __i;				\
+	  } __x;							\
+  __asm__ ("mulwx %2,%0"						\
+	   : "=r" (__x.__ll)						\
+	   : "%0" ((SItype)(u)), "r" ((SItype)(v)));			\
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;							\
+    __asm__ ("mulwux %2,%0"						\
+	     : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v)));	\
+    __w; })
+#endif /* __clipper__ */
+
+/* Fujitsu vector computers.  */
+#if defined (__uxp__) && W_TYPE_SIZE == 32
+#define umul_ppmm(ph, pl, u, v) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("mult.lu %1,%2,%0"	: "=r" (__x.__ll) : "%r" (u), "rK" (v));\
+    (ph) = __x.__i.__h;							\
+    (pl) = __x.__i.__l;							\
+  } while (0)
+#define smul_ppmm(ph, pl, u, v) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));	\
+    (ph) = __x.__i.__h;							\
+    (pl) = __x.__i.__l;							\
+  } while (0)
+#endif
+
+#if defined (__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add.w %5,%1\n\taddx %3,%0"					\
+	   : "=g" (sh), "=&g" (sl)					\
+	   : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),			\
+	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub.w %5,%1\n\tsubx %3,%0"					\
+	   : "=g" (sh), "=&g" (sl)					\
+	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)),			\
+	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+  __asm__ ("mulx %3,%0,%1"						\
+	   : "=g" (ph), "=r" (pl)					\
+	   : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  __asm__ ("divx %4,%0,%1"						\
+	   : "=g" (q), "=r" (r)						\
+	   : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+  __asm__ ("bsch/1 %1,%0"						\
+	   : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
+#endif
+
+#if defined (__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%I5 %5,%r4,%1\n\taddc %r2,%r3,%0"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%I4 %4,%r5,%1\n\tsubb %r2,%r3,%0"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl))
+#if defined (_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v));	\
+    (wh) = __x.__i.__h;							\
+    (wl) = __x.__i.__l;							\
+  } while (0)
+#endif
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __tmp;							\
+    __asm__ (								\
+       "ldi		1,%0\n"						\
+"	extru,=		%1,15,16,%%r0	; Bits 31..16 zero?\n"		\
+"	extru,tr	%1,15,16,%1	; No.  Shift down, skip add.\n"	\
+"	ldo		16(%0),%0	; Yes.  Perform add.\n"		\
+"	extru,=		%1,23,8,%%r0	; Bits 15..8 zero?\n"		\
+"	extru,tr	%1,23,8,%1	; No.  Shift down, skip add.\n"	\
+"	ldo		8(%0),%0	; Yes.  Perform add.\n"		\
+"	extru,=		%1,27,4,%%r0	; Bits 7..4 zero?\n"		\
+"	extru,tr	%1,27,4,%1	; No.  Shift down, skip add.\n"	\
+"	ldo		4(%0),%0	; Yes.  Perform add.\n"		\
+"	extru,=		%1,29,2,%%r0	; Bits 3..2 zero?\n"		\
+"	extru,tr	%1,29,2,%1	; No.  Shift down, skip add.\n"	\
+"	ldo		2(%0),%0	; Yes.  Perform add.\n"		\
+"	extru		%1,30,1,%1	; Extract bit 1.\n"		\
+"	sub		%0,%1,%0	; Subtract it.\n"		\
+	: "=r" (count), "=r" (__tmp) : "1" (x));			\
+  } while (0)
+#endif /* hppa */
+
+/* These macros are for ABI=2.0w.  In ABI=2.0n they can't be used, since GCC
+   (3.2) puts longlong into two adjacent 32-bit registers.  Presumably this
+   is just a case of no direct support for 2.0n but treating it like 1.0. */
+#if defined (__hppa) && W_TYPE_SIZE == 64 && ! defined (_LONG_LONG_LIMB)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%I5 %5,%r4,%1\n\tadd,dc %r2,%r3,%0"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%I4 %4,%r5,%1\n\tsub,db %r2,%r3,%0"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl))
+#endif /* hppa */
+
+#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
+#if defined (__zarch__) || defined (HAVE_HOST_CPU_s390_zarch)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
+  do {									\
+/*  if (__builtin_constant_p (bl))					\
+      __asm__ ("alfi\t%1,%o5\n\talcr\t%0,%3"				\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "0"  (ah), "r" (bh), "%1" (al), "n" (bl) __CLOBBER_CC);\
+    else								\
+*/    __asm__ ("alr\t%1,%5\n\talcr\t%0,%3"				\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "0"  (ah), "r" (bh), "%1" (al), "r" (bl)__CLOBBER_CC);	\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+  do {									\
+/*  if (__builtin_constant_p (bl))					\
+      __asm__ ("slfi\t%1,%o5\n\tslbr\t%0,%3"				\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "0" (ah), "r" (bh), "1" (al), "n" (bl) __CLOBBER_CC);	\
+    else								\
+*/    __asm__ ("slr\t%1,%5\n\tslbr\t%0,%3"				\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "0" (ah), "r" (bh), "1" (al), "r" (bl) __CLOBBER_CC);	\
+  } while (0)
+#if __GMP_GNUC_PREREQ (4,5)
+#define umul_ppmm(xh, xl, m0, m1)					\
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __x.__ll = (UDItype) (m0) * (UDItype) (m1);				\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+  } while (0)
+#else
+#if 0
+/* FIXME: this fails if gcc knows about the 64-bit registers.  Use only
+   with a new enough processor pretending we have 32-bit registers.  */
+#define umul_ppmm(xh, xl, m0, m1)					\
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("mlr\t%0,%2"						\
+	     : "=r" (__x.__ll)						\
+	     : "%0" (m0), "r" (m1));					\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+  } while (0)
+#else
+#define umul_ppmm(xh, xl, m0, m1)					\
+  do {									\
+  /* When we have 64-bit regs and gcc is aware of that, we cannot simply use
+     DImode for the product, since that would be allocated to a single 64-bit
+     register, whereas mlr uses the low 32-bits of an even-odd register pair.
+  */									\
+    register USItype __r0 __asm__ ("0");				\
+    register USItype __r1 __asm__ ("1") = (m0);				\
+    __asm__ ("mlr\t%0,%3"						\
+	     : "=r" (__r0), "=r" (__r1)					\
+	     : "r" (__r1), "r" (m1));					\
+    (xh) = __r0; (xl) = __r1;						\
+  } while (0)
+#endif /* if 0 */
+#endif
+#if 0
+/* FIXME: this fails if gcc knows about the 64-bit registers.  Use only
+   with a new enough processor pretending we have 32-bit registers.  */
+#define udiv_qrnnd(q, r, n1, n0, d)					\
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __x.__i.__h = n1; __x.__i.__l = n0;					\
+    __asm__ ("dlr\t%0,%2"						\
+	     : "=r" (__x.__ll)						\
+	     : "0" (__x.__ll), "r" (d));				\
+    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
+  } while (0)
+#else
+#define udiv_qrnnd(q, r, n1, n0, d)					\
+  do {									\
+    register USItype __r0 __asm__ ("0") = (n1);				\
+    register USItype __r1 __asm__ ("1") = (n0);				\
+    __asm__ ("dlr\t%0,%4"						\
+	     : "=r" (__r0), "=r" (__r1)					\
+	     : "r" (__r0), "r" (__r1), "r" (d));			\
+    (q) = __r1; (r) = __r0;						\
+  } while (0)
+#endif /* if 0 */
+#else /* if __zarch__ */
+/* FIXME: this fails if gcc knows about the 64-bit registers.  */
+/* Signed 32x32->64 multiply via "mr": the product fills an even-odd
+   register pair, modelled here as a 64-bit union; high word to xh, low
+   word to xl.  */
+#define smul_ppmm(xh, xl, m0, m1)					\
+  do {									\
+    union {DItype __ll;							\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("mr\t%0,%2"						\
+	     : "=r" (__x.__ll)						\
+	     : "%0" (m0), "r" (m1));					\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+  } while (0)
+/* FIXME: this fails if gcc knows about the 64-bit registers.  */
+/* Signed 64/32 divide via "dr": dividend n1:n0 packed into the union;
+   quotient returned in the low word, remainder in the high word.  */
+#define sdiv_qrnnd(q, r, n1, n0, d)					\
+  do {									\
+    union {DItype __ll;							\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __x.__i.__h = n1; __x.__i.__l = n0;					\
+    __asm__ ("dr\t%0,%2"						\
+	     : "=r" (__x.__ll)						\
+	     : "0" (__x.__ll), "r" (d));				\
+    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
+  } while (0)
+#endif /* if __zarch__ */
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+/* We need to cast operands with register constraints, otherwise their types
+   will be assumed to be SImode by gcc.  For these machines, such operations
+   will insert a value into the low 32 bits, and leave the high 32 bits with
+   garbage.  */
+/* Two-word add: algr adds the low words setting the carry flag, alcgr
+   folds the carry into the high-word add.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
+  do {									\
+    __asm__ ("algr\t%1,%5\n\talcgr\t%0,%3"				\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "0"  ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
+		 "%1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC); \
+  } while (0)
+/* Two-word subtract: slgr sets the borrow, slbgr propagates it.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+  do {									\
+    __asm__ ("slgr\t%1,%5\n\tslbgr\t%0,%3"				\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "0" ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
+	       "1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC);	\
+  } while (0)
+/* mlgr/dlgr operate on an even-odd 64-bit register pair, modelled here
+   as a TImode union.  NOTE(review): clang is excluded -- presumably it
+   rejects or mishandles the TImode asm operand; confirm before relying
+   on the guard.  */
+#if !defined (__clang__)
+/* 64x64->128 multiply: high half to xh, low half to xl.  */
+#define umul_ppmm(xh, xl, m0, m1)					\
+  do {									\
+    union {unsigned int __attribute__ ((mode(TI))) __ll;		\
+	   struct {UDItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("mlgr\t%0,%2"						\
+	     : "=r" (__x.__ll)						\
+	     : "%0" ((UDItype)(m0)), "r" ((UDItype)(m1)));		\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+  } while (0)
+/* 128/64 divide: quotient returned in the low word of the pair,
+   remainder in the high word.  */
+#define udiv_qrnnd(q, r, n1, n0, d)					\
+  do {									\
+    union {unsigned int __attribute__ ((mode(TI))) __ll;		\
+	   struct {UDItype __h, __l;} __i;				\
+	  } __x;							\
+    __x.__i.__h = n1; __x.__i.__l = n0;					\
+    __asm__ ("dlgr\t%0,%2"						\
+	     : "=r" (__x.__ll)						\
+	     : "0" (__x.__ll), "r" ((UDItype)(d)));			\
+    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
+  } while (0)
+#endif
+#if 0 /* FIXME: Enable for z10 (?) */
+/* flogr: find-leftmost-one, leaving the zero count in the even (high)
+   register of the pair.  */
+#define count_leading_zeros(cnt, x)					\
+  do {									\
+    union {unsigned int __attribute__ ((mode(TI))) __ll;		\
+	   struct {UDItype __h, __l;} __i;				\
+	  } __clr_cnt;							\
+    __asm__ ("flogr\t%0,%1"						\
+	     : "=r" (__clr_cnt.__ll)					\
+	     : "r" (x) __CLOBBER_CC);					\
+    (cnt) = __clr_cnt.__i.__h;						\
+  } while (0)
+#endif
+#endif
+
+/* On x86 and x86_64, every asm implicitly clobbers "flags" and "fpsr",
+   so we don't need __CLOBBER_CC.  */
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+/* Two-word add: addl sets the carry from the low words, adcl folds it
+   into the high-word add.  Casts force 32-bit (SImode) operands.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addl %5,%k1\n\tadcl %3,%k0"					\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),			\
+	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* Two-word subtract: subl sets the borrow, sbbl propagates it.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subl %5,%k1\n\tsbbl %3,%k0"					\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)),			\
+	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* 32x32->64 multiply: mull leaves the low half in %eax ("a") and the
+   high half in %edx ("d").  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mull %3"							\
+	   : "=a" (w0), "=d" (w1)					\
+	   : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
+/* 64/32 divide: divl takes the dividend in %edx:%eax and returns the
+   quotient in %eax, remainder in %edx.  */
+#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
+  __asm__ ("divl %4"		     /* stringification in K&R C */	\
+	   : "=a" (q), "=d" (r)						\
+	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))
+
+#if HAVE_HOST_CPU_i586 || HAVE_HOST_CPU_pentium || HAVE_HOST_CPU_pentiummmx
+/* Pentium bsrl takes between 10 and 72 cycles depending where the most
+   significant 1 bit is, hence the use of the following alternatives.  bsfl
+   is slow too, between 18 and 42 depending where the least significant 1
+   bit is, so let the generic count_trailing_zeros below make use of the
+   count_leading_zeros here too.  */
+
+#if HAVE_HOST_CPU_pentiummmx && ! defined (LONGLONG_STANDALONE)
+/* The following should be a fixed 14 or 15 cycles, but possibly plus an L1
+   cache miss reading from __clz_tab.  For P55 it's favoured over the float
+   below so as to avoid mixing MMX and x87, since the penalty for switching
+   between the two is about 100 cycles.
+
+   The asm block sets __shift to -3 if the high 24 bits are clear, -2 for
+   16, -1 for 8, or 0 otherwise.  This could be written equivalently as
+   follows, but as of gcc 2.95.2 it results in conditional jumps.
+
+       __shift = -(__n < 0x1000000);
+       __shift -= (__n < 0x10000);
+       __shift -= (__n < 0x100);
+
+   The middle two sbbl and cmpl's pair, and with luck something gcc
+   generates might pair with the first cmpl and the last sbbl.  The "32+1"
+   constant could be folded into __clz_tab[], but it doesn't seem worth
+   making a different table just for that.  */
+
+#define count_leading_zeros(c,n)					\
+  do {									\
+    USItype  __n = (n);							\
+    USItype  __shift;							\
+    __asm__ ("cmpl  $0x1000000, %1\n"					\
+	     "sbbl  %0, %0\n"						\
+	     "cmpl  $0x10000, %1\n"					\
+	     "sbbl  $0, %0\n"						\
+	     "cmpl  $0x100, %1\n"					\
+	     "sbbl  $0, %0\n"						\
+	     : "=&r" (__shift) : "r"  (__n));				\
+    __shift = __shift*8 + 24 + 1;					\
+    (c) = 32 + 1 - __shift - __clz_tab[__n >> __shift];			\
+  } while (0)
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#define COUNT_LEADING_ZEROS_0   31   /* n==0 indistinguishable from n==1 */
+
+#else /* ! pentiummmx || LONGLONG_STANDALONE */
+/* The following should be a fixed 14 cycles or so.  Some scheduling
+   opportunities should be available between the float load/store too.  This
+   sort of code is used in gcc 3 for __builtin_ffs (with "n&-n") and is
+   apparently suggested by the Intel optimizing manual (don't know exactly
+   where).  gcc 2.95 or up will be best for this, so the "double" is
+   correctly aligned on the stack.  */
+#define count_leading_zeros(c,n)					\
+  do {									\
+    union {								\
+      double    d;							\
+      unsigned  a[2];							\
+    } __u;								\
+    __u.d = (UWtype) (n);						\
+    (c) = 0x3FF + 31 - (__u.a[1] >> 20);				\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0   (0x3FF + 31)
+#endif /* pentiummx */
+
+#else /* ! pentium */
+
+#if __GMP_GNUC_PREREQ (3,4)  /* using bsrl */
+#define count_leading_zeros(count,x)  count_leading_zeros_gcc_clz(count,x)
+#endif /* gcc clz */
+
+/* On P6, gcc prior to 3.0 generates a partial register stall for
+   __cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former
+   being 1 code byte smaller.  "31-__cbtmp" is a workaround, probably at the
+   cost of one extra instruction.  Do this for "i386" too, since that means
+   generic x86.  */
+#if ! defined (count_leading_zeros) && __GNUC__ < 3			\
+  && (HAVE_HOST_CPU_i386						\
+      || HAVE_HOST_CPU_i686						\
+      || HAVE_HOST_CPU_pentiumpro					\
+      || HAVE_HOST_CPU_pentium2						\
+      || HAVE_HOST_CPU_pentium3)
+#define count_leading_zeros(count, x)					\
+  do {									\
+    USItype __cbtmp;							\
+    ASSERT ((x) != 0);							\
+    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x)));	\
+    (count) = 31 - __cbtmp;						\
+  } while (0)
+#endif /* gcc<3 asm bsrl */
+
+#ifndef count_leading_zeros
+#define count_leading_zeros(count, x)					\
+  do {									\
+    USItype __cbtmp;							\
+    ASSERT ((x) != 0);							\
+    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x)));	\
+    (count) = __cbtmp ^ 31;						\
+  } while (0)
+#endif /* asm bsrl */
+
+#if __GMP_GNUC_PREREQ (3,4)  /* using bsfl */
+#define count_trailing_zeros(count,x)  count_trailing_zeros_gcc_ctz(count,x)
+#endif /* gcc ctz */
+
+#ifndef count_trailing_zeros
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    ASSERT ((x) != 0);							\
+    __asm__ ("bsfl %1,%k0" : "=r" (count) : "rm" ((USItype)(x)));	\
+  } while (0)
+#endif /* asm bsfl */
+
+#endif /* ! pentium */
+
+#endif /* 80x86 */
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+/* Two-word add: addq sets the carry from the low words, adcq folds it
+   into the high-word add.  Casts force 64-bit (DImode) operands.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addq %5,%q1\n\tadcq %3,%q0"					\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0"  ((UDItype)(ah)), "rme" ((UDItype)(bh)),		\
+	     "%1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
+/* Two-word subtract: subq sets the borrow, sbbq propagates it.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subq %5,%q1\n\tsbbq %3,%q0"					\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)),		\
+	     "1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
+#if X86_ASM_MULX \
+   && (HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell \
+       || HAVE_HOST_CPU_skylake || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen)
+/* mulx on CPUs that have it: flag-less 64x64->128 multiply.  The "d"
+   constraint pins u to %rdx, mulx's implicit source operand; outputs go
+   to any two registers.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulx\t%3, %q0, %q1"						\
+	   : "=r" (w0), "=r" (w1)					\
+	   : "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
+#else
+/* Plain mulq: low half in %rax, high half in %rdx.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulq\t%3"							\
+	   : "=a" (w0), "=d" (w1)					\
+	   : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
+#endif
+/* 128/64 divide: divq takes the dividend in %rdx:%rax and returns the
+   quotient in %rax, remainder in %rdx.  */
+#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
+  __asm__ ("divq %4"		     /* stringification in K&R C */	\
+	   : "=a" (q), "=d" (r)						\
+	   : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
+
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+  || HAVE_HOST_CPU_k10 || HAVE_HOST_CPU_bd1 || HAVE_HOST_CPU_bd2	\
+  || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen	\
+  || HAVE_HOST_CPU_bobcat || HAVE_HOST_CPU_jaguar
+#define count_leading_zeros(count, x)					\
+  do {									\
+    /* This is lzcnt, spelled for older assemblers.  Destination and */	\
+    /* source must be a 64-bit registers, hence cast and %q.         */	\
+    __asm__ ("rep;bsr\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 64
+#else
+#define count_leading_zeros(count, x)					\
+  do {									\
+    UDItype __cbtmp;							\
+    ASSERT ((x) != 0);							\
+    __asm__ ("bsr\t%1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x)));	\
+    (count) = __cbtmp ^ 63;						\
+  } while (0)
+#endif
+
+#if HAVE_HOST_CPU_bd2 || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 \
+  || HAVE_HOST_CPU_zen || HAVE_HOST_CPU_jaguar
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    /* This is tzcnt, spelled for older assemblers.  Destination and */	\
+    /* source must be a 64-bit registers, hence cast and %q.         */	\
+    __asm__ ("rep;bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+  } while (0)
+#define COUNT_TRAILING_ZEROS_0 64
+#else
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    ASSERT ((x) != 0);							\
+    __asm__ ("bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+  } while (0)
+#endif
+#endif /* __amd64__ */
+
+#if defined (__i860__) && W_TYPE_SIZE == 32
+/* Double-word right shift: r = low word of (h:l) >> c.  "shr %3,r0,r0"
+   loads the shift count into SC as a side effect, then shrd does the
+   funnel shift.
+   BUG FIX: the original asm was missing the ":" separating the template
+   from the output operand list, so this macro could never compile.  */
+#define rshift_rhlc(r,h,l,c) \
+  __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0"				\
+	   : "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+
+#if defined (__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __x;							\
+  __asm__ ("emul %2,%1,%0"						\
+	   : "=d" (__x.__ll) : "%dI" (u), "dI" (v));			\
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;							\
+    __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v));	\
+    __w; })
+/* 64/32 divide via ediv.  The i960 long-register operands are modelled
+   as unions; ediv leaves the remainder in the low word of the result
+   pair and the quotient in the high word (matching the assignments
+   below).
+   BUG FIXES: (1) the result union __rq was used but never declared --
+   only __nn was -- so this could not compile; (2) the template
+   "ediv %d,%n,%0" referenced no input operands ("%d"/"%n" are not valid
+   operand references); rewritten as divisor, dividend pair, result
+   pair.  NOTE(review): operand order taken from the constraint list --
+   confirm against the i960 instruction reference before enabling.  */
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __nn, __rq;							\
+    __nn.__i.__h = (nh); __nn.__i.__l = (nl);				\
+    __asm__ ("ediv %2,%1,%0"						\
+	   : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d));		\
+    (r) = __rq.__i.__l; (q) = __rq.__i.__h;				\
+  } while (0)
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __cbtmp;							\
+    __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x));		\
+    (count) = __cbtmp ^ 31;						\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
+#if defined (__i960mx)		/* what is the proper symbol to test??? */
+#define rshift_rhlc(r,h,l,c) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __nn;							\
+    __nn.__i.__h = (h); __nn.__i.__l = (l);				\
+    __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c));	\
+  }
+#endif /* i960mx */
+#endif /* i960 */
+
+
+#if defined (__loongarch64) && W_TYPE_SIZE == 64
+/* 64x64->128 multiply using the compiler's 128-bit type: the low half is
+   the plain 64-bit product, the high half is the upper 64 bits of the
+   full 128-bit product.
+   BUG FIX: "__int128__" is not a type any compiler defines; gcc and
+   clang spell the 128-bit integer type "__int128".  */
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    UDItype __u = (u), __v = (v);					\
+    (w0) = __u * __v;							\
+    (w1) = (unsigned __int128) __u * __v >> 64;				\
+  } while (0)
+#endif
+
+
+#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
+     || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
+     || defined (__mc5307__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"				\
+	   : "=d" (sh), "=&d" (sl)					\
+	   : "0"  ((USItype)(ah)), "d" ((USItype)(bh)),			\
+	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"				\
+	   : "=d" (sh), "=&d" (sl)					\
+	   : "0" ((USItype)(ah)), "d" ((USItype)(bh)),			\
+	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
+#if defined (__mc68020__) || defined(mc68020) \
+     || defined (__mc68030__) || defined (mc68030) \
+     || defined (__mc68040__) || defined (mc68040) \
+     || defined (__mcpu32__) || defined (mcpu32) \
+     || defined (__NeXT__)
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulu%.l %3,%1:%0"						\
+	   : "=d" (w0), "=d" (w1)					\
+	   : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divu%.l %4,%1:%0"						\
+	   : "=d" (q), "=d" (r)						\
+	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divs%.l %4,%1:%0"						\
+	   : "=d" (q), "=d" (r)						\
+	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
+#else /* for other 68k family members use 16x16->32 multiplication */
+#define umul_ppmm(xh, xl, a, b) \
+  do { USItype __umul_tmp1, __umul_tmp2;				\
+	__asm__ ("| Inlined umul_ppmm\n"				\
+"	move%.l	%5,%3\n"						\
+"	move%.l	%2,%0\n"						\
+"	move%.w	%3,%1\n"						\
+"	swap	%3\n"							\
+"	swap	%0\n"							\
+"	mulu%.w	%2,%1\n"						\
+"	mulu%.w	%3,%0\n"						\
+"	mulu%.w	%2,%3\n"						\
+"	swap	%2\n"							\
+"	mulu%.w	%5,%2\n"						\
+"	add%.l	%3,%2\n"						\
+"	jcc	1f\n"							\
+"	add%.l	%#0x10000,%0\n"						\
+"1:	move%.l	%2,%3\n"						\
+"	clr%.w	%2\n"							\
+"	swap	%2\n"							\
+"	swap	%3\n"							\
+"	clr%.w	%3\n"							\
+"	add%.l	%3,%1\n"						\
+"	addx%.l	%2,%0\n"						\
+"	| End inlined umul_ppmm"					\
+	      : "=&d" (xh), "=&d" (xl),					\
+		"=&d" (__umul_tmp1), "=&d" (__umul_tmp2)			\
+	      : "%2" ((USItype)(a)), "d" ((USItype)(b)));		\
+  } while (0)
+#endif /* not mc68020 */
+/* The '020, '030, '040 and '060 have bitfield insns.
+   GCC 3.4 defines __mc68020__ when in CPU32 mode, check for __mcpu32__ to
+   exclude bfffo on that chip (bitfield insns not available).  */
+#if (defined (__mc68020__) || defined (mc68020)    \
+     || defined (__mc68030__) || defined (mc68030) \
+     || defined (__mc68040__) || defined (mc68040) \
+     || defined (__mc68060__) || defined (mc68060) \
+     || defined (__NeXT__))			   \
+  && ! defined (__mcpu32__)
+#define count_leading_zeros(count, x) \
+  __asm__ ("bfffo %1{%b2:%b2},%0"					\
+	   : "=d" (count)						\
+	   : "od" ((USItype) (x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* mc68000 */
+
+#if defined (__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __cbtmp;							\
+    __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x));			\
+    (count) = __cbtmp ^ 31;						\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined (__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v));	\
+    (wh) = __x.__i.__h;							\
+    (wl) = __x.__i.__l;							\
+  } while (0)
+/* 64/32 divide via divu.d; the quotient lands in the low word of the
+   result pair, and the remainder is reconstructed as n0 - q*d.
+   BUG FIX: "__q.__l" is not a member of the union -- the 32-bit words
+   live inside the nested struct __i, so the accesses must be
+   "__q.__i.__l" (as the neighbouring umul_ppmm does with __x.__i).  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x, __q;							\
+  __x.__i.__h = (n1); __x.__i.__l = (n0);				\
+  __asm__ ("divu.d %0,%1,%2"						\
+	   : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d));		\
+  (r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+#if defined (__mips) && W_TYPE_SIZE == 32
+/* For gcc >= 4.4, a plain 64-bit multiply is the cleanest way to get at
+   the full product; the compiler extracts the hi/lo halves itself.  */
+#if __GMP_GNUC_PREREQ (4,4)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    UDItype __ll = (UDItype)(u) * (v);					\
+    w1 = __ll >> 32;							\
+    w0 = __ll;								\
+  } while (0)
+#endif
+/* Older gcc (but not clang) accepts the "l"/"h" constraints naming the
+   LO/HI result registers of multu directly.  */
+#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7) && !defined (__clang__)
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
+#endif
+/* Fallback: move the product out of LO/HI with explicit mflo/mfhi.  */
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1"				\
+	   : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
+#endif
+#endif /* __mips */
+
+#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if defined (_MIPS_ARCH_MIPS64R6)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    UDItype __m0 = (u), __m1 = (v);					\
+    (w0) = __m0 * __m1;							\
+    __asm__ ("dmuhu\t%0, %1, %2" : "=d" (w1) : "d" (__m0), "d" (__m1));	\
+  } while (0)
+#endif
+#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (4,4)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
+    __ll_UTItype __ll = (__ll_UTItype)(u) * (v);			\
+    w1 = __ll >> 64;							\
+    w0 = __ll;								\
+  } while (0)
+#endif
+#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7) && !defined (__clang__)
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("dmultu %2,%3"						\
+	   : "=l" (w0), "=h" (w1)					\
+	   : "d" ((UDItype)(u)), "d" ((UDItype)(v)))
+#endif
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1"				\
+	   : "=d" (w0), "=d" (w1)					\
+	   : "d" ((UDItype)(u)), "d" ((UDItype)(v)))
+#endif
+#endif /* __mips */
+
+#if defined (__mmix__) && W_TYPE_SIZE == 64
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("MULU %0,%2,%3" : "=r" (w0), "=z" (w1) : "r" (u), "r" (v))
+#endif
+
+#if defined (__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __x;							\
+  __asm__ ("meid %2,%0"							\
+	   : "=g" (__x.__ll)						\
+	   : "%0" ((USItype)(u)), "g" ((USItype)(v)));			\
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;							\
+    __asm__ ("meid %2,%0"						\
+	     : "=g" (__w)						\
+	     : "%0" ((USItype)(u)), "g" ((USItype)(v)));		\
+    __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __x;							\
+  __x.__i.__h = (n1); __x.__i.__l = (n0);				\
+  __asm__ ("deid %2,%0"							\
+	   : "=g" (__x.__ll)						\
+	   : "0" (__x.__ll), "g" ((USItype)(d)));			\
+  (r) = __x.__i.__l; (q) = __x.__i.__h; })
+#define count_trailing_zeros(count,x) \
+  do {									\
+    __asm__ ("ffsd	%2,%0"						\
+	     : "=r" (count)						\
+	     : "0" ((USItype) 0), "r" ((USItype) (x)));			\
+  } while (0)
+#endif /* __ns32000__ */
+
+/* In the past we had a block of various #defines tested
+       _ARCH_PPC    - AIX
+       _ARCH_PWR    - AIX
+       __powerpc__  - gcc
+       __POWERPC__  - BEOS
+       __ppc__      - Darwin
+       PPC          - old gcc, GNU/Linux, SysV
+   The plain PPC test was not good for vxWorks, since PPC is defined on all
+   CPUs there (eg. m68k too), as a constant one is expected to compare
+   CPU_FAMILY against.
+
+   At any rate, this was pretty unattractive and a bit fragile.  The use of
+   HAVE_HOST_CPU_FAMILY is designed to cut through it all and be sure of
+   getting the desired effect.
+
+   ENHANCE-ME: We should test _IBMR2 here when we add assembly support for
+   the system vendor compilers.  (Is that vendor compilers with inline asm,
+   or what?)  */
+
+#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc)	\
+  && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)				\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
+		 __CLOBBER_CC);						\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
+		 __CLOBBER_CC);						\
+    else								\
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)		\
+		 __CLOBBER_CC);						\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)				\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
+		 __CLOBBER_CC);						\
+    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
+		 __CLOBBER_CC);						\
+    else if (__builtin_constant_p (bh) && (bh) == 0)			\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
+		 __CLOBBER_CC);						\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
+		 __CLOBBER_CC);						\
+    else								\
+      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl)		\
+		 __CLOBBER_CC);						\
+  } while (0)
+#define count_leading_zeros(count, x) \
+  __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#if HAVE_HOST_CPU_FAMILY_powerpc
+#if __GMP_GNUC_PREREQ (4,4)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    UDItype __ll = (UDItype)(u) * (v);					\
+    w1 = __ll >> 32;							\
+    w0 = __ll;								\
+  } while (0)
+#endif
+#if !defined (umul_ppmm)
+/* 32x32->64 multiply: high half via mulhwu, low half as the plain
+   product of the temporaries.
+   FIX: feed the asm the local copies __m0/__m1, not the raw macro
+   arguments m0/m1 -- otherwise each argument expression is evaluated
+   twice (once for the asm, once for pl), which is wrong for arguments
+   with side effects.  This matches the 64-bit mulhdu variant below,
+   which already uses the temporaries.  */
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#endif
+/* Signed 32x32->64 multiply: high half via mulhw, low half as the plain
+   product of the temporaries.
+   FIX: use the local copies __m0/__m1 in the asm instead of the raw
+   macro arguments, so each argument is evaluated exactly once
+   (consistent with the 64-bit mulhd variant below).  */
+#define smul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    SItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#else
+#define smul_ppmm(xh, xl, m0, m1) \
+  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+#endif
+#endif /* 32-bit POWER architecture variants.  */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+   vendor compilers.  */
+#if HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64
+#if !defined (_LONG_LONG_LIMB)
+/* _LONG_LONG_LIMB is ABI=mode32 where adde operates on 32-bit values.  So
+   use adde etc only when not _LONG_LONG_LIMB.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)				\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r"  ((UDItype)(ah)),					\
+		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))		\
+		 __CLOBBER_CC);						\
+    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r"  ((UDItype)(ah)),					\
+		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))		\
+		 __CLOBBER_CC);						\
+    else								\
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r"  ((UDItype)(ah)), "r"  ((UDItype)(bh)),		\
+		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))		\
+		 __CLOBBER_CC);						\
+  } while (0)
+/* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
+   This might seem strange, but gcc folds away the dead code late.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bl)					\
+	&& (bl) > -0x8000 && (bl) <= 0x8000 && (bl) != 0) {		\
+	if (__builtin_constant_p (ah) && (ah) == 0)			\
+	  __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   :                       "r" ((UDItype)(bh)),		\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
+	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
+	  __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   :                       "r" ((UDItype)(bh)),		\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
+	else if (__builtin_constant_p (bh) && (bh) == 0)		\
+	  __asm__ ("addic %1,%3,%4\n\taddme %0,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "r" ((UDItype)(ah)),				\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
+	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	\
+	  __asm__ ("addic %1,%3,%4\n\taddze %0,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "r" ((UDItype)(ah)),				\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
+	else								\
+	  __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
+    } else {								\
+	if (__builtin_constant_p (ah) && (ah) == 0)			\
+	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   :                       "r" ((UDItype)(bh)),		\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
+	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
+	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   :                       "r" ((UDItype)(bh)),		\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
+	else if (__builtin_constant_p (bh) && (bh) == 0)		\
+	  __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "r"  ((UDItype)(ah)),				\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
+	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	\
+	  __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "r"  ((UDItype)(ah)),				\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
+	else								\
+	  __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"		\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "r"  ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
+    }									\
+  } while (0)
+#endif /* ! _LONG_LONG_LIMB */
+#define count_leading_zeros(count, x) \
+  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#if __GMP_GNUC_PREREQ (4,8)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
+    __ll_UTItype __ll = (__ll_UTItype)(u) * (v);			\
+    w1 = __ll >> 64;							\
+    w0 = __ll;								\
+  } while (0)
+#endif
+#if !defined (umul_ppmm)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#endif
+#define smul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    DItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#endif /* 64-bit PowerPC.  */
+
+#if defined (__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addw %5,%1\n\taddwc %3,%0"					\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),			\
+	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subw %5,%1\n\tsubwb %3,%0"					\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)),			\
+	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+  __asm__ ("movw %1,%R0\n\tuemul %2,%0"					\
+	   : "=&r" (__x.__ll)						\
+	   : "g" ((USItype) (u)), "g" ((USItype)(v)));			\
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#endif /* __pyr__ */
+
+#if defined (__ibm032__) /* RT/ROMP */  && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("a %1,%5\n\tae %0,%3"					\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0"  ((USItype)(ah)), "r" ((USItype)(bh)),			\
+	     "%1" ((USItype)(al)), "r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("s %1,%5\n\tse %0,%3"					\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0" ((USItype)(ah)), "r" ((USItype)(bh)),			\
+	     "1" ((USItype)(al)), "r" ((USItype)(bl)))
+#define smul_ppmm(ph, pl, m0, m1) \
+  __asm__ (								\
+       "s	r2,r2\n"						\
+"	mts r10,%2\n"							\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	cas	%0,r2,r0\n"						\
+"	mfs	r10,%1"							\
+	   : "=r" (ph), "=r" (pl)					\
+	   : "%r" ((USItype)(m0)), "r" ((USItype)(m1))			\
+	   : "r2")
+#define count_leading_zeros(count, x) \
+  do {									\
+    if ((x) >= 0x10000)							\
+      __asm__ ("clz	%0,%1"						\
+	       : "=r" (count) : "r" ((USItype)(x) >> 16));		\
+    else								\
+      {									\
+	__asm__ ("clz	%0,%1"						\
+		 : "=r" (count) : "r" ((USItype)(x)));			\
+	(count) += 16;							\
+      }									\
+  } while (0)
+#endif /* RT/ROMP */
+
+#if defined (__riscv) && defined (__riscv_mul) && W_TYPE_SIZE == 64
+/* 64x64->128 multiply: low half from the plain product, high half from
+   mulhu.  The temporaries ensure u and v are evaluated only once.  */
+#define umul_ppmm(ph, pl, u, v) \
+  do {									\
+    UDItype __u = (u), __v = (v);					\
+    (pl) = __u * __v;							\
+    __asm__ ("mulhu\t%0, %1, %2" : "=r" (ph) : "%r" (__u), "r" (__v));	\
+  } while (0)
+#endif
+
+#if (defined (__SH2__) || defined (__SH3__) || defined (__SH4__)) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0"		\
+	   : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"				\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl)			\
+	   __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"				\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl)	\
+	   __CLOBBER_CC)
+/* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h
+   doesn't define anything to indicate that to us, it only sets __sparcv8. */
+#if defined (__sparc_v9__) || defined (__sparcv9)
+/* Perhaps we should use floating-point operations here?  */
+#if 0
+/* Triggers a bug making mpz/tests/t-gcd.c fail.
+   Perhaps we simply need to explicitly zero-extend the inputs?  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" :		\
+	   "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
+#else
+/* Use v8 umul until above bug is fixed.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#endif
+/* Use a plain v8 divide for v9.  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    USItype __q;							\
+    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
+	     : "=r" (__q) : "r" (n1), "r" (n0), "r" (d));		\
+    (r) = (n0) - __q * (d);						\
+    (q) = __q;								\
+  } while (0)
+#else
+#if defined (__sparc_v8__)   /* gcc normal */				\
+  || defined (__sparcv8)     /* gcc solaris */				\
+  || HAVE_HOST_CPU_supersparc
+/* Don't match immediate range because: 1) it is not often useful,
+   2) the 'I' flag thinks of the range as a 13 bit signed interval,
+   while we want to match a 13 bit interval, sign extended to 32 bits,
+   but INTERPRETED AS UNSIGNED.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+
+#if HAVE_HOST_CPU_supersparc
+#else
+/* Don't use this on SuperSPARC because its udiv only handles 53 bit
+   dividends and will trap to the kernel for the rest. */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    USItype __q;							\
+    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
+	     : "=r" (__q) : "r" (n1), "r" (n0), "r" (d));		\
+    (r) = (n0) - __q * (d);						\
+    (q) = __q;								\
+  } while (0)
+#endif /* HAVE_HOST_CPU_supersparc */
+
+#else /* ! __sparc_v8__ */
+#if defined (__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+   instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("! Inlined udiv_qrnnd\n"					\
+"	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
+"	tst	%%g0\n"							\
+"	divscc	%3,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%0\n"						\
+"	rd	%%y,%1\n"						\
+"	bl,a 1f\n"							\
+"	add	%1,%4,%1\n"						\
+"1:	! End of inline udiv_qrnnd"					\
+	   : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d)		\
+	   : "%g1" __AND_CLOBBER_CC)
+#define count_leading_zeros(count, x) \
+  __asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+   undefined.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+#endif /* __sparc_v9__ */
+/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("! Inlined umul_ppmm\n"					\
+"	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n" \
+"	sra	%3,31,%%g2	! Don't move this insn\n"		\
+"	and	%2,%%g2,%%g2	! Don't move this insn\n"		\
+"	andcc	%%g0,0,%%g1	! Don't move this insn\n"		\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,0,%%g1\n"						\
+"	add	%%g1,%%g2,%0\n"						\
+"	rd	%%y,%1"							\
+	   : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v)			\
+	   : "%g1", "%g2" __AND_CLOBBER_CC)
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __r;							\
+    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));			\
+    (r) = __r;								\
+  } while (0)
+extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
+#endif /* __sparc__ */
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ (								\
+       "addcc	%r4,%5,%1\n"						\
+      "	addccc	%r6,%7,%%g0\n"						\
+      "	addc	%r2,%3,%0"						\
+       : "=r" (sh), "=&r" (sl)						\
+       : "rJ"  ((UDItype)(ah)), "rI" ((UDItype)(bh)),			\
+	 "%rJ" ((UDItype)(al)), "rI" ((UDItype)(bl)),			\
+	 "%rJ" ((UDItype)(al) >> 32), "rI" ((UDItype)(bl) >> 32)	\
+	   __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ (								\
+       "subcc	%r4,%5,%1\n"						\
+      "	subccc	%r6,%7,%%g0\n"						\
+      "	subc	%r2,%3,%0"						\
+       : "=r" (sh), "=&r" (sl)						\
+       : "rJ" ((UDItype)(ah)), "rI" ((UDItype)(bh)),			\
+	 "rJ" ((UDItype)(al)), "rI" ((UDItype)(bl)),			\
+	 "rJ" ((UDItype)(al) >> 32), "rI" ((UDItype)(bl) >> 32)		\
+	   __CLOBBER_CC)
+#if __VIS__ >= 0x300
+#undef add_ssaaaa
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ (								\
+       "addcc	%r4, %5, %1\n"						\
+      "	addxc	%r2, %r3, %0"						\
+	  : "=r" (sh), "=&r" (sl)					\
+       : "rJ"  ((UDItype)(ah)), "rJ" ((UDItype)(bh)),			\
+	 "%rJ" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    (pl) = __m0 * __m1;							\
+    __asm__ ("umulxhi\t%2, %1, %0"					\
+	     : "=r" (ph)						\
+	     : "%r" (__m0), "r" (__m1));				\
+  } while (0)
+#define count_leading_zeros(count, x) \
+  __asm__ ("lzd\t%1,%0" : "=r" (count) : "r" (x))
+/* Needed by count_leading_zeros_32 in sparc64.h.  */
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif
+#endif
+
+#if (defined (__vax) || defined (__vax__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addl2 %5,%1\n\tadwc %3,%0"					\
+	   : "=g" (sh), "=&g" (sl)					\
+	   : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),			\
+	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"					\
+	   : "=g" (sh), "=&g" (sl)					\
+	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)),			\
+	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define smul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __x;							\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("emul %1,%2,$0,%0"						\
+	     : "=g" (__x.__ll) : "g" (__m0), "g" (__m1));		\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+  } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {SItype __l, __h;} __i;				\
+	  } __x;							\
+    __x.__i.__h = n1; __x.__i.__l = n0;					\
+    __asm__ ("ediv %3,%2,%0,%1"						\
+	     : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d));		\
+  } while (0)
+#if 0
+/* FIXME: This instruction appears to be unimplemented on some systems (vax
+   8800 maybe). */
+#define count_trailing_zeros(count,x)					\
+  do {									\
+    __asm__ ("ffs 0, 31, %1, %0"					\
+	     : "=g" (count)						\
+	     : "g" ((USItype) (x)));					\
+  } while (0)
+#endif
+#endif /* vax */
+
+#if defined (__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add	%H1,%H5\n\tadc	%H0,%H3"				\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0"  ((unsigned int)(ah)), "r" ((unsigned int)(bh)),	\
+	     "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub	%H1,%H5\n\tsbc	%H0,%H3"				\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)),	\
+	     "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {long int __ll;						\
+	   struct {unsigned int __h, __l;} __i;				\
+	  } __x;							\
+    unsigned int __m0 = (m0), __m1 = (m1);				\
+    __asm__ ("mult	%S0,%H3"					\
+	     : "=r" (__x.__i.__h), "=r" (__x.__i.__l)			\
+	     : "%1" (m0), "rQR" (m1));					\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+    (xh) += ((((signed int) __m0 >> 15) & __m1)				\
+	     + (((signed int) __m1 >> 15) & __m0));			\
+  } while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+#endif /* NO_ASM */
+
+
+/* FIXME: "sidi" here is highly doubtful, should sometimes be "diti".  */
+#if !defined (umul_ppmm) && defined (__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDWtype __ll = __umulsidi3 (m0, m1);				\
+    ph = (UWtype) (__ll >> W_TYPE_SIZE);				\
+    pl = (UWtype) __ll;							\
+  } while (0)
+#endif
+
+#if !defined (__umulsidi3)
+#define __umulsidi3(u, v) \
+  ({UWtype __hi, __lo;							\
+    umul_ppmm (__hi, __lo, u, v);					\
+    ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+
+#if defined (__cplusplus)
+#define __longlong_h_C "C"	/* give the mpn_* extern declarations below C linkage when compiled as C++ */
+#else
+#define __longlong_h_C		/* plain C: no linkage specifier needed */
+#endif
+
+/* Use mpn_umul_ppmm or mpn_udiv_qrnnd functions, if they exist.  The "_r"
+   forms have "reversed" arguments, meaning the pointer is last, which
+   sometimes allows better parameter passing, in particular on 64-bit
+   hppa. */
+
+#define mpn_umul_ppmm  __MPN(umul_ppmm)
+extern __longlong_h_C UWtype mpn_umul_ppmm (UWtype *, UWtype, UWtype);
+
+#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm  \
+  && ! defined (LONGLONG_STANDALONE)
+#define umul_ppmm(wh, wl, u, v)						\
+  do {									\
+    UWtype __umul_ppmm__p0;						\
+    (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v));\
+    (wl) = __umul_ppmm__p0;						\
+  } while (0)
+#endif
+
+#define mpn_umul_ppmm_r  __MPN(umul_ppmm_r)
+extern __longlong_h_C UWtype mpn_umul_ppmm_r (UWtype, UWtype, UWtype *);
+
+#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r	\
+  && ! defined (LONGLONG_STANDALONE)
+#define umul_ppmm(wh, wl, u, v)						\
+  do {									\
+    UWtype __umul_p0;							\
+    (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_p0);	\
+    (wl) = __umul_p0;							\
+  } while (0)
+#endif
+
+#define mpn_udiv_qrnnd  __MPN(udiv_qrnnd)
+extern __longlong_h_C UWtype mpn_udiv_qrnnd (UWtype *, UWtype, UWtype, UWtype);
+
+#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd	\
+  && ! defined (LONGLONG_STANDALONE)
+#define udiv_qrnnd(q, r, n1, n0, d)					\
+  do {									\
+    UWtype __udiv_qrnnd_r;						\
+    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd_r,				\
+			  (UWtype) (n1), (UWtype) (n0), (UWtype) d);	\
+    (r) = __udiv_qrnnd_r;						\
+  } while (0)
+#endif
+
+#define mpn_udiv_qrnnd_r  __MPN(udiv_qrnnd_r)
+extern __longlong_h_C UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *);
+
+#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r	\
+  && ! defined (LONGLONG_STANDALONE)
+#define udiv_qrnnd(q, r, n1, n0, d)					\
+  do {									\
+    UWtype __udiv_qrnnd_r;						\
+    (q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) d,	\
+			    &__udiv_qrnnd_r);				\
+    (r) = __udiv_qrnnd_r;						\
+  } while (0)
+#endif
+
+
+/* If this machine has no inline assembler, use C macros.  */
+
+#if !defined (add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl), two-word add with carry propagation */ \
+  do {									\
+    UWtype __x;								\
+    UWtype __al = (al);							\
+    UWtype __bl = (bl);							\
+    __x = __al + __bl;			/* low-word sum */		\
+    (sh) = (ah) + (bh) + (__x < __al);	/* __x < __al detects carry out of the low word */ \
+    (sl) = __x;								\
+  } while (0)
+#endif
+
+#if !defined (sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl), two-word subtract with borrow propagation */ \
+  do {									\
+    UWtype __x;								\
+    UWtype __al = (al);							\
+    UWtype __bl = (bl);							\
+    __x = __al - __bl;			/* low-word difference */	\
+    (sh) = (ah) - (bh) - (__al < __bl);	/* __al < __bl detects borrow out of the low word */ \
+    (sl) = __x;								\
+  } while (0)
+#endif
+
+/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
+   smul_ppmm.  */
+#if !defined (umul_ppmm) && defined (smul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __w1;							\
+    UWtype __xm0 = (u), __xm1 = (v);					\
+    smul_ppmm (__w1, w0, __xm0, __xm1);					\
+    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)		\
+		+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);		\
+  } while (0)
+#endif
+
+/* If we still don't have umul_ppmm, define it using plain C.
+
+   For reference, when this code is used for squaring (ie. u and v identical
+   expressions), gcc recognises __x1 and __x2 are the same and generates 3
+   multiplies, not 4.  The subsequent additions could be optimized a bit,
+   but the only place GMP currently uses such a square is mpn_sqr_basecase,
+   and chips obliged to use this generic C umul will have plenty of worse
+   performance problems than a couple of extra instructions on the diagonal
+   of sqr_basecase.  */
+
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __x0, __x1, __x2, __x3;					\
+    UHWtype __ul, __vl, __uh, __vh;					\
+    UWtype __u = (u), __v = (v);					\
+									\
+    __ul = __ll_lowpart (__u);						\
+    __uh = __ll_highpart (__u);						\
+    __vl = __ll_lowpart (__v);						\
+    __vh = __ll_highpart (__v);						\
+									\
+    __x0 = (UWtype) __ul * __vl;					\
+    __x1 = (UWtype) __ul * __vh;					\
+    __x2 = (UWtype) __uh * __vl;					\
+    __x3 = (UWtype) __uh * __vh;					\
+									\
+    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
+    __x1 += __x2;		/* but this indeed can */		\
+    if (__x1 < __x2)		/* did we get it? */			\
+      __x3 += __ll_B;		/* yes, add it in the proper pos. */	\
+									\
+    (w1) = __x3 + __ll_highpart (__x1);					\
+    (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0);		\
+  } while (0)
+#endif
+
+/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
+   exist in one form or another).  */
+#if !defined (smul_ppmm)
+#define smul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __w1;							\
+    UWtype __xm0 = (u), __xm1 = (v);					\
+    umul_ppmm (__w1, w0, __xm0, __xm1);					\
+    (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)		\
+		- (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);		\
+  } while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do {									\
+    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;			\
+									\
+    ASSERT ((d) != 0);							\
+    ASSERT ((n1) < (d));						\
+									\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __q1 = (n1) / __d1;							\
+    __r1 = (n1) - __q1 * __d1;						\
+    __m = __q1 * __d0;							\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)							\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __q0 = __r1 / __d1;							\
+    __r0 = __r1  - __q0 * __d1;						\
+    __m = __q0 * __d0;							\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))						\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = __q1 * __ll_B | __q0;						\
+    (r) = __r0;								\
+  } while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) \
+  && ! defined (LONGLONG_STANDALONE)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  do {									\
+    UWtype __r;								\
+    (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d);				\
+    (r) = __r;								\
+  } while (0)
+__GMP_DECLSPEC UWtype __MPN(udiv_w_sdiv) (UWtype *, UWtype, UWtype, UWtype);
+#endif
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined (count_leading_zeros)
+#define count_leading_zeros(count, x) \
+  do {									\
+    UWtype __xr = (x);							\
+    UWtype __a;								\
+									\
+    if (W_TYPE_SIZE == 32)						\
+      {									\
+	__a = __xr < ((UWtype) 1 << 2*__BITS4)				\
+	  ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1)		\
+	  : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1		\
+	  : 3*__BITS4 + 1);						\
+      }									\
+    else								\
+      {									\
+	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
+	  if (((__xr >> __a) & 0xff) != 0)				\
+	    break;							\
+	++__a;								\
+      }									\
+									\
+    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];		\
+  } while (0)
+/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#define COUNT_LEADING_ZEROS_SLOW
+#endif
+
+/* clz_tab needed by mpn/x86/pentium/mod_1.asm in a fat binary */
+#if HAVE_HOST_CPU_FAMILY_x86 && WANT_FAT_BINARY
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif
+
+#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+extern const unsigned char __GMP_DECLSPEC __clz_tab[129];
+#endif
+
+#if !defined (count_trailing_zeros)
+#if !defined (COUNT_LEADING_ZEROS_SLOW)
+/* Define count_trailing_zeros using an asm count_leading_zeros.  */
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    UWtype __ctz_x = (x);						\
+    UWtype __ctz_c;							\
+    ASSERT (__ctz_x != 0);						\
+    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);			\
+    (count) = W_TYPE_SIZE - 1 - __ctz_c;				\
+  } while (0)
+#else
+/* Define count_trailing_zeros in plain C, assuming small counts are common.
+   We use clz_tab without ado, since the C count_leading_zeros above will have
+   pulled it in.  */
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    UWtype __ctz_x = (x);						\
+    int __ctz_c;							\
+									\
+    if (LIKELY ((__ctz_x & 0xff) != 0))					\
+      (count) = __clz_tab[__ctz_x & -__ctz_x] - 2;			\
+    else								\
+      {									\
+	for (__ctz_c = 8 - 2; __ctz_c < W_TYPE_SIZE - 2; __ctz_c += 8)	\
+	  {								\
+	    __ctz_x >>= 8;						\
+	    if (LIKELY ((__ctz_x & 0xff) != 0))				\
+	      break;							\
+	  }								\
+									\
+	(count) = __ctz_c + __clz_tab[__ctz_x & -__ctz_x];		\
+      }									\
+  } while (0)
+#endif
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
+
+/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and
+   that hence the latter should always be used.  */
+#ifndef UDIV_PREINV_ALWAYS
+#define UDIV_PREINV_ALWAYS 0
+#endif
diff --git a/memory.c b/memory.c
new file mode 100644
index 0000000..5e00191
--- /dev/null
+++ b/memory.c
@@ -0,0 +1,145 @@
+/* Memory allocation routines.
+
+Copyright 1991, 1993, 1994, 2000-2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h> /* for malloc, realloc, free */
+
+#include "gmp-impl.h"
+
+
+void * (*__gmp_allocate_func) (size_t) = __gmp_default_allocate;	/* replaceable hook: allocate SIZE bytes */
+void * (*__gmp_reallocate_func) (void *, size_t, size_t) = __gmp_default_reallocate;	/* replaceable hook: (ptr, old_size, new_size) */
+void   (*__gmp_free_func) (void *, size_t) = __gmp_default_free;	/* replaceable hook: (ptr, size) */
+
+
+/* Default allocation functions.  In case of failure to allocate/reallocate
+   an error message is written to stderr and the program aborts.  */
+
+void *			/* Allocate SIZE bytes; on failure print to stderr and abort(). */
+__gmp_default_allocate (size_t size)
+{
+  void *ret;
+#ifdef DEBUG
+  size_t req_size = size;	/* remember caller's size; guards are placed relative to it */
+  size += 2 * GMP_LIMB_BYTES;	/* extra room for one guard limb before and after the block */
+#endif
+  ret = malloc (size);
+  if (ret == 0)
+    {
+      fprintf (stderr, "GNU MP: Cannot allocate memory (size=%lu)\n", (long) size);	/* NOTE(review): %lu paired with a (long) cast — signedness mismatch; upstream GMP quirk, kept as-is in this mirror */
+      abort ();
+    }
+
+#ifdef DEBUG
+  {
+    mp_ptr p = ret;
+    p++;			/* caller receives p; p[-1] is the hidden leading guard limb */
+    p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;	/* write leading guard pattern */
+    if (req_size % GMP_LIMB_BYTES == 0)	/* trailing guard only written for limb-aligned sizes */
+      p[req_size / GMP_LIMB_BYTES] = ~((0xdeadbeef << 31) + 0xdeafdeed);	/* complemented pattern after the block */
+    ret = p;
+  }
+#endif
+  return ret;
+}
+
+void *			/* Grow/shrink OLDPTR from OLD_SIZE to NEW_SIZE bytes; abort() on failure. */
+__gmp_default_reallocate (void *oldptr, size_t old_size, size_t new_size)
+{
+  void *ret;
+
+#ifdef DEBUG
+  size_t req_size = new_size;	/* caller's requested size, before guard padding */
+
+  if (old_size != 0)		/* old_size == 0 means no guarded old block to verify */
+    {
+      mp_ptr p = oldptr;
+      if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)	/* verify leading guard limb is intact */
+	{
+	  fprintf (stderr, "gmp: (realloc) data clobbered before allocation block\n");
+	  abort ();
+	}
+      if (old_size % GMP_LIMB_BYTES == 0)	/* trailing guard exists only for limb-aligned sizes */
+	if (p[old_size / GMP_LIMB_BYTES] != ~((0xdeadbeef << 31) + 0xdeafdeed))
+	  {
+	    fprintf (stderr, "gmp: (realloc) data clobbered after allocation block\n");
+	    abort ();
+	  }
+      oldptr = p - 1;		/* step back to the real malloc'd base for realloc */
+    }
+
+  new_size += 2 * GMP_LIMB_BYTES;	/* room for guard limbs on both sides of the new block */
+#endif
+
+  ret = realloc (oldptr, new_size);
+  if (ret == 0)
+    {
+      fprintf (stderr, "GNU MP: Cannot reallocate memory (old_size=%lu new_size=%lu)\n", (long) old_size, (long) new_size);	/* NOTE(review): %lu paired with (long) casts — signedness mismatch; upstream GMP quirk */
+      abort ();
+    }
+
+#ifdef DEBUG
+  {
+    mp_ptr p = ret;
+    p++;			/* hide the leading guard limb from the caller again */
+    p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;	/* rewrite leading guard pattern */
+    if (req_size % GMP_LIMB_BYTES == 0)
+      p[req_size / GMP_LIMB_BYTES] = ~((0xdeadbeef << 31) + 0xdeafdeed);	/* rewrite trailing guard (limb-aligned sizes only) */
+    ret = p;
+  }
+#endif
+  return ret;
+}
+
+void			/* Free BLK_PTR (BLK_SIZE bytes); in DEBUG builds, verify guard limbs first. */
+__gmp_default_free (void *blk_ptr, size_t blk_size)
+{
+#ifdef DEBUG
+  {
+    mp_ptr p = blk_ptr;
+    if (blk_size != 0)		/* blk_size == 0 means size unknown; guards can't be checked */
+      {
+	if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)	/* verify leading guard limb */
+	  {
+	    fprintf (stderr, "gmp: (free) data clobbered before allocation block\n");
+	    abort ();
+	  }
+	if (blk_size % GMP_LIMB_BYTES == 0)	/* trailing guard exists only for limb-aligned sizes */
+	  if (p[blk_size / GMP_LIMB_BYTES] != ~((0xdeadbeef << 31) + 0xdeafdeed))
+	    {
+	      fprintf (stderr, "gmp: (free) data clobbered after allocation block\n");
+	      abort ();
+	    }
+      }
+    blk_ptr = p - 1;		/* free from the real malloc'd base (before the leading guard); done even when blk_size == 0, since allocate always prepends a guard limb */
+  }
+#endif
+  free (blk_ptr);
+}
diff --git a/mini-gmp/mini-gmp.c b/mini-gmp/mini-gmp.c
new file mode 100644
index 0000000..69a72bf
--- /dev/null
+++ b/mini-gmp/mini-gmp.c
@@ -0,0 +1,4627 @@
+/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
+
+   Contributed to the GNU project by Niels Möller
+   Additional functionalities and improvements by Marco Bodrato.
+
+Copyright 1991-1997, 1999-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* NOTE: All functions in this file which are not declared in
+   mini-gmp.h are internal, and are not intended to be compatible
+   with GMP or with future versions of mini-gmp. */
+
+/* Much of the material copied from GMP files, including: gmp-impl.h,
+   longlong.h, mpn/generic/add_n.c, mpn/generic/addmul_1.c,
+   mpn/generic/lshift.c, mpn/generic/mul_1.c,
+   mpn/generic/mul_basecase.c, mpn/generic/rshift.c,
+   mpn/generic/sbpi1_div_qr.c, mpn/generic/sub_n.c,
+   mpn/generic/submul_1.c. */
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mini-gmp.h"
+
+#if !defined(MINI_GMP_DONT_USE_FLOAT_H)
+#include <float.h>
+#endif
+
+
/* Macros */
/* Limb width in bits; assumes mp_limb_t has no padding bits.  */
#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)

#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0)
#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))

/* Half-limb base and low-half mask, used when splitting limbs in the
   multiply and divide helpers below.  */
#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2))
#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1)

#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT)
#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1))

#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x))
/* Negate X and convert to unsigned type T without signed overflow,
   even when X is the most negative value of its type.  */
#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))

#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b))

/* Three-way comparison: -1, 0 or 1.  */
#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b)))

#if defined(DBL_MANT_DIG) && FLT_RADIX == 2
#define GMP_DBL_MANT_BITS DBL_MANT_DIG
#else
/* Fallback: IEEE double mantissa width.  */
#define GMP_DBL_MANT_BITS (53)
#endif
+
/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize)				\
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))

/* Evaluate X once and assert (in checked builds) that it produced no
   carry/borrow out.  */
#define gmp_assert_nocarry(x) do { \
    mp_limb_t __cy = (x);	   \
    assert (__cy == 0);		   \
    (void) (__cy);		   \
  } while (0)
+
/* Count leading zeros of X into COUNT.  Requires x != 0.  Skips whole
   zero bytes first (when limbs are wider than 8 bits), then single
   bits.  */
#define gmp_clz(count, x) do {						\
    mp_limb_t __clz_x = (x);						\
    unsigned __clz_c = 0;						\
    int LOCAL_SHIFT_BITS = 8;						\
    if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS)				\
      for (;								\
	   (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0;	\
	   __clz_c += 8)						\
	{ __clz_x <<= LOCAL_SHIFT_BITS;	}				\
    for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++)		\
      __clz_x <<= 1;							\
    (count) = __clz_c;							\
  } while (0)

/* Count trailing zeros of X into COUNT, via the identity
   ctz(x) = GMP_LIMB_BITS - 1 - clz(x & -x).  Requires x != 0.  */
#define gmp_ctz(count, x) do {						\
    mp_limb_t __ctz_x = (x);						\
    unsigned __ctz_c = 0;						\
    gmp_clz (__ctz_c, __ctz_x & - __ctz_x);				\
    (count) = GMP_LIMB_BITS - 1 - __ctz_c;				\
  } while (0)
+
/* Double-limb add: <sh,sl> = <ah,al> + <bh,bl>.  Carry out of the low
   limb is detected by the unsigned wraparound compare.  */
#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    mp_limb_t __x;							\
    __x = (al) + (bl);							\
    (sh) = (ah) + (bh) + (__x < (al));					\
    (sl) = __x;								\
  } while (0)

/* Double-limb subtract: <sh,sl> = <ah,al> - <bh,bl>.  */
#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    mp_limb_t __x;							\
    __x = (al) - (bl);							\
    (sh) = (ah) - (bh) - ((al) < (bl));					\
    (sl) = __x;								\
  } while (0)
+
/* Full double-limb product: <w1,w0> = u * v.  Uses a wider native
   integer type when one exists; otherwise falls back to the classical
   four half-limb cross products.  */
#define gmp_umul_ppmm(w1, w0, u, v)					\
  do {									\
    int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;				\
    if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS)		\
      {									\
	unsigned int __ww = (unsigned int) (u) * (v);			\
	w0 = (mp_limb_t) __ww;						\
	w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS);			\
      }									\
    else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS)			\
      {									\
	unsigned long int __ww = (unsigned long int) (u) * (v);		\
	w0 = (mp_limb_t) __ww;						\
	w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS);			\
      }									\
    else {								\
      mp_limb_t __x0, __x1, __x2, __x3;					\
      unsigned __ul, __vl, __uh, __vh;					\
      mp_limb_t __u = (u), __v = (v);					\
      assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t));		\
									\
      __ul = __u & GMP_LLIMB_MASK;					\
      __uh = __u >> (GMP_LIMB_BITS / 2);				\
      __vl = __v & GMP_LLIMB_MASK;					\
      __vh = __v >> (GMP_LIMB_BITS / 2);				\
									\
      __x0 = (mp_limb_t) __ul * __vl;					\
      __x1 = (mp_limb_t) __ul * __vh;					\
      __x2 = (mp_limb_t) __uh * __vl;					\
      __x3 = (mp_limb_t) __uh * __vh;					\
									\
      __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */	\
      __x1 += __x2;		/* but this indeed can */		\
      if (__x1 < __x2)		/* did we get it? */			\
	__x3 += GMP_HLIMB_BIT;	/* yes, add it in the proper pos. */	\
									\
      (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2));			\
      (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK);	\
    }									\
  } while (0)

/* If mp_limb_t is of size smaller than int, plain u*v implies
   automatic promotion to *signed* int, and then multiply may overflow
   and cause undefined behavior. Explicitly cast to unsigned int for
   that case. */
#define gmp_umullo_limb(u, v) \
  ((sizeof(mp_limb_t) >= sizeof(int)) ? (u)*(v) : (unsigned int)(u) * (v))
+
/* 2/1 division: divide <nh,nl> by D, given DI, the 2/1 inverse of d.
   Requires d normalized (high bit set) and nh < d.  Quotient to Q,
   remainder to R.  */
#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di)			\
  do {									\
    mp_limb_t _qh, _ql, _r, _mask;					\
    gmp_umul_ppmm (_qh, _ql, (nh), (di));				\
    gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
    _r = (nl) - gmp_umullo_limb (_qh, (d));				\
    _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */		\
    _qh += _mask;							\
    _r += _mask & (d);							\
    if (_r >= (d))							\
      {									\
	_r -= (d);							\
	_qh++;								\
      }									\
									\
    (r) = _r;								\
    (q) = _qh;								\
  } while (0)
+
/* 3/2 division step: divide <n2,n1,n0> by <d1,d0>, given DINV =
   mpn_invert_3by2 (d1, d0).  Requires <d1,d0> normalized and
   <n2,n1> < <d1,d0>.  Quotient limb to Q, two-limb remainder to
   <r1,r0>.  */
#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)		\
  do {									\
    mp_limb_t _q0, _t1, _t0, _mask;					\
    gmp_umul_ppmm ((q), _q0, (n2), (dinv));				\
    gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));			\
									\
    /* Compute the two most significant limbs of n - q'd */		\
    (r1) = (n1) - gmp_umullo_limb ((d1), (q));				\
    gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));		\
    gmp_umul_ppmm (_t1, _t0, (d0), (q));				\
    gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);			\
    (q)++;								\
									\
    /* Conditionally adjust q and the remainders */			\
    _mask = - (mp_limb_t) ((r1) >= _q0);				\
    (q) += _mask;							\
    gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
    if ((r1) >= (d1))							\
      {									\
	if ((r1) > (d1) || (r0) >= (d0))				\
	  {								\
	    (q)++;							\
	    gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));	\
	  }								\
      }									\
  } while (0)
+
/* Swap macros.  One per type; all are statement-like and take lvalue
   arguments of the named type.  */
#define MP_LIMB_T_SWAP(x, y)						\
  do {									\
    mp_limb_t __mp_limb_t_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_limb_t_swap__tmp;					\
  } while (0)
#define MP_SIZE_T_SWAP(x, y)						\
  do {									\
    mp_size_t __mp_size_t_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_size_t_swap__tmp;					\
  } while (0)
#define MP_BITCNT_T_SWAP(x,y)			\
  do {						\
    mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x);	\
    (x) = (y);					\
    (y) = __mp_bitcnt_t_swap__tmp;		\
  } while (0)
#define MP_PTR_SWAP(x, y)						\
  do {									\
    mp_ptr __mp_ptr_swap__tmp = (x);					\
    (x) = (y);								\
    (y) = __mp_ptr_swap__tmp;						\
  } while (0)
#define MP_SRCPTR_SWAP(x, y)						\
  do {									\
    mp_srcptr __mp_srcptr_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_srcptr_swap__tmp;					\
  } while (0)

/* Swap a limb pointer together with its size.  */
#define MPN_PTR_SWAP(xp,xs, yp,ys)					\
  do {									\
    MP_PTR_SWAP (xp, yp);						\
    MP_SIZE_T_SWAP (xs, ys);						\
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)					\
  do {									\
    MP_SRCPTR_SWAP (xp, yp);						\
    MP_SIZE_T_SWAP (xs, ys);						\
  } while(0)

#define MPZ_PTR_SWAP(x, y)						\
  do {									\
    mpz_ptr __mpz_ptr_swap__tmp = (x);					\
    (x) = (y);								\
    (y) = __mpz_ptr_swap__tmp;						\
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)						\
  do {									\
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mpz_srcptr_swap__tmp;					\
  } while (0)
+
+const int mp_bits_per_limb = GMP_LIMB_BITS;
+
+
+/* Memory allocation and other helper functions. */
/* Print MSG (plus a newline) to stderr and terminate the process.
   Used for fatal conditions such as memory exhaustion.  */
static void
gmp_die (const char *msg)
{
  fputs (msg, stderr);
  fputc ('\n', stderr);
  abort ();
}
+
/* Default allocation hook: return SIZE (> 0) bytes or die trying.  */
static void *
gmp_default_alloc (size_t size)
{
  void *p;

  assert (size > 0);

  p = malloc (size);
  if (p == NULL)
    gmp_die("gmp_default_alloc: Virtual memory exhausted.");

  return p;
}
+
/* Default reallocation hook.  The old size is unused; the C library
   tracks block sizes itself.  */
static void *
gmp_default_realloc (void *old, size_t unused_old_size, size_t new_size)
{
  void *p = realloc (old, new_size);

  if (p == NULL)
    gmp_die("gmp_default_realloc: Virtual memory exhausted.");

  return p;
}
+
/* Default free hook; the size argument is not needed by free().  */
static void
gmp_default_free (void *p, size_t unused_size)
{
  (void) unused_size;
  free (p);
}
+
/* Active allocation hooks; replaceable via mp_set_memory_functions.  */
static void * (*gmp_allocate_func) (size_t) = gmp_default_alloc;
static void * (*gmp_reallocate_func) (void *, size_t, size_t) = gmp_default_realloc;
static void (*gmp_free_func) (void *, size_t) = gmp_default_free;
+
+void
+mp_get_memory_functions (void *(**alloc_func) (size_t),
+			 void *(**realloc_func) (void *, size_t, size_t),
+			 void (**free_func) (void *, size_t))
+{
+  if (alloc_func)
+    *alloc_func = gmp_allocate_func;
+
+  if (realloc_func)
+    *realloc_func = gmp_reallocate_func;
+
+  if (free_func)
+    *free_func = gmp_free_func;
+}
+
+void
+mp_set_memory_functions (void *(*alloc_func) (size_t),
+			 void *(*realloc_func) (void *, size_t, size_t),
+			 void (*free_func) (void *, size_t))
+{
+  if (!alloc_func)
+    alloc_func = gmp_default_alloc;
+  if (!realloc_func)
+    realloc_func = gmp_default_realloc;
+  if (!free_func)
+    free_func = gmp_default_free;
+
+  gmp_allocate_func = alloc_func;
+  gmp_reallocate_func = realloc_func;
+  gmp_free_func = free_func;
+}
+
/* Indirect through the user-settable hooks.  */
#define gmp_alloc(size) ((*gmp_allocate_func)((size)))
#define gmp_free(p, size) ((*gmp_free_func) ((p), (size)))
#define gmp_realloc(ptr, old_size, size) ((*gmp_reallocate_func)(ptr, old_size, size))
+
+static mp_ptr
+gmp_alloc_limbs (mp_size_t size)
+{
+  return (mp_ptr) gmp_alloc (size * sizeof (mp_limb_t));
+}
+
/* Resize a limb buffer from OLD_SIZE to SIZE limbs; SIZE must be
   positive.  Both sizes are limb counts.  */
static mp_ptr
gmp_realloc_limbs (mp_ptr old, mp_size_t old_size, mp_size_t size)
{
  assert (size > 0);
  return (mp_ptr) gmp_realloc (old, old_size * sizeof (mp_limb_t), size * sizeof (mp_limb_t));
}
+
/* Free a buffer of SIZE limbs (limb count, not bytes).  */
static void
gmp_free_limbs (mp_ptr old, mp_size_t size)
{
  gmp_free (old, size * sizeof (mp_limb_t));
}
+
+
+/* MPN interface */
+
+void
+mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
+{
+  mp_size_t i;
+  for (i = 0; i < n; i++)
+    d[i] = s[i];
+}
+
+void
+mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
+{
+  while (--n >= 0)
+    d[n] = s[n];
+}
+
+int
+mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  while (--n >= 0)
+    {
+      if (ap[n] != bp[n])
+	return ap[n] > bp[n] ? 1 : -1;
+    }
+  return 0;
+}
+
+static int
+mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  if (an != bn)
+    return an < bn ? -1 : 1;
+  else
+    return mpn_cmp (ap, bp, an);
+}
+
+static mp_size_t
+mpn_normalized_size (mp_srcptr xp, mp_size_t n)
+{
+  while (n > 0 && xp[n-1] == 0)
+    --n;
+  return n;
+}
+
+int
+mpn_zero_p(mp_srcptr rp, mp_size_t n)
+{
+  return mpn_normalized_size (rp, n) == 0;
+}
+
+void
+mpn_zero (mp_ptr rp, mp_size_t n)
+{
+  while (--n >= 0)
+    rp[n] = 0;
+}
+
+mp_limb_t
+mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_size_t i;
+
+  assert (n > 0);
+  i = 0;
+  do
+    {
+      mp_limb_t r = ap[i] + b;
+      /* Carry out */
+      b = (r < b);
+      rp[i] = r;
+    }
+  while (++i < n);
+
+  return b;
+}
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t cy;
+
+  for (i = 0, cy = 0; i < n; i++)
+    {
+      mp_limb_t a, b, r;
+      a = ap[i]; b = bp[i];
+      r = a + cy;
+      cy = (r < cy);
+      r += b;
+      cy += (r < b);
+      rp[i] = r;
+    }
+  return cy;
+}
+
+mp_limb_t
+mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+
+  assert (an >= bn);
+
+  cy = mpn_add_n (rp, ap, bp, bn);
+  if (an > bn)
+    cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy);
+  return cy;
+}
+
+mp_limb_t
+mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_size_t i;
+
+  assert (n > 0);
+
+  i = 0;
+  do
+    {
+      mp_limb_t a = ap[i];
+      /* Carry out */
+      mp_limb_t cy = a < b;
+      rp[i] = a - b;
+      b = cy;
+    }
+  while (++i < n);
+
+  return b;
+}
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t cy;
+
+  for (i = 0, cy = 0; i < n; i++)
+    {
+      mp_limb_t a, b;
+      a = ap[i]; b = bp[i];
+      b += cy;
+      cy = (b < cy);
+      cy += (a < b);
+      rp[i] = a - b;
+    }
+  return cy;
+}
+
+mp_limb_t
+mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+
+  assert (an >= bn);
+
+  cy = mpn_sub_n (rp, ap, bp, bn);
+  if (an > bn)
+    cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy);
+  return cy;
+}
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl, rl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      rl = *rp;
+      lpl = rl + lpl;
+      cl += lpl < rl;
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl, rl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      rl = *rp;
+      lpl = rl - lpl;
+      cl += lpl > rl;
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
+{
+  assert (un >= vn);
+  assert (vn >= 1);
+  assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un));
+  assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn));
+
+  /* We first multiply by the low order limb. This result can be
+     stored, not added, to rp. We also avoid a loop for zeroing this
+     way. */
+
+  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+
+  /* Now accumulate the product of up[] and the next higher limb from
+     vp[]. */
+
+  while (--vn >= 1)
+    {
+      rp += 1, vp += 1;
+      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
+    }
+  return rp[un];
+}
+
/* {rp,2n} = {ap,n} * {bp,n}.  */
void
mpn_mul_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mpn_mul (rp, ap, n, bp, n);
}
+
/* {rp,2n} = {ap,n}^2.  No dedicated squaring code; uses the general
   multiply.  */
void
mpn_sqr (mp_ptr rp, mp_srcptr ap, mp_size_t n)
{
  mpn_mul (rp, ap, n, ap, n);
}
+
/* {rp,n} = {up,n} << cnt, with 0 < cnt < GMP_LIMB_BITS.  Returns the
   bits shifted out of the top limb.  Works from the high end down, so
   overlapping operands with rp >= up are safe.  */
mp_limb_t
mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
{
  mp_limb_t high_limb, low_limb;
  unsigned int tnc;
  mp_limb_t retval;

  assert (n >= 1);
  assert (cnt >= 1);
  assert (cnt < GMP_LIMB_BITS);

  up += n;
  rp += n;

  /* Complementary shift count for the bits crossing a limb boundary.  */
  tnc = GMP_LIMB_BITS - cnt;
  low_limb = *--up;
  retval = low_limb >> tnc;
  high_limb = (low_limb << cnt);

  while (--n != 0)
    {
      low_limb = *--up;
      *--rp = high_limb | (low_limb >> tnc);
      high_limb = (low_limb << cnt);
    }
  *--rp = high_limb;

  return retval;
}
+
/* {rp,n} = {up,n} >> cnt, with 0 < cnt < GMP_LIMB_BITS.  Returns the
   bits shifted out at the bottom, placed in the high end of a limb.
   Works from the low end up, so overlapping operands with rp <= up
   are safe.  */
mp_limb_t
mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
{
  mp_limb_t high_limb, low_limb;
  unsigned int tnc;
  mp_limb_t retval;

  assert (n >= 1);
  assert (cnt >= 1);
  assert (cnt < GMP_LIMB_BITS);

  /* Complementary shift count for the bits crossing a limb boundary.  */
  tnc = GMP_LIMB_BITS - cnt;
  high_limb = *up++;
  retval = (high_limb << tnc);
  low_limb = high_limb >> cnt;

  while (--n != 0)
    {
      high_limb = *up++;
      *rp++ = low_limb | (high_limb << tnc);
      low_limb = high_limb >> cnt;
    }
  *rp = low_limb;

  return retval;
}
+
/* Shared kernel for mpn_scan0/mpn_scan1.  LIMB is the (masked) limb
   at index I; subsequent limbs of up[] are xored with UX, which is 0
   when scanning for a 1 bit and GMP_LIMB_MAX when scanning for a 0
   bit.  Returns the absolute bit index of the first set bit found;
   when the region is exhausted, returns ~0 for the 1-bit search and
   un*GMP_LIMB_BITS for the 0-bit search (a number conceptually
   extends with zero limbs above its top).  */
static mp_bitcnt_t
mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
		 mp_limb_t ux)
{
  unsigned cnt;

  assert (ux == 0 || ux == GMP_LIMB_MAX);
  assert (0 <= i && i <= un );

  while (limb == 0)
    {
      i++;
      if (i == un)
	return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS);
      limb = ux ^ up[i];
    }
  gmp_ctz (cnt, limb);
  return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
}
+
/* Return the index of the least significant 1 bit at or above BIT in
   ptr[].  NOTE(review): un is passed as i, so the "not found" path in
   mpn_common_scan is effectively bypassed -- the caller is presumably
   required to guarantee that a set bit exists at or above BIT (as the
   public GMP interface documents); confirm against callers.  */
mp_bitcnt_t
mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit)
{
  mp_size_t i;
  i = bit / GMP_LIMB_BITS;

  /* Mask off bits below BIT in the starting limb.  */
  return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
			  i, ptr, i, 0);
}
+
/* Return the index of the least significant 0 bit at or above BIT in
   ptr[].  Limbs are complemented so the kernel can scan for a 1 bit;
   a zero bit is always found since the value conceptually extends
   with zero limbs above its top.  */
mp_bitcnt_t
mpn_scan0 (mp_srcptr ptr, mp_bitcnt_t bit)
{
  mp_size_t i;
  i = bit / GMP_LIMB_BITS;

  /* Mask off bits below BIT in the (complemented) starting limb.  */
  return mpn_common_scan (~ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
			  i, ptr, i, GMP_LIMB_MAX);
}
+
+void
+mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  while (--n >= 0)
+    *rp++ = ~ *up++;
+}
+
/* {rp,n} = -{up,n} mod B^n (two's complement negation).  Returns 0
   when the operand is zero, otherwise 1 (the borrow out of the
   top).  */
mp_limb_t
mpn_neg (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  /* Low zero limbs are unchanged by negation; skip them.  */
  while (*up == 0)
    {
      *rp = 0;
      if (!--n)
	return 0;
      ++up; ++rp;
    }
  /* Negate the first non-zero limb; all higher limbs are simply
     complemented.  */
  *rp = - *up;
  mpn_com (++rp, ++up, --n);
  return 1;
}
+
+
+/* MPN division interface. */
+
+/* The 3/2 inverse is defined as
+
+     m = floor( (B^3-1) / (B u1 + u0)) - B
+*/
mp_limb_t
mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
{
  mp_limb_t r, m;

  /* Requires <u1,u0> normalized, i.e. the high bit of u1 set.  First
     compute the 2/1 inverse of u1 using half-limb arithmetic.  */
  {
    mp_limb_t p, ql;
    unsigned ul, uh, qh;

    assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t));
    /* For notation, let b denote the half-limb base, so that B = b^2.
       Split u1 = b uh + ul. */
    ul = u1 & GMP_LLIMB_MASK;
    uh = u1 >> (GMP_LIMB_BITS / 2);

    /* Approximation of the high half of quotient. Differs from the 2/1
       inverse of the half limb uh, since we have already subtracted
       u0. */
    qh = (u1 ^ GMP_LIMB_MAX) / uh;

    /* Adjust to get a half-limb 3/2 inverse, i.e., we want

       qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
	   = floor( (b (~u) + b-1) / u),

       and the remainder

       r = b (~u) + b-1 - qh (b uh + ul)
       = b (~u - qh uh) + b-1 - qh ul

       Subtraction of qh ul may underflow, which implies adjustments.
       But by normalization, 2 u >= B > qh ul, so we need to adjust by
       at most 2.
    */

    r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;

    p = (mp_limb_t) qh * ul;
    /* Adjustment steps taken from udiv_qrnnd_c */
    if (r < p)
      {
	qh--;
	r += u1;
	if (r >= u1) /* i.e. we didn't get carry when adding to r */
	  if (r < p)
	    {
	      qh--;
	      r += u1;
	    }
      }
    r -= p;

    /* Low half of the quotient is

       ql = floor ( (b r + b-1) / u1).

       This is a 3/2 division (on half-limbs), for which qh is a
       suitable inverse. */

    p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
    /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
       work, it is essential that ql is a full mp_limb_t. */
    ql = (p >> (GMP_LIMB_BITS / 2)) + 1;

    /* By the 3/2 trick, we don't need the high half limb. */
    r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;

    if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2))))
      {
	ql--;
	r += u1;
      }
    m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql;
    if (r >= u1)
      {
	m++;
	r -= u1;
      }
  }

  /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a
     3/2 inverse. */
  if (u0 > 0)
    {
      mp_limb_t th, tl;
      r = ~r;
      r += u0;
      if (r < u0)
	{
	  m--;
	  if (r >= u1)
	    {
	      m--;
	      r -= u1;
	    }
	  r -= u1;
	}
      gmp_umul_ppmm (th, tl, u0, m);
      r += th;
      if (r < th)
	{
	  m--;
	  m -= ((r > u1) | ((r == u1) & (tl > u0)));
	}
    }

  return m;
}
+
/* Precomputed data for division by a 1-, 2- or many-limb divisor.  */
struct gmp_div_inverse
{
  /* Normalization shift count. */
  unsigned shift;
  /* Normalized divisor (d0 unused for mpn_div_qr_1) */
  mp_limb_t d1, d0;
  /* Inverse, for 2/1 or 3/2. */
  mp_limb_t di;
};
+
/* Prepare INV for division by the single limb D > 0: shift d so its
   high bit is set and compute the 2/1 inverse of the result.  */
static void
mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
{
  unsigned shift;

  assert (d > 0);
  gmp_clz (shift, d);
  inv->shift = shift;
  inv->d1 = d << shift;
  inv->di = mpn_invert_limb (inv->d1);
}
+
/* Prepare INV for division by the two-limb divisor <d1,d0>, d1 > 0:
   normalize so d1's high bit is set and compute the 3/2 inverse.  */
static void
mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
		     mp_limb_t d1, mp_limb_t d0)
{
  unsigned shift;

  assert (d1 > 0);
  gmp_clz (shift, d1);
  inv->shift = shift;
  if (shift > 0)
    {
      /* Bits shifted out of d0 move into d1. */
      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
      d0 <<= shift;
    }
  inv->d1 = d1;
  inv->d0 = d0;
  inv->di = mpn_invert_3by2 (d1, d0);
}
+
/* Prepare INV for division by {dp,dn}, dn > 0.  For dn > 2 only the
   two top limbs (after normalization) are kept; note that when
   shift > 0, bits of dp[dn-3] enter the normalized d0.  */
static void
mpn_div_qr_invert (struct gmp_div_inverse *inv,
		   mp_srcptr dp, mp_size_t dn)
{
  assert (dn > 0);

  if (dn == 1)
    mpn_div_qr_1_invert (inv, dp[0]);
  else if (dn == 2)
    mpn_div_qr_2_invert (inv, dp[1], dp[0]);
  else
    {
      unsigned shift;
      mp_limb_t d1, d0;

      d1 = dp[dn-1];
      d0 = dp[dn-2];
      assert (d1 > 0);
      gmp_clz (shift, d1);
      inv->shift = shift;
      if (shift > 0)
	{
	  /* Normalize the top two limbs, pulling bits up from the
	     limb below. */
	  d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
	  d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift));
	}
      inv->d1 = d1;
      inv->d0 = d0;
      inv->di = mpn_invert_3by2 (d1, d0);
    }
}
+
+/* Not matching current public gmp interface, rather corresponding to
+   the sbpi1_div_* functions. */
/* Divide {np,nn} by the single-limb divisor described by INV.  The
   quotient is stored at {qp,nn} unless qp is NULL; the remainder is
   returned.  np itself is read-only: when a normalization shift is
   needed, the shifted numerator goes into qp, or into a heap
   temporary when qp is NULL.  */
static mp_limb_t
mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
		     const struct gmp_div_inverse *inv)
{
  mp_limb_t d, di;
  mp_limb_t r;
  mp_ptr tp = NULL;
  mp_size_t tn = 0;

  if (inv->shift > 0)
    {
      /* Shift, reusing qp area if possible. In-place shift if qp == np. */
      tp = qp;
      if (!tp)
        {
	   tn = nn;
	   tp = gmp_alloc_limbs (tn);
        }
      /* The bits shifted out of the top seed the remainder. */
      r = mpn_lshift (tp, np, nn, inv->shift);
      np = tp;
    }
  else
    r = 0;

  d = inv->d1;
  di = inv->di;
  /* One 2/1 division step per limb, high to low. */
  while (--nn >= 0)
    {
      mp_limb_t q;

      gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di);
      if (qp)
	qp[nn] = q;
    }
  if (tn)
    gmp_free_limbs (tp, tn);

  /* Undo the normalization shift on the remainder. */
  return r >> inv->shift;
}
+
/* Divide {np,nn} (nn >= 2) by the two-limb divisor described by INV.
   Quotient limbs go to qp[0..nn-3+1] when qp is non-NULL; the two-limb
   remainder is left in np[0], np[1].  np is clobbered (shifted in
   place when normalization is needed).  */
static void
mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
		     const struct gmp_div_inverse *inv)
{
  unsigned shift;
  mp_size_t i;
  mp_limb_t d1, d0, di, r1, r0;

  assert (nn >= 2);
  shift = inv->shift;
  d1 = inv->d1;
  d0 = inv->d0;
  di = inv->di;

  if (shift > 0)
    /* Bits shifted out of the top seed the high remainder limb. */
    r1 = mpn_lshift (np, np, nn, shift);
  else
    r1 = 0;

  r0 = np[nn - 1];

  /* One 3/2 division step per remaining numerator limb. */
  i = nn - 2;
  do
    {
      mp_limb_t n0, q;
      n0 = np[i];
      gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);

      if (qp)
	qp[i] = q;
    }
  while (--i >= 0);

  if (shift > 0)
    {
      /* Undo the normalization shift on the remainder. */
      assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0);
      r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift));
      r1 >>= shift;
    }

  np[1] = r1;
  np[0] = r0;
}
+
/* Schoolbook division of {np,nn} (with top limb N1 already fetched)
   by the normalized divisor {dp,dn}, dn > 2, using the 3/2 inverse
   DINV.  Quotient limbs go to qp[0..nn-dn] when qp is non-NULL; the
   remainder is left in {np,dn}.  */
static void
mpn_div_qr_pi1 (mp_ptr qp,
		mp_ptr np, mp_size_t nn, mp_limb_t n1,
		mp_srcptr dp, mp_size_t dn,
		mp_limb_t dinv)
{
  mp_size_t i;

  mp_limb_t d1, d0;
  mp_limb_t cy, cy1;
  mp_limb_t q;

  assert (dn > 2);
  assert (nn >= dn);

  d1 = dp[dn - 1];
  d0 = dp[dn - 2];

  assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
  /* Iteration variable is the index of the q limb.
   *
   * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
   * by            <d1,          d0,        dp[dn-3],  ..., dp[0] >
   */

  i = nn - dn;
  do
    {
      mp_limb_t n0 = np[dn-1+i];

      if (n1 == d1 && n0 == d0)
	{
	  /* Quotient limb would overflow the 3/2 step; it is exactly
	     B-1 in this case.  */
	  q = GMP_LIMB_MAX;
	  mpn_submul_1 (np+i, dp, dn, q);
	  n1 = np[dn-1+i];	/* update n1, last loop's value will now be invalid */
	}
      else
	{
	  gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);

	  /* Subtract q times the rest of the divisor.  */
	  cy = mpn_submul_1 (np + i, dp, dn-2, q);

	  cy1 = n0 < cy;
	  n0 = n0 - cy;
	  cy = n1 < cy1;
	  n1 = n1 - cy1;
	  np[dn-2+i] = n0;

	  if (cy != 0)
	    {
	      /* Quotient was one too large; add the divisor back.  */
	      n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
	      q--;
	    }
	}

      if (qp)
	qp[i] = q;
    }
  while (--i >= 0);

  np[dn - 1] = n1;
}
+
/* Dispatch division of {np,nn} by {dp,dn} using the precomputed
   inverse INV.  For dn > 2 the numerator is shifted in place,
   divided, and the remainder shifted back; dp must already be
   normalized to match INV in that case.  Remainder ends up in
   {np,dn}; quotient in {qp, nn-dn+1} when qp is non-NULL.  */
static void
mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
		   mp_srcptr dp, mp_size_t dn,
		   const struct gmp_div_inverse *inv)
{
  assert (dn > 0);
  assert (nn >= dn);

  if (dn == 1)
    np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
  else if (dn == 2)
    mpn_div_qr_2_preinv (qp, np, nn, inv);
  else
    {
      mp_limb_t nh;
      unsigned shift;

      assert (inv->d1 == dp[dn-1]);
      assert (inv->d0 == dp[dn-2]);
      assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);

      shift = inv->shift;
      if (shift > 0)
	nh = mpn_lshift (np, np, nn, shift);
      else
	nh = 0;

      mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di);

      /* Shift the remainder back down; no bits may fall off.  */
      if (shift > 0)
	gmp_assert_nocarry (mpn_rshift (np, np, dn, shift));
    }
}
+
/* Divide {np,nn} by {dp,dn}.  On return, {np,dn} holds the remainder
   and, when qp is non-NULL, {qp, nn-dn+1} the quotient.  When the
   divisor is longer than two limbs and unnormalized, a shifted heap
   copy of it is used.  */
static void
mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
{
  struct gmp_div_inverse inv;
  mp_ptr tp = NULL;

  assert (dn > 0);
  assert (nn >= dn);

  mpn_div_qr_invert (&inv, dp, dn);
  if (dn > 2 && inv.shift > 0)
    {
      tp = gmp_alloc_limbs (dn);
      gmp_assert_nocarry (mpn_lshift (tp, dp, dn, inv.shift));
      dp = tp;
    }
  mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv);
  if (tp)
    gmp_free_limbs (tp, dn);
}
+
+
+/* MPN base conversion. */
/* If B is a power of two in the supported range 2..256, return its
   exponent (log2), otherwise return 0.  */
static unsigned
mpn_base_power_of_two_p (unsigned b)
{
  unsigned exp;

  /* Exactly the bases 2, 4, ..., 256 qualify. */
  if (b < 2 || b > 256 || (b & (b - 1)) != 0)
    return 0;

  for (exp = 0; b > 1; b >>= 1)
    exp++;
  return exp;
}
+
/* Description of a non-power-of-two base for chunked conversion.  */
struct mpn_base_info
{
  /* bb is the largest power of the base which fits in one limb, and
     exp is the corresponding exponent. */
  unsigned exp;
  mp_limb_t bb;
};
+
+static void
+mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b)
+{
+  mp_limb_t m;
+  mp_limb_t p;
+  unsigned exp;
+
+  m = GMP_LIMB_MAX / b;
+  for (exp = 1, p = b; p <= m; exp++)
+    p *= b;
+
+  info->exp = exp;
+  info->bb = p;
+}
+
/* Number of significant bits in U, i.e. floor(log2(u)) + 1.
   Requires u > 0.  */
static mp_bitcnt_t
mpn_limb_size_in_base_2 (mp_limb_t u)
{
  unsigned shift;

  assert (u > 0);
  gmp_clz (shift, u);
  return GMP_LIMB_BITS - shift;
}
+
/* Convert {up,un} to digits in base 2^BITS, most significant digit
   first, into sp[].  Returns the number of digits produced.  */
static size_t
mpn_get_str_bits (unsigned char *sp, unsigned bits, mp_srcptr up, mp_size_t un)
{
  unsigned char mask;
  size_t sn, j;
  mp_size_t i;
  unsigned shift;

  /* Exact digit count, from the bit length of the number.  */
  sn = ((un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1])
	+ bits - 1) / bits;

  mask = (1U << bits) - 1;

  /* Walk the limbs from the low end while emitting digits into sp
     from the high end; a digit may straddle a limb boundary.  */
  for (i = 0, j = sn, shift = 0; j-- > 0;)
    {
      unsigned char digit = up[i] >> shift;

      shift += bits;

      if (shift >= GMP_LIMB_BITS && ++i < un)
	{
	  shift -= GMP_LIMB_BITS;
	  digit |= up[i] << (bits - shift);
	}
      sp[j] = digit & mask;
    }
  return sn;
}
+
+/* We generate digits from the least significant end, and reverse at
+   the end. */
/* Convert the single limb W to digits in the base described by BINV,
   least significant digit first (the caller reverses).  Returns the
   digit count; 0 when w == 0.  */
static size_t
mpn_limb_get_str (unsigned char *sp, mp_limb_t w,
		  const struct gmp_div_inverse *binv)
{
  mp_size_t i;
  for (i = 0; w > 0; i++)
    {
      mp_limb_t h, l, r;

      /* Pre-shift w to match the normalized divisor, then one 2/1
	 division step; the remainder (shifted back) is the digit.  */
      h = w >> (GMP_LIMB_BITS - binv->shift);
      l = w << binv->shift;

      gmp_udiv_qrnnd_preinv (w, r, h, l, binv->d1, binv->di);
      assert ((r & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - binv->shift))) == 0);
      r >>= binv->shift;

      sp[i] = r;
    }
  return i;
}
+
/* Convert {up,un} to digits in the non-power-of-two BASE, most
   significant first.  {up,un} is clobbered.  Extracts one chunk of
   info->exp digits per division by info->bb, least significant chunk
   first, then reverses the whole string.  Returns the digit count.  */
static size_t
mpn_get_str_other (unsigned char *sp,
		   int base, const struct mpn_base_info *info,
		   mp_ptr up, mp_size_t un)
{
  struct gmp_div_inverse binv;
  size_t sn;
  size_t i;

  mpn_div_qr_1_invert (&binv, base);

  sn = 0;

  if (un > 1)
    {
      struct gmp_div_inverse bbinv;
      mpn_div_qr_1_invert (&bbinv, info->bb);

      do
	{
	  mp_limb_t w;
	  size_t done;
	  w = mpn_div_qr_1_preinv (up, up, un, &bbinv);
	  un -= (up[un-1] == 0);
	  done = mpn_limb_get_str (sp + sn, w, &binv);

	  /* Inner chunks must be exactly info->exp digits wide, so
	     zero-pad above the digits actually produced.  */
	  for (sn += done; done < info->exp; done++)
	    sp[sn++] = 0;
	}
      while (un > 1);
    }
  /* Final (most significant) chunk, without padding.  */
  sn += mpn_limb_get_str (sp + sn, up[0], &binv);

  /* Reverse order */
  for (i = 0; 2*i + 1 < sn; i++)
    {
      unsigned char t = sp[i];
      sp[i] = sp[sn - i - 1];
      sp[sn - i - 1] = t;
    }

  return sn;
}
+
+size_t
+mpn_get_str (unsigned char *sp, int base, mp_ptr up, mp_size_t un)
+{
+  unsigned bits;
+
+  assert (un > 0);
+  assert (up[un-1] > 0);
+
+  bits = mpn_base_power_of_two_p (base);
+  if (bits)
+    return mpn_get_str_bits (sp, bits, up, un);
+  else
+    {
+      struct mpn_base_info info;
+
+      mpn_get_base_info (&info, base);
+      return mpn_get_str_other (sp, base, &info, up, un);
+    }
+}
+
/* Build limbs at rp[] from the digits sp[0..sn-1] (most significant
   first) in base 2^BITS.  Returns the normalized limb count.  */
static mp_size_t
mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
		  unsigned bits)
{
  mp_size_t rn;
  mp_limb_t limb;
  unsigned shift;

  /* Consume digits from the least significant end, packing them into
     the current limb; emit the limb when it fills up.  */
  for (limb = 0, rn = 0, shift = 0; sn-- > 0; )
    {
      limb |= (mp_limb_t) sp[sn] << shift;
      shift += bits;
      if (shift >= GMP_LIMB_BITS)
	{
	  shift -= GMP_LIMB_BITS;
	  rp[rn++] = limb;
	  /* Next line is correct also if shift == 0,
	     bits == 8, and mp_limb_t == unsigned char. */
	  limb = (unsigned int) sp[sn] >> (bits - shift);
	}
    }
  if (limb != 0)
    rp[rn++] = limb;
  else
    rn = mpn_normalized_size (rp, rn);
  return rn;
}
+
+/* Result is usually normalized, except for all-zero input, in which
+   case a single zero limb is written at *RP, and 1 is returned. */
static mp_size_t
mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
		   mp_limb_t b, const struct mpn_base_info *info)
{
  mp_size_t rn;
  mp_limb_t w;
  unsigned k;
  size_t j;

  assert (sn > 0);

  /* First chunk takes 1..info->exp digits so that the remaining digit
     count is an exact multiple of info->exp.  */
  k = 1 + (sn - 1) % info->exp;

  j = 0;
  w = sp[j++];
  while (--k != 0)
    w = w * b + sp[j++];

  rp[0] = w;

  /* For each further chunk of exp digits: multiply the accumulated
     number by bb = b^exp and add the chunk's value.  */
  for (rn = 1; j < sn;)
    {
      mp_limb_t cy;

      w = sp[j++];
      for (k = 1; k < info->exp; k++)
	w = w * b + sp[j++];

      cy = mpn_mul_1 (rp, rp, rn, info->bb);
      cy += mpn_add_1 (rp, rp, rn, w);
      if (cy > 0)
	rp[rn++] = cy;
    }
  assert (j == sn);

  return rn;
}
+
+/* Convert SN digit values at SP (most significant first) in BASE to
+   limbs at RP; returns the limb count (0 only when SN == 0).
+   Dispatches to the bit-packing path for power-of-two bases.  */
+mp_size_t
+mpn_set_str (mp_ptr rp, const unsigned char *sp, size_t sn, int base)
+{
+  unsigned bits;
+
+  if (sn == 0)
+    return 0;
+
+  bits = mpn_base_power_of_two_p (base);
+  if (bits)
+    return mpn_set_str_bits (rp, sp, sn, bits);
+  else
+    {
+      struct mpn_base_info info;
+
+      mpn_get_base_info (&info, base);
+      return mpn_set_str_other (rp, sp, sn, base, &info);
+    }
+}
+
+
+/* MPZ interface */
+
+/* Initialize R to 0 without allocating.  _mp_d is pointed at a shared
+   static dummy limb so the pointer is always dereferenceable even
+   while _mp_alloc == 0 (the 0xc1a0 pattern is arbitrary).  */
+void
+mpz_init (mpz_t r)
+{
+  static const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0;
+
+  r->_mp_alloc = 0;
+  r->_mp_size = 0;
+  r->_mp_d = (mp_ptr) &dummy_limb;
+}
+
+/* The utility of this function is a bit limited, since many functions
+   assigns the result variable using mpz_swap. */
+/* Initialize R to 0 with room for at least BITS bits (minimum one
+   limb).  */
+void
+mpz_init2 (mpz_t r, mp_bitcnt_t bits)
+{
+  mp_size_t rn;
+
+  bits -= (bits != 0);		/* Round down, except if 0 */
+  rn = 1 + bits / GMP_LIMB_BITS;
+
+  r->_mp_alloc = rn;
+  r->_mp_size = 0;
+  r->_mp_d = gmp_alloc_limbs (rn);
+}
+
+/* Free R's limb storage.  A freshly mpz_init'ed value has alloc == 0
+   and points at the static dummy limb, which must not be freed.  */
+void
+mpz_clear (mpz_t r)
+{
+  if (r->_mp_alloc)
+    gmp_free_limbs (r->_mp_d, r->_mp_alloc);
+}
+
+/* Resize R's storage to SIZE limbs (at least 1).  If the current
+   value no longer fits, it is reset to 0.  Returns the (possibly
+   moved) limb pointer.  */
+static mp_ptr
+mpz_realloc (mpz_t r, mp_size_t size)
+{
+  size = GMP_MAX (size, 1);
+
+  if (r->_mp_alloc)
+    r->_mp_d = gmp_realloc_limbs (r->_mp_d, r->_mp_alloc, size);
+  else
+    r->_mp_d = gmp_alloc_limbs (size);
+  r->_mp_alloc = size;
+
+  if (GMP_ABS (r->_mp_size) > size)
+    r->_mp_size = 0;
+
+  return r->_mp_d;
+}
+
+/* Realloc the mpz_t Z if it has less than N limbs; evaluates to Z's
+   limb pointer.  N may be evaluated twice -- pass only side-effect
+   free expressions.  */
+#define MPZ_REALLOC(z,n) ((n) > (z)->_mp_alloc			\
+			  ? mpz_realloc(z,n)			\
+			  : (z)->_mp_d)
+
+/* MPZ assignment and basic conversions. */
+
+/* Set R to the signed long X.  */
+void
+mpz_set_si (mpz_t r, signed long int x)
+{
+  if (x >= 0)
+    mpz_set_ui (r, x);
+  else /* (x < 0) */
+    if (GMP_LIMB_BITS < GMP_ULONG_BITS)
+      {
+	mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x));
+	mpz_neg (r, r);
+      }
+  /* Note: despite the indentation, this "else" pairs with the inner
+     "if" above (dangling-else rule): it is the single-limb case where
+     a limb holds any unsigned long.  */
+  else
+    {
+      r->_mp_size = -1;
+      MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x);
+    }
+}
+
+/* Set R to the unsigned long X.  Handles limbs narrower than
+   unsigned long by storing X one limb at a time.  */
+void
+mpz_set_ui (mpz_t r, unsigned long int x)
+{
+  if (x > 0)
+    {
+      r->_mp_size = 1;
+      MPZ_REALLOC (r, 1)[0] = x;
+      if (GMP_LIMB_BITS < GMP_ULONG_BITS)
+	{
+	  /* Local copy avoids a compiler warning for an oversized
+	     shift count on targets where this branch is dead.  */
+	  int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
+	  while (x >>= LOCAL_GMP_LIMB_BITS)
+	    {
+	      ++ r->_mp_size;
+	      MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x;
+	    }
+	}
+    }
+  else
+    r->_mp_size = 0;
+}
+
+/* Set R to a copy of X.  */
+void
+mpz_set (mpz_t r, const mpz_t x)
+{
+  /* Allow the NOP r == x */
+  if (r != x)
+    {
+      mp_size_t n;
+      mp_ptr rp;
+
+      n = GMP_ABS (x->_mp_size);
+      rp = MPZ_REALLOC (r, n);
+
+      mpn_copyi (rp, x->_mp_d, n);
+      r->_mp_size = x->_mp_size;
+    }
+}
+
+/* Combined init + set convenience constructors.  */
+void
+mpz_init_set_si (mpz_t r, signed long int x)
+{
+  mpz_init (r);
+  mpz_set_si (r, x);
+}
+
+void
+mpz_init_set_ui (mpz_t r, unsigned long int x)
+{
+  mpz_init (r);
+  mpz_set_ui (r, x);
+}
+
+void
+mpz_init_set (mpz_t r, const mpz_t x)
+{
+  mpz_init (r);
+  mpz_set (r, x);
+}
+
+/* Range predicates: non-zero iff U fits in the named C type.  The
+   signed variants compare against the type's limits; the unsigned
+   variants additionally require U >= 0.  */
+int
+mpz_fits_slong_p (const mpz_t u)
+{
+  return mpz_cmp_si (u, LONG_MAX) <= 0 && mpz_cmp_si (u, LONG_MIN) >= 0;
+}
+
+/* Non-zero iff the magnitude {up,un} fits in an unsigned long.  With
+   limbs narrower than unsigned long, ULONGREM is one past the largest
+   allowed value of the limb at index ULONGSIZE.  */
+static int
+mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un)
+{
+  int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS;
+  mp_limb_t ulongrem = 0;
+
+  if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0)
+    ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1;
+
+  return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1);
+}
+
+int
+mpz_fits_ulong_p (const mpz_t u)
+{
+  mp_size_t us = u->_mp_size;
+
+  return us >= 0 && mpn_absfits_ulong_p (u->_mp_d, us);
+}
+
+int
+mpz_fits_sint_p (const mpz_t u)
+{
+  return mpz_cmp_si (u, INT_MAX) <= 0 && mpz_cmp_si (u, INT_MIN) >= 0;
+}
+
+int
+mpz_fits_uint_p (const mpz_t u)
+{
+  return u->_mp_size >= 0 && mpz_cmpabs_ui (u, UINT_MAX) <= 0;
+}
+
+int
+mpz_fits_sshort_p (const mpz_t u)
+{
+  return mpz_cmp_si (u, SHRT_MAX) <= 0 && mpz_cmp_si (u, SHRT_MIN) >= 0;
+}
+
+int
+mpz_fits_ushort_p (const mpz_t u)
+{
+  return u->_mp_size >= 0 && mpz_cmpabs_ui (u, USHRT_MAX) <= 0;
+}
+
+/* Return U reduced to a long: the low LONG_MAX bits of |U|, negated
+   for negative U.  The arithmetic below avoids signed overflow even
+   for LONG_MIN.  */
+long int
+mpz_get_si (const mpz_t u)
+{
+  unsigned long r = mpz_get_ui (u);
+  unsigned long c = -LONG_MAX - LONG_MIN;
+
+  if (u->_mp_size < 0)
+    /* This expression is necessary to properly handle -LONG_MIN */
+    return -(long) c - (long) ((r - c) & LONG_MAX);
+  else
+    return (long) (r & LONG_MAX);
+}
+
+/* Return the low GMP_ULONG_BITS bits of |U|.  */
+unsigned long int
+mpz_get_ui (const mpz_t u)
+{
+  if (GMP_LIMB_BITS < GMP_ULONG_BITS)
+    {
+      /* Local copy avoids a compiler warning for an oversized shift
+	 count on targets where this branch is dead.  */
+      int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
+      unsigned long r = 0;
+      mp_size_t n = GMP_ABS (u->_mp_size);
+      n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS);
+      while (--n >= 0)
+	r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n];
+      return r;
+    }
+
+  return u->_mp_size == 0 ? 0 : u->_mp_d[0];
+}
+
+/* Number of limbs in |U|.  */
+size_t
+mpz_size (const mpz_t u)
+{
+  return GMP_ABS (u->_mp_size);
+}
+
+/* Limb N of |U|, or 0 when N is out of range.  */
+mp_limb_t
+mpz_getlimbn (const mpz_t u, mp_size_t n)
+{
+  if (n >= 0 && n < GMP_ABS (u->_mp_size))
+    return u->_mp_d[n];
+  else
+    return 0;
+}
+
+/* Resize X's storage to hold at least N bits.  */
+void
+mpz_realloc2 (mpz_t x, mp_bitcnt_t n)
+{
+  mpz_realloc (x, 1 + (n - (n != 0)) / GMP_LIMB_BITS);
+}
+
+/* Raw limb access (mpn-level interface to mpz values).  */
+mp_srcptr
+mpz_limbs_read (mpz_srcptr x)
+{
+  return x->_mp_d;
+}
+
+mp_ptr
+mpz_limbs_modify (mpz_t x, mp_size_t n)
+{
+  assert (n > 0);
+  return MPZ_REALLOC (x, n);
+}
+
+mp_ptr
+mpz_limbs_write (mpz_t x, mp_size_t n)
+{
+  return mpz_limbs_modify (x, n);
+}
+
+/* Commit XS limbs written through mpz_limbs_write/modify: normalize
+   and set the sign from the sign of XS.  */
+void
+mpz_limbs_finish (mpz_t x, mp_size_t xs)
+{
+  mp_size_t xn;
+  xn = mpn_normalized_size (x->_mp_d, GMP_ABS (xs));
+  x->_mp_size = xs < 0 ? -xn : xn;
+}
+
+/* Wrap {xp, |xs|} as a read-only mpz without copying; the caller must
+   not mpz_clear the result (alloc == 0).  This variant assumes the
+   limbs are already normalized.  */
+static mpz_srcptr
+mpz_roinit_normal_n (mpz_t x, mp_srcptr xp, mp_size_t xs)
+{
+  x->_mp_alloc = 0;
+  x->_mp_d = (mp_ptr) xp;
+  x->_mp_size = xs;
+  return x;
+}
+
+/* As mpz_roinit_normal_n, but normalizes (strips high zero limbs).  */
+mpz_srcptr
+mpz_roinit_n (mpz_t x, mp_srcptr xp, mp_size_t xs)
+{
+  mpz_roinit_normal_n (x, xp, xs);
+  mpz_limbs_finish (x, xs);
+  return x;
+}
+
+
+/* Conversions and comparison to double. */
+
+/* Set R to trunc(X).  NaN, infinity and values in (-1,1) all map to
+   0.  The limbs are produced high to low by repeatedly scaling by
+   B = 2^GMP_LIMB_BITS.  */
+void
+mpz_set_d (mpz_t r, double x)
+{
+  int sign;
+  mp_ptr rp;
+  mp_size_t rn, i;
+  double B;
+  double Bi;
+  mp_limb_t f;
+
+  /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is
+     zero or infinity. */
+  if (x != x || x == x * 0.5)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  sign = x < 0.0 ;
+  if (sign)
+    x = - x;
+
+  if (x < 1.0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+  /* B = 2^GMP_LIMB_BITS, computed without shifting past the limb
+     width.  */
+  B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
+  Bi = 1.0 / B;
+  /* Scale x below B, counting how many limbs the result needs.  */
+  for (rn = 1; x >= B; rn++)
+    x *= Bi;
+
+  rp = MPZ_REALLOC (r, rn);
+
+  /* Peel off limbs from the most significant end.  */
+  f = (mp_limb_t) x;
+  x -= f;
+  assert (x < 1.0);
+  i = rn-1;
+  rp[i] = f;
+  while (--i >= 0)
+    {
+      x = B * x;
+      f = (mp_limb_t) x;
+      x -= f;
+      assert (x < 1.0);
+      rp[i] = f;
+    }
+
+  r->_mp_size = sign ? - rn : rn;
+}
+
+void
+mpz_init_set_d (mpz_t r, double x)
+{
+  mpz_init (r);
+  mpz_set_d (r, x);
+}
+
+/* Return U converted to double, keeping the GMP_DBL_MANT_BITS most
+   significant bits and truncating (masking off) the rest so the
+   result rounds toward zero.  */
+double
+mpz_get_d (const mpz_t u)
+{
+  int m;
+  mp_limb_t l;
+  mp_size_t un;
+  double x;
+  double B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
+
+  un = GMP_ABS (u->_mp_size);
+
+  if (un == 0)
+    return 0.0;
+
+  l = u->_mp_d[--un];
+  gmp_clz (m, l);
+  /* m = mantissa bits still unfilled after the top limb.  */
+  m = m + GMP_DBL_MANT_BITS - GMP_LIMB_BITS;
+  if (m < 0)
+    l &= GMP_LIMB_MAX << -m;
+
+  for (x = l; --un >= 0;)
+    {
+      x = B*x;
+      if (m > 0) {
+	l = u->_mp_d[un];
+	m -= GMP_LIMB_BITS;
+	if (m < 0)
+	  l &= GMP_LIMB_MAX << -m;
+	x += l;
+      }
+    }
+
+  if (u->_mp_size < 0)
+    x = -x;
+
+  return x;
+}
+
+/* Compare |X| with |D|; returns <0, 0 or >0.  D is compared exactly
+   (as a real number), limb by limb, without converting X to double.  */
+int
+mpz_cmpabs_d (const mpz_t x, double d)
+{
+  mp_size_t xn;
+  double B, Bi;
+  mp_size_t i;
+
+  xn = x->_mp_size;
+  d = GMP_ABS (d);
+
+  if (xn != 0)
+    {
+      xn = GMP_ABS (xn);
+
+      B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
+      Bi = 1.0 / B;
+
+      /* Scale d so it can be compared with the top limb. */
+      for (i = 1; i < xn; i++)
+	d *= Bi;
+
+      if (d >= B)
+	return -1;
+
+      /* Compare floor(d) to top limb, subtract and cancel when equal. */
+      for (i = xn; i-- > 0;)
+	{
+	  mp_limb_t f, xl;
+
+	  f = (mp_limb_t) d;
+	  xl = x->_mp_d[i];
+	  if (xl > f)
+	    return 1;
+	  else if (xl < f)
+	    return -1;
+	  d = B * (d - f);
+	}
+    }
+  /* All limbs equal: X wins only if D has a leftover fraction.  */
+  return - (d > 0.0);
+}
+
+/* Signed comparison of X with D, built on mpz_cmpabs_d.  */
+int
+mpz_cmp_d (const mpz_t x, double d)
+{
+  if (x->_mp_size < 0)
+    {
+      if (d >= 0.0)
+	return -1;
+      else
+	return -mpz_cmpabs_d (x, d);
+    }
+  else
+    {
+      if (d < 0.0)
+	return 1;
+      else
+	return mpz_cmpabs_d (x, d);
+    }
+}
+
+
+/* MPZ comparisons and the like. */
+
+/* Sign of U: -1, 0 or 1.  */
+int
+mpz_sgn (const mpz_t u)
+{
+  return GMP_CMP (u->_mp_size, 0);
+}
+
+int
+mpz_cmp_si (const mpz_t u, long v)
+{
+  mp_size_t usize = u->_mp_size;
+
+  if (v >= 0)
+    return mpz_cmp_ui (u, v);
+  else if (usize >= 0)
+    return 1;
+  else
+    return - mpz_cmpabs_ui (u, GMP_NEG_CAST (unsigned long int, v));
+}
+
+int
+mpz_cmp_ui (const mpz_t u, unsigned long v)
+{
+  mp_size_t usize = u->_mp_size;
+
+  if (usize < 0)
+    return -1;
+  else
+    return mpz_cmpabs_ui (u, v);
+}
+
+/* Signed comparison of A and B.  The _mp_size fields order correctly
+   by themselves (negative < zero < positive, and more limbs means
+   larger magnitude); only equal sizes need a limb comparison, with
+   the result flipped for negative operands.  */
+int
+mpz_cmp (const mpz_t a, const mpz_t b)
+{
+  mp_size_t asize = a->_mp_size;
+  mp_size_t bsize = b->_mp_size;
+
+  if (asize != bsize)
+    return (asize < bsize) ? -1 : 1;
+  else if (asize >= 0)
+    return mpn_cmp (a->_mp_d, b->_mp_d, asize);
+  else
+    return mpn_cmp (b->_mp_d, a->_mp_d, -asize);
+}
+
+/* Compare |U| with V.  */
+int
+mpz_cmpabs_ui (const mpz_t u, unsigned long v)
+{
+  mp_size_t un = GMP_ABS (u->_mp_size);
+
+  if (! mpn_absfits_ulong_p (u->_mp_d, un))
+    return 1;
+  else
+    {
+      unsigned long uu = mpz_get_ui (u);
+      return GMP_CMP(uu, v);
+    }
+}
+
+/* Compare |U| with |V|.  */
+int
+mpz_cmpabs (const mpz_t u, const mpz_t v)
+{
+  return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size),
+		   v->_mp_d, GMP_ABS (v->_mp_size));
+}
+
+void
+mpz_abs (mpz_t r, const mpz_t u)
+{
+  mpz_set (r, u);
+  r->_mp_size = GMP_ABS (r->_mp_size);
+}
+
+void
+mpz_neg (mpz_t r, const mpz_t u)
+{
+  mpz_set (r, u);
+  r->_mp_size = -r->_mp_size;
+}
+
+/* Exchange U and V in O(1) by swapping fields; no limbs are copied.  */
+void
+mpz_swap (mpz_t u, mpz_t v)
+{
+  MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
+  MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size);
+}
+
+
+/* MPZ addition and subtraction */
+
+
+/* R = A + B, with B an unsigned long (promoted to a temporary mpz).  */
+void
+mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  mpz_t bb;
+  mpz_init_set_ui (bb, b);
+  mpz_add (r, a, bb);
+  mpz_clear (bb);
+}
+
+/* R = A - B, computed as -(B - A).  */
+void
+mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  mpz_ui_sub (r, b, a);
+  mpz_neg (r, r);
+}
+
+/* R = A - B, computed as (-B) + A.  */
+void
+mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
+{
+  mpz_neg (r, b);
+  mpz_add_ui (r, r, a);
+}
+
+/* R = |A| + |B|.  Returns the limb count of the result (always > 0
+   for non-zero inputs); the caller sets the sign.  */
+static mp_size_t
+mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_size_t bn = GMP_ABS (b->_mp_size);
+  mp_ptr rp;
+  mp_limb_t cy;
+
+  /* Ensure A is the longer operand, as mpn_add requires.  */
+  if (an < bn)
+    {
+      MPZ_SRCPTR_SWAP (a, b);
+      MP_SIZE_T_SWAP (an, bn);
+    }
+
+  rp = MPZ_REALLOC (r, an + 1);
+  cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn);
+
+  rp[an] = cy;
+
+  return an + cy;
+}
+
+/* R = |A| - |B| as a signed limb count: positive when |A| > |B|,
+   negative when |A| < |B|, zero when equal.  The caller combines this
+   with the operand signs.  */
+static mp_size_t
+mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_size_t bn = GMP_ABS (b->_mp_size);
+  int cmp;
+  mp_ptr rp;
+
+  cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
+  if (cmp > 0)
+    {
+      rp = MPZ_REALLOC (r, an);
+      gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
+      return mpn_normalized_size (rp, an);
+    }
+  else if (cmp < 0)
+    {
+      rp = MPZ_REALLOC (r, bn);
+      gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
+      return -mpn_normalized_size (rp, bn);
+    }
+  else
+    return 0;
+}
+
+/* R = A + B.  Same signs add magnitudes; different signs subtract.  */
+void
+mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t rn;
+
+  if ( (a->_mp_size ^ b->_mp_size) >= 0)
+    rn = mpz_abs_add (r, a, b);
+  else
+    rn = mpz_abs_sub (r, a, b);
+
+  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
+}
+
+/* R = A - B; the mirror image of mpz_add.  */
+void
+mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t rn;
+
+  if ( (a->_mp_size ^ b->_mp_size) >= 0)
+    rn = mpz_abs_sub (r, a, b);
+  else
+    rn = mpz_abs_add (r, a, b);
+
+  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
+}
+
+
+/* MPZ multiplication */
+
+/* R = U * V for signed long V, reduced to the unsigned case.  */
+void
+mpz_mul_si (mpz_t r, const mpz_t u, long int v)
+{
+  if (v < 0)
+    {
+      mpz_mul_ui (r, u, GMP_NEG_CAST (unsigned long int, v));
+      mpz_neg (r, r);
+    }
+  else
+    mpz_mul_ui (r, u, v);
+}
+
+/* R = U * V for unsigned long V, by promoting V to a temporary mpz.
+   This is correct for any limb size (V may span several limbs).  */
+void
+mpz_mul_ui (mpz_t r, const mpz_t u, unsigned long int v)
+{
+  mpz_t vv;
+  mpz_init_set_ui (vv, v);
+  mpz_mul (r, u, vv);
+  mpz_clear (vv);
+}
+
+/* R = U * V.  Multiplies into a temporary so that R may alias U or V,
+   then swaps the result in.  */
+void
+mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  int sign;
+  mp_size_t un, vn, rn;
+  mpz_t t;
+  mp_ptr tp;
+
+  un = u->_mp_size;
+  vn = v->_mp_size;
+
+  if (un == 0 || vn == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  /* Result is negative iff operand signs differ.  */
+  sign = (un ^ vn) < 0;
+
+  un = GMP_ABS (un);
+  vn = GMP_ABS (vn);
+
+  mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);
+
+  tp = t->_mp_d;
+  /* mpn_mul requires the first operand to be the longer one.  */
+  if (un >= vn)
+    mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn);
+  else
+    mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un);
+
+  /* Product of un and vn limbs has un+vn limbs, possibly with a zero
+     top limb.  */
+  rn = un + vn;
+  rn -= tp[rn-1] == 0;
+
+  t->_mp_size = sign ? - rn : rn;
+  mpz_swap (r, t);
+  mpz_clear (t);
+}
+
+/* R = U * 2^BITS: shift left by whole limbs plus a sub-limb shift.  */
+void
+mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
+{
+  mp_size_t un, rn;
+  mp_size_t limbs;
+  unsigned shift;
+  mp_ptr rp;
+
+  un = GMP_ABS (u->_mp_size);
+  if (un == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  limbs = bits / GMP_LIMB_BITS;
+  shift = bits % GMP_LIMB_BITS;
+
+  rn = un + limbs + (shift > 0);
+  rp = MPZ_REALLOC (r, rn);
+  if (shift > 0)
+    {
+      mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
+      rp[rn-1] = cy;
+      rn -= (cy == 0);
+    }
+  else
+    /* Pure limb shift; copy downward in case rp aliases u->_mp_d.  */
+    mpn_copyd (rp + limbs, u->_mp_d, un);
+
+  /* Zero-fill the vacated low limbs.  */
+  mpn_zero (rp, limbs);
+
+  r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
+}
+
+/* Fused multiply-add/subtract: R +/-= U * V, via a temporary product
+   (R may alias U).  */
+void
+mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v)
+{
+  mpz_t t;
+  mpz_init_set_ui (t, v);
+  mpz_mul (t, u, t);
+  mpz_add (r, r, t);
+  mpz_clear (t);
+}
+
+void
+mpz_submul_ui (mpz_t r, const mpz_t u, unsigned long int v)
+{
+  mpz_t t;
+  mpz_init_set_ui (t, v);
+  mpz_mul (t, u, t);
+  mpz_sub (r, r, t);
+  mpz_clear (t);
+}
+
+void
+mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mpz_t t;
+  mpz_init (t);
+  mpz_mul (t, u, v);
+  mpz_add (r, r, t);
+  mpz_clear (t);
+}
+
+void
+mpz_submul (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mpz_t t;
+  mpz_init (t);
+  mpz_mul (t, u, v);
+  mpz_sub (r, r, t);
+  mpz_clear (t);
+}
+
+
+/* MPZ division */
+enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
+
+/* Allows q or r to be zero. Returns 1 iff remainder is non-zero. */
+/* Common worker for all q/r division entry points: computes N / D
+   with the rounding given by MODE, storing quotient in Q and
+   remainder in R when non-NULL.  */
+static int
+mpz_div_qr (mpz_t q, mpz_t r,
+	    const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
+{
+  mp_size_t ns, ds, nn, dn, qs;
+  ns = n->_mp_size;
+  ds = d->_mp_size;
+
+  if (ds == 0)
+    gmp_die("mpz_div_qr: Divide by zero.");
+
+  if (ns == 0)
+    {
+      if (q)
+	q->_mp_size = 0;
+      if (r)
+	r->_mp_size = 0;
+      return 0;
+    }
+
+  nn = GMP_ABS (ns);
+  dn = GMP_ABS (ds);
+
+  /* Sign of the quotient: negative iff ns and ds differ in sign.  */
+  qs = ds ^ ns;
+
+  if (nn < dn)
+    {
+      /* |N| < |D|: the truncated quotient is 0; ceil/floor modes may
+	 round it away from zero.  */
+      if (mode == GMP_DIV_CEIL && qs >= 0)
+	{
+	  /* q = 1, r = n - d */
+	  if (r)
+	    mpz_sub (r, n, d);
+	  if (q)
+	    mpz_set_ui (q, 1);
+	}
+      else if (mode == GMP_DIV_FLOOR && qs < 0)
+	{
+	  /* q = -1, r = n + d */
+	  if (r)
+	    mpz_add (r, n, d);
+	  if (q)
+	    mpz_set_si (q, -1);
+	}
+      else
+	{
+	  /* q = 0, r = d */
+	  if (r)
+	    mpz_set (r, n);
+	  if (q)
+	    q->_mp_size = 0;
+	}
+      return 1;
+    }
+  else
+    {
+      mp_ptr np, qp;
+      mp_size_t qn, rn;
+      mpz_t tq, tr;
+
+      /* Work in temporaries so Q/R may alias N/D; mpn_div_qr leaves
+	 the remainder in place of the dividend limbs.  */
+      mpz_init_set (tr, n);
+      np = tr->_mp_d;
+
+      qn = nn - dn + 1;
+
+      if (q)
+	{
+	  mpz_init2 (tq, qn * GMP_LIMB_BITS);
+	  qp = tq->_mp_d;
+	}
+      else
+	qp = NULL;
+
+      mpn_div_qr (qp, np, nn, d->_mp_d, dn);
+
+      if (qp)
+	{
+	  qn -= (qp[qn-1] == 0);
+
+	  tq->_mp_size = qs < 0 ? -qn : qn;
+	}
+      rn = mpn_normalized_size (np, dn);
+      tr->_mp_size = ns < 0 ? - rn : rn;
+
+      /* Adjust the truncated result for floor/ceil rounding when the
+	 remainder is non-zero and the quotient is in the rounded
+	 direction.  */
+      if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
+	{
+	  if (q)
+	    mpz_sub_ui (tq, tq, 1);
+	  if (r)
+	    mpz_add (tr, tr, d);
+	}
+      else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
+	{
+	  if (q)
+	    mpz_add_ui (tq, tq, 1);
+	  if (r)
+	    mpz_sub (tr, tr, d);
+	}
+
+      if (q)
+	{
+	  mpz_swap (tq, q);
+	  mpz_clear (tq);
+	}
+      if (r)
+	mpz_swap (tr, r);
+
+      mpz_clear (tr);
+
+      return rn != 0;
+    }
+}
+
+/* Public division entry points: thin wrappers selecting the rounding
+   mode (cdiv = ceiling, fdiv = floor, tdiv = truncation) and which of
+   quotient/remainder to produce.  */
+void
+mpz_cdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC);
+}
+
+void
+mpz_cdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+void
+mpz_cdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* R = N mod D with 0 <= R < |D|: floor division for positive D,
+   ceiling for negative D, so the remainder takes D's direction.  */
+void
+mpz_mod (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL);
+}
+}
+
+/* Q = U / 2^BIT_INDEX, rounded per MODE.  The shift itself truncates;
+   ADJUST records whether any discarded bit forces rounding away from
+   zero (ceil for positive U, floor for negative U).  */
+static void
+mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
+		enum mpz_div_round_mode mode)
+{
+  mp_size_t un, qn;
+  mp_size_t limb_cnt;
+  mp_ptr qp;
+  int adjust;
+
+  un = u->_mp_size;
+  if (un == 0)
+    {
+      q->_mp_size = 0;
+      return;
+    }
+  limb_cnt = bit_index / GMP_LIMB_BITS;
+  qn = GMP_ABS (un) - limb_cnt;
+  bit_index %= GMP_LIMB_BITS;
+
+  if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
+    /* Note: Below, the final indexing at limb_cnt is valid because at
+       that point we have qn > 0. */
+    adjust = (qn <= 0
+	      || !mpn_zero_p (u->_mp_d, limb_cnt)
+	      || (u->_mp_d[limb_cnt]
+		  & (((mp_limb_t) 1 << bit_index) - 1)));
+  else
+    adjust = 0;
+
+  if (qn <= 0)
+    qn = 0;
+  else
+    {
+      qp = MPZ_REALLOC (q, qn);
+
+      if (bit_index != 0)
+	{
+	  mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
+	  qn -= qp[qn - 1] == 0;
+	}
+      else
+	{
+	  mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
+	}
+    }
+
+  q->_mp_size = qn;
+
+  /* Round away from zero first, then restore the sign.  */
+  if (adjust)
+    mpz_add_ui (q, q, 1);
+  if (un < 0)
+    mpz_neg (q, q);
+}
+
+/* R = U mod 2^BIT_INDEX, i.e. the remainder matching the quotient
+   that mpz_div_q_2exp produces in the same MODE.  In the rounding
+   direction the remainder is complemented: r := r - 2^bit_index.  */
+static void
+mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index,
+		enum mpz_div_round_mode mode)
+{
+  mp_size_t us, un, rn;
+  mp_ptr rp;
+  mp_limb_t mask;
+
+  us = u->_mp_size;
+  if (us == 0 || bit_index == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+  rn = (bit_index + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
+  assert (rn > 0);
+
+  rp = MPZ_REALLOC (r, rn);
+  un = GMP_ABS (us);
+
+  /* Mask for the valid bits of the top remainder limb.  */
+  mask = GMP_LIMB_MAX >> (rn * GMP_LIMB_BITS - bit_index);
+
+  if (rn > un)
+    {
+      /* Quotient (with truncation) is zero, and remainder is
+	 non-zero */
+      if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
+	{
+	  /* Have to negate and sign extend. */
+	  mp_size_t i;
+
+	  gmp_assert_nocarry (! mpn_neg (rp, u->_mp_d, un));
+	  for (i = un; i < rn - 1; i++)
+	    rp[i] = GMP_LIMB_MAX;
+
+	  rp[rn-1] = mask;
+	  us = -us;
+	}
+      else
+	{
+	  /* Just copy */
+	  if (r != u)
+	    mpn_copyi (rp, u->_mp_d, un);
+
+	  rn = un;
+	}
+    }
+  else
+    {
+      if (r != u)
+	mpn_copyi (rp, u->_mp_d, rn - 1);
+
+      rp[rn-1] = u->_mp_d[rn-1] & mask;
+
+      if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
+	{
+	  /* If r != 0, compute 2^{bit_count} - r. */
+	  mpn_neg (rp, rp, rn);
+
+	  rp[rn-1] &= mask;
+
+	  /* us is not used for anything else, so we can modify it
+	     here to indicate flipped sign. */
+	  us = -us;
+	}
+    }
+  rn = mpn_normalized_size (rp, rn);
+  r->_mp_size = us < 0 ? -rn : rn;
+}
+
+/* Public 2^cnt division wrappers, one per rounding mode.  */
+void
+mpz_cdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
+}
+
+void
+mpz_cdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_TRUNC);
+}
+
+/* Q = N / D, asserting (in checked builds) that D divides N.  */
+void
+mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC));
+}
+
+/* Non-zero iff D divides N (mpz_div_qr returns 1 iff remainder != 0).  */
+int
+mpz_divisible_p (const mpz_t n, const mpz_t d)
+{
+  return mpz_div_qr (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
+}
+
+/* Non-zero iff A == B (mod M): true when M divides A - B.  */
+int
+mpz_congruent_p (const mpz_t a, const mpz_t b, const mpz_t m)
+{
+  mpz_t t;
+  int res;
+
+  /* a == b (mod 0) iff a == b */
+  if (mpz_sgn (m) == 0)
+    return (mpz_cmp (a, b) == 0);
+
+  mpz_init (t);
+  mpz_sub (t, a, b);
+  res = mpz_divisible_p (t, m);
+  mpz_clear (t);
+
+  return res;
+}
+
+/* Worker for the *_ui division family: divide N by the unsigned long
+   D (promoted to a temporary mpz) per MODE, and return |remainder|
+   truncated to unsigned long (always fits since |r| < d, except when
+   GMP_LIMB_BITS < GMP_ULONG_BITS makes this exact anyway).  */
+static unsigned long
+mpz_div_qr_ui (mpz_t q, mpz_t r,
+	       const mpz_t n, unsigned long d, enum mpz_div_round_mode mode)
+{
+  unsigned long ret;
+  mpz_t rr, dd;
+
+  mpz_init (rr);
+  mpz_init_set_ui (dd, d);
+  mpz_div_qr (q, rr, n, dd, mode);
+  mpz_clear (dd);
+  ret = mpz_get_ui (rr);
+
+  if (r)
+    mpz_swap (r, rr);
+  mpz_clear (rr);
+
+  return ret;
+}
+
+/* The *_ui wrapper family: rounding mode and requested outputs encode
+   the same grid as the mpz/mpz entry points above.  Each returns the
+   magnitude of the remainder.  */
+unsigned long
+mpz_cdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_CEIL);
+}
+
+unsigned long
+mpz_fdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_FLOOR);
+}
+
+unsigned long
+mpz_tdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_TRUNC);
+}
+
+unsigned long
+mpz_cdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_CEIL);
+}
+
+unsigned long
+mpz_fdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+unsigned long
+mpz_tdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+unsigned long
+mpz_cdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_CEIL);
+}
+unsigned long
+mpz_fdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+unsigned long
+mpz_tdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_TRUNC);
+}
+
+unsigned long
+mpz_cdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_CEIL);
+}
+
+unsigned long
+mpz_fdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+unsigned long
+mpz_tdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+unsigned long
+mpz_mod_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+
+void
+mpz_divexact_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  gmp_assert_nocarry (mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC));
+}
+
+int
+mpz_divisible_ui_p (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
+}
+
+
+/* GCD */
+/* Single-limb binary (Stein) gcd: strip common factors of two, then
+   repeatedly subtract the smaller odd value from the larger and strip
+   the new factors of two.  Requires u | v non-zero.  */
+static mp_limb_t
+mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+{
+  unsigned shift;
+
+  assert ( (u | v) > 0);
+
+  if (u == 0)
+    return v;
+  else if (v == 0)
+    return u;
+
+  /* shift = common power of two; restored at the end.  */
+  gmp_ctz (shift, u | v);
+
+  u >>= shift;
+  v >>= shift;
+
+  /* Make u the odd operand.  */
+  if ( (u & 1) == 0)
+    MP_LIMB_T_SWAP (u, v);
+
+  while ( (v & 1) == 0)
+    v >>= 1;
+
+  while (u != v)
+    {
+      if (u > v)
+	{
+	  u -= v;
+	  do
+	    u >>= 1;
+	  while ( (u & 1) == 0);
+	}
+      else
+	{
+	  v -= u;
+	  do
+	    v >>= 1;
+	  while ( (v & 1) == 0);
+	}
+    }
+  return u << shift;
+}
+
+/* G = gcd(|U|, V); returns gcd as unsigned long when V != 0 (the gcd
+   then fits, since it divides V).  G may be NULL to get just the
+   return value.  */
+unsigned long
+mpz_gcd_ui (mpz_t g, const mpz_t u, unsigned long v)
+{
+  mpz_t t;
+  mpz_init_set_ui(t, v);
+  mpz_gcd (t, u, t);
+  if (v > 0)
+    v = mpz_get_ui (t);
+
+  if (g)
+    mpz_swap (t, g);
+
+  mpz_clear (t);
+
+  return v;
+}
+
+/* Divide R by its largest power-of-two factor; returns the number of
+   bits removed.  Requires R > 0.  */
+static mp_bitcnt_t
+mpz_make_odd (mpz_t r)
+{
+  mp_bitcnt_t shift;
+
+  assert (r->_mp_size > 0);
+  /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */
+  shift = mpn_scan1 (r->_mp_d, 0);
+  mpz_tdiv_q_2exp (r, r, shift);
+
+  return shift;
+}
+
+/* G = gcd(|U|, |V|) by the binary algorithm on mpz values: keep both
+   operands odd, subtract the smaller from the larger, and strip new
+   factors of two; common factors of two (gz) are restored at the end.
+   Drops to the single-limb mpn_gcd_11 once one operand fits a limb.  */
+void
+mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v)
+{
+  mpz_t tu, tv;
+  mp_bitcnt_t uz, vz, gz;
+
+  /* gcd(0, v) = |v|; gcd(u, 0) = |u|.  */
+  if (u->_mp_size == 0)
+    {
+      mpz_abs (g, v);
+      return;
+    }
+  if (v->_mp_size == 0)
+    {
+      mpz_abs (g, u);
+      return;
+    }
+
+  mpz_init (tu);
+  mpz_init (tv);
+
+  mpz_abs (tu, u);
+  uz = mpz_make_odd (tu);
+  mpz_abs (tv, v);
+  vz = mpz_make_odd (tv);
+  gz = GMP_MIN (uz, vz);
+
+  if (tu->_mp_size < tv->_mp_size)
+    mpz_swap (tu, tv);
+
+  /* One initial division cuts tu down to tv's size quickly.  */
+  mpz_tdiv_r (tu, tu, tv);
+  if (tu->_mp_size == 0)
+    {
+      mpz_swap (g, tv);
+    }
+  else
+    for (;;)
+      {
+	int c;
+
+	mpz_make_odd (tu);
+	c = mpz_cmp (tu, tv);
+	if (c == 0)
+	  {
+	    mpz_swap (g, tu);
+	    break;
+	  }
+	if (c < 0)
+	  mpz_swap (tu, tv);
+
+	if (tv->_mp_size == 1)
+	  {
+	    mp_limb_t *gp;
+
+	    mpz_tdiv_r (tu, tu, tv);
+	    gp = MPZ_REALLOC (g, 1); /* gp = mpz_limbs_modify (g, 1); */
+	    *gp = mpn_gcd_11 (tu->_mp_d[0], tv->_mp_d[0]);
+
+	    g->_mp_size = *gp != 0; /* mpz_limbs_finish (g, 1); */
+	    break;
+	  }
+	mpz_sub (tu, tu, tv);
+      }
+  mpz_clear (tu);
+  mpz_clear (tv);
+  /* Restore the common power of two.  */
+  mpz_mul_2exp (g, g, gz);
+}
+
+/* Extended gcd: G = gcd(U, V) = S*U + T*V.  S and/or T may be NULL.
+   Uses the binary gcd while maintaining cofactor pairs (s0,s1,t0,t1)
+   scaled by a power of two, which is divided back out afterwards.  */
+void
+mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v)
+{
+  mpz_t tu, tv, s0, s1, t0, t1;
+  mp_bitcnt_t uz, vz, gz;
+  mp_bitcnt_t power;
+
+  if (u->_mp_size == 0)
+    {
+      /* g = 0 u + sgn(v) v */
+      signed long sign = mpz_sgn (v);
+      mpz_abs (g, v);
+      if (s)
+	s->_mp_size = 0;
+      if (t)
+	mpz_set_si (t, sign);
+      return;
+    }
+
+  if (v->_mp_size == 0)
+    {
+      /* g = sgn(u) u + 0 v */
+      signed long sign = mpz_sgn (u);
+      mpz_abs (g, u);
+      if (s)
+	mpz_set_si (s, sign);
+      if (t)
+	t->_mp_size = 0;
+      return;
+    }
+
+  mpz_init (tu);
+  mpz_init (tv);
+  mpz_init (s0);
+  mpz_init (s1);
+  mpz_init (t0);
+  mpz_init (t1);
+
+  mpz_abs (tu, u);
+  uz = mpz_make_odd (tu);
+  mpz_abs (tv, v);
+  vz = mpz_make_odd (tv);
+  gz = GMP_MIN (uz, vz);
+
+  uz -= gz;
+  vz -= gz;
+
+  /* Cofactors corresponding to odd gcd. gz handled later. */
+  if (tu->_mp_size < tv->_mp_size)
+    {
+      mpz_swap (tu, tv);
+      MPZ_SRCPTR_SWAP (u, v);
+      MPZ_PTR_SWAP (s, t);
+      MP_BITCNT_T_SWAP (uz, vz);
+    }
+
+  /* Maintain
+   *
+   * u = t0 tu + t1 tv
+   * v = s0 tu + s1 tv
+   *
+   * where u and v denote the inputs with common factors of two
+   * eliminated, and det (s0, t0; s1, t1) = 2^p. Then
+   *
+   * 2^p tu =  s1 u - t1 v
+   * 2^p tv = -s0 u + t0 v
+   */
+
+  /* After initial division, tu = q tv + tu', we have
+   *
+   * u = 2^uz (tu' + q tv)
+   * v = 2^vz tv
+   *
+   * or
+   *
+   * t0 = 2^uz, t1 = 2^uz q
+   * s0 = 0,    s1 = 2^vz
+   */
+
+  mpz_tdiv_qr (t1, tu, tu, tv);
+  mpz_mul_2exp (t1, t1, uz);
+
+  mpz_setbit (s1, vz);
+  power = uz + vz;
+
+  if (tu->_mp_size > 0)
+    {
+      mp_bitcnt_t shift;
+      shift = mpz_make_odd (tu);
+      mpz_setbit (t0, uz + shift);
+      power += shift;
+
+      for (;;)
+	{
+	  int c;
+	  c = mpz_cmp (tu, tv);
+	  if (c == 0)
+	    break;
+
+	  if (c < 0)
+	    {
+	      /* tv = tv' + tu
+	       *
+	       * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv'
+	       * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */
+
+	      mpz_sub (tv, tv, tu);
+	      mpz_add (t0, t0, t1);
+	      mpz_add (s0, s0, s1);
+
+	      shift = mpz_make_odd (tv);
+	      mpz_mul_2exp (t1, t1, shift);
+	      mpz_mul_2exp (s1, s1, shift);
+	    }
+	  else
+	    {
+	      mpz_sub (tu, tu, tv);
+	      mpz_add (t1, t0, t1);
+	      mpz_add (s1, s0, s1);
+
+	      shift = mpz_make_odd (tu);
+	      mpz_mul_2exp (t0, t0, shift);
+	      mpz_mul_2exp (s0, s0, shift);
+	    }
+	  power += shift;
+	}
+    }
+  else
+    mpz_setbit (t0, uz);
+
+  /* Now tv = odd part of gcd, and -s0 and t0 are corresponding
+     cofactors. */
+
+  mpz_mul_2exp (tv, tv, gz);
+  mpz_neg (s0, s0);
+
+  /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To
+     adjust cofactors, we need u / g and v / g */
+
+  mpz_divexact (s1, v, tv);
+  mpz_abs (s1, s1);
+  mpz_divexact (t1, u, tv);
+  mpz_abs (t1, t1);
+
+  while (power-- > 0)
+    {
+      /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */
+      if (mpz_odd_p (s0) || mpz_odd_p (t0))
+	{
+	  mpz_sub (s0, s0, s1);
+	  mpz_add (t0, t0, t1);
+	}
+      assert (mpz_even_p (t0) && mpz_even_p (s0));
+      mpz_tdiv_q_2exp (s0, s0, 1);
+      mpz_tdiv_q_2exp (t0, t0, 1);
+    }
+
+  /* Arrange so that |s| < |u| / 2g */
+  mpz_add (s1, s0, s1);
+  if (mpz_cmpabs (s0, s1) > 0)
+    {
+      mpz_swap (s0, s1);
+      mpz_sub (t0, t0, t1);
+    }
+  /* Fix cofactor signs for negative inputs.  */
+  if (u->_mp_size < 0)
+    mpz_neg (s0, s0);
+  if (v->_mp_size < 0)
+    mpz_neg (t0, t0);
+
+  mpz_swap (g, tv);
+  if (s)
+    mpz_swap (s, s0);
+  if (t)
+    mpz_swap (t, t0);
+
+  mpz_clear (tu);
+  mpz_clear (tv);
+  mpz_clear (s0);
+  mpz_clear (s1);
+  mpz_clear (t0);
+  mpz_clear (t1);
+}
+
+/* R = lcm(U, V) = |U / gcd(U,V) * V|; 0 if either operand is 0.  */
+void
+mpz_lcm (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mpz_t g;
+
+  if (u->_mp_size == 0 || v->_mp_size == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  mpz_init (g);
+
+  mpz_gcd (g, u, v);
+  mpz_divexact (g, u, g);
+  mpz_mul (r, g, v);
+
+  mpz_clear (g);
+  mpz_abs (r, r);
+}
+
+/* R = lcm(U, V) for unsigned long V.  */
+void
+mpz_lcm_ui (mpz_t r, const mpz_t u, unsigned long v)
+{
+  if (v == 0 || u->_mp_size == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  v /= mpz_gcd_ui (NULL, u, v);
+  mpz_mul_ui (r, u, v);
+
+  mpz_abs (r, r);
+}
+
+/* R = U^-1 mod M when it exists (gcd(U, M) == 1); returns non-zero on
+   success.  On success the result is shifted into the range of the
+   same sign as M; R is untouched on failure.  */
+int
+mpz_invert (mpz_t r, const mpz_t u, const mpz_t m)
+{
+  mpz_t g, tr;
+  int invertible;
+
+  if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0)
+    return 0;
+
+  mpz_init (g);
+  mpz_init (tr);
+
+  mpz_gcdext (g, tr, NULL, u, m);
+  invertible = (mpz_cmp_ui (g, 1) == 0);
+
+  if (invertible)
+    {
+      if (tr->_mp_size < 0)
+	{
+	  if (m->_mp_size >= 0)
+	    mpz_add (tr, tr, m);
+	  else
+	    mpz_sub (tr, tr, m);
+	}
+      mpz_swap (r, tr);
+    }
+
+  mpz_clear (g);
+  mpz_clear (tr);
+  return invertible;
+}
+
+
+/* Higher level operations (sqrt, pow and root) */
+
+/* R = B^E by left-to-right binary exponentiation: square for every
+   exponent bit, multiply by B for the set bits.  */
+void
+mpz_pow_ui (mpz_t r, const mpz_t b, unsigned long e)
+{
+  unsigned long bit;
+  mpz_t tr;
+  mpz_init_set_ui (tr, 1);
+
+  bit = GMP_ULONG_HIGHBIT;
+  do
+    {
+      mpz_mul (tr, tr, tr);
+      if (e & bit)
+	mpz_mul (tr, tr, b);
+      bit >>= 1;
+    }
+  while (bit > 0);
+
+  mpz_swap (r, tr);
+  mpz_clear (tr);
+}
+
+/* R = BLIMB^E, promoting the base to a temporary mpz.  */
+void
+mpz_ui_pow_ui (mpz_t r, unsigned long blimb, unsigned long e)
+{
+  mpz_t b;
+
+  mpz_init_set_ui (b, blimb);
+  mpz_pow_ui (r, b, e);
+  mpz_clear (b);
+}
+
+/* R = B^E mod M, binary exponentiation with reductions done against a
+   normalized (left-shifted) copy of M so the precomputed division
+   inverse can be reused without per-step shifting.  A negative E
+   requires B to be invertible mod M.  */
+void
+mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m)
+{
+  mpz_t tr;
+  mpz_t base;
+  mp_size_t en, mn;
+  mp_srcptr mp;
+  struct gmp_div_inverse minv;
+  unsigned shift;
+  mp_ptr tp = NULL;
+
+  en = GMP_ABS (e->_mp_size);
+  mn = GMP_ABS (m->_mp_size);
+  if (mn == 0)
+    gmp_die ("mpz_powm: Zero modulo.");
+
+  if (en == 0)
+    {
+      /* b^0 mod m: 1, except 0 when |m| == 1.  */
+      mpz_set_ui (r, mpz_cmpabs_ui (m, 1));
+      return;
+    }
+
+  mp = m->_mp_d;
+  mpn_div_qr_invert (&minv, mp, mn);
+  shift = minv.shift;
+
+  if (shift > 0)
+    {
+      /* To avoid shifts, we do all our reductions, except the final
+	 one, using a *normalized* m. */
+      minv.shift = 0;
+
+      tp = gmp_alloc_limbs (mn);
+      gmp_assert_nocarry (mpn_lshift (tp, mp, mn, shift));
+      mp = tp;
+    }
+
+  mpz_init (base);
+
+  if (e->_mp_size < 0)
+    {
+      if (!mpz_invert (base, b, m))
+	gmp_die ("mpz_powm: Negative exponent and non-invertible base.");
+    }
+  else
+    {
+      mp_size_t bn;
+      mpz_abs (base, b);
+
+      bn = base->_mp_size;
+      if (bn >= mn)
+	{
+	  mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv);
+	  bn = mn;
+	}
+
+      /* We have reduced the absolute value. Now take care of the
+	 sign. Note that we get zero represented non-canonically as
+	 m. */
+      if (b->_mp_size < 0)
+	{
+	  mp_ptr bp = MPZ_REALLOC (base, mn);
+	  gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn));
+	  bn = mn;
+	}
+      base->_mp_size = mpn_normalized_size (base->_mp_d, bn);
+    }
+  mpz_init_set_ui (tr, 1);
+
+  /* Scan exponent limbs high to low, bits high to low.  */
+  while (--en >= 0)
+    {
+      mp_limb_t w = e->_mp_d[en];
+      mp_limb_t bit;
+
+      bit = GMP_LIMB_HIGHBIT;
+      do
+	{
+	  mpz_mul (tr, tr, tr);
+	  if (w & bit)
+	    mpz_mul (tr, tr, base);
+	  if (tr->_mp_size > mn)
+	    {
+	      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
+	      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
+	    }
+	  bit >>= 1;
+	}
+      while (bit > 0);
+    }
+
+  /* Final reduction */
+  if (tr->_mp_size >= mn)
+    {
+      minv.shift = shift;
+      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
+      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
+    }
+  if (tp)
+    gmp_free_limbs (tp, mn);
+
+  mpz_swap (r, tr);
+  mpz_clear (tr);
+  mpz_clear (base);
+}
+
+/* R = B^ELIMB mod M, promoting the exponent to a temporary mpz.  */
+void
+mpz_powm_ui (mpz_t r, const mpz_t b, unsigned long elimb, const mpz_t m)
+{
+  mpz_t e;
+
+  mpz_init_set_ui (e, elimb);
+  mpz_powm (r, b, e, m);
+  mpz_clear (e);
+}
+
/* x=trunc(y^(1/z)), r=y-x^z */
/* Either x or r may be NULL when that output is not wanted.  Uses
   Newton's method, starting from a power of two that exceeds the
   root and iterating until the estimates stop decreasing. */
void
mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z)
{
  int sgn;
  mp_bitcnt_t bc;
  mpz_t t, u;

  sgn = y->_mp_size < 0;
  /* ~z has its low bit set iff z is even; reject even roots of
     negative numbers. */
  if ((~z & sgn) != 0)
    gmp_die ("mpz_rootrem: Negative argument, with even root.");
  if (z == 0)
    gmp_die ("mpz_rootrem: Zeroth root.");

  /* |y| <= 1: the root is y itself, remainder zero. */
  if (mpz_cmpabs_ui (y, 1) <= 0) {
    if (x)
      mpz_set (x, y);
    if (r)
      r->_mp_size = 0;
    return;
  }

  mpz_init (u);
  mpz_init (t);
  /* Initial over-estimate t = 2^bc, bc = ceil(sizeinbase(y,2)/z). */
  bc = (mpz_sizeinbase (y, 2) - 1) / z + 1;
  mpz_setbit (t, bc);

  if (z == 2) /* simplify sqrt loop: z-1 == 1 */
    do {
      mpz_swap (u, t);			/* u = x */
      mpz_tdiv_q (t, y, u);		/* t = y/x */
      mpz_add (t, t, u);		/* t = y/x + x */
      mpz_tdiv_q_2exp (t, t, 1);	/* x'= (y/x + x)/2 */
    } while (mpz_cmpabs (t, u) < 0);	/* |x'| < |x| */
  else /* z != 2 */ {
    mpz_t v;

    mpz_init (v);
    /* For odd roots of a negative y, iterate on negative values. */
    if (sgn)
      mpz_neg (t, t);

    do {
      mpz_swap (u, t);			/* u = x */
      mpz_pow_ui (t, u, z - 1);		/* t = x^(z-1) */
      mpz_tdiv_q (t, y, t);		/* t = y/x^(z-1) */
      mpz_mul_ui (v, u, z - 1);		/* v = x*(z-1) */
      mpz_add (t, t, v);		/* t = y/x^(z-1) + x*(z-1) */
      mpz_tdiv_q_ui (t, t, z);		/* x'=(y/x^(z-1) + x*(z-1))/z */
    } while (mpz_cmpabs (t, u) < 0);	/* |x'| < |x| */

    mpz_clear (v);
  }

  /* u holds the last decreasing estimate, which is the root. */
  if (r) {
    mpz_pow_ui (t, u, z);
    mpz_sub (r, y, t);
  }
  if (x)
    mpz_swap (x, u);
  mpz_clear (u);
  mpz_clear (t);
}
+
+int
+mpz_root (mpz_t x, const mpz_t y, unsigned long z)
+{
+  int res;
+  mpz_t r;
+
+  mpz_init (r);
+  mpz_rootrem (x, r, y, z);
+  res = r->_mp_size == 0;
+  mpz_clear (r);
+
+  return res;
+}
+
/* Compute s = floor(sqrt(u)) and r = u - s^2. Allows r == NULL */
void
mpz_sqrtrem (mpz_t s, mpz_t r, const mpz_t u)
{
  /* The square root is simply the 2nd root. */
  mpz_rootrem (s, r, u, 2);
}
+
/* s = floor(sqrt(u)); the remainder is discarded. */
void
mpz_sqrt (mpz_t s, const mpz_t u)
{
  mpz_rootrem (s, NULL, u, 2);
}
+
+int
+mpz_perfect_square_p (const mpz_t u)
+{
+  if (u->_mp_size <= 0)
+    return (u->_mp_size == 0);
+  else
+    return mpz_root (NULL, u, 2);
+}
+
/* Returns non-zero iff the n-limb number {p, n} is a perfect square.
   Requires n > 0 and a normalized operand (non-zero top limb). */
int
mpn_perfect_square_p (mp_srcptr p, mp_size_t n)
{
  mpz_t t;

  assert (n > 0);
  assert (p [n-1] != 0);
  /* Wrap the limb array in a read-only mpz and test for an exact
     square root. */
  return mpz_root (NULL, mpz_roinit_normal_n (t, p, n), 2);
}
+
/* Square root of the n-limb number {p, n}: stores floor(sqrt) at sp
   ((n+1)/2 limbs) and, when rp != NULL, the remainder at rp.
   Returns the limb count of the remainder.  Requires n > 0 and a
   normalized operand. */
mp_size_t
mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr p, mp_size_t n)
{
  mpz_t s, r, u;
  mp_size_t res;

  assert (n > 0);
  assert (p [n-1] != 0);

  mpz_init (r);
  mpz_init (s);
  /* Compute via mpz_rootrem on a read-only view of {p, n}. */
  mpz_rootrem (s, r, mpz_roinit_normal_n (u, p, n), 2);

  /* The root of a normalized n-limb number has exactly (n+1)/2
     limbs. */
  assert (s->_mp_size == (n+1)/2);
  mpn_copyd (sp, s->_mp_d, s->_mp_size);
  mpz_clear (s);
  res = r->_mp_size;
  if (rp)
    mpn_copyd (rp, r->_mp_d, res);
  mpz_clear (r);
  return res;
}
+
+/* Combinatorics */
+
/* Multifactorial: x = n!^(m), the product n*(n-m)*(n-2m)*...,
   stopping before the factors drop to 1 or below. */
void
mpz_mfac_uiui (mpz_t x, unsigned long n, unsigned long m)
{
  mpz_set_ui (x, n + (n == 0));	/* start from n, or 1 when n == 0 */
  /* m + 1 < 2 catches both m == 0 (the loop below would never
     terminate) and m == ULONG_MAX (m + 1 wraps to 0). */
  if (m + 1 < 2) return;
  while (n > m + 1)
    mpz_mul_ui (x, x, n -= m);	/* multiply in the next factor n-m */
}
+
/* Double factorial: x = n!! = n*(n-2)*(n-4)*... */
void
mpz_2fac_ui (mpz_t x, unsigned long n)
{
  mpz_mfac_uiui (x, n, 2);
}
+
/* Factorial: x = n! */
void
mpz_fac_ui (mpz_t x, unsigned long n)
{
  mpz_mfac_uiui (x, n, 1);
}
+
/* Binomial coefficient: r = binomial(n, k).  Returns 0 when k > n. */
void
mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k)
{
  mpz_t t;

  /* Start from 1 when k <= n, from 0 otherwise (in which case the
     computation below yields 0). */
  mpz_set_ui (r, k <= n);

  /* Exploit the symmetry binomial(n, k) = binomial(n, n-k) to reduce
     the number of factors. */
  if (k > (n >> 1))
    k = (k <= n) ? n - k : 0;

  mpz_init (t);
  mpz_fac_ui (t, k);	/* t = k! */

  /* r = n * (n-1) * ... * (n-k+1) */
  for (; k > 0; --k)
    mpz_mul_ui (r, r, n--);

  mpz_divexact (r, r, t);	/* divide by k!; always exact */
  mpz_clear (t);
}
+
+
+/* Primality testing */
+
/* Computes Kronecker (a/b) with odd b, a!=0 and GCD(a,b) = 1 */
/* Adapted from JACOBI_BASE_METHOD==4 in mpn/generic/jacbase.c */
static int
gmp_jacobi_coprime (mp_limb_t a, mp_limb_t b)
{
  int c, bit = 0;	/* bit 0 of `bit' accumulates the sign */

  assert (b & 1);
  assert (a != 0);
  /* assert (mpn_gcd_11 (a, b) == 1); */

  /* Below, we represent a and b shifted right so that the least
     significant one bit is implicit. */
  b >>= 1;

  gmp_ctz(c, a);	/* c = count of trailing zeros of a */
  a >>= 1;

  for (;;)
    {
      a >>= c;
      /* (2/b) = -1 if b = 3 or 5 mod 8 */
      bit ^= c & (b ^ (b >> 1));
      if (a < b)
	{
	  /* gcd reached: only the accumulated sign remains. */
	  if (a == 0)
	    return bit & 1 ? -1 : 1;
	  /* Reciprocity term: sign flips iff a = b = 3 (mod 4)
	     (low bits of the shifted representations). */
	  bit ^= a & b;
	  a = b - a;
	  b -= a;
	}
      else
	{
	  a -= b;
	  assert (a != 0);
	}

      gmp_ctz(c, a);
      ++c;
    }
}
+
/* Doubling step for Lucas sequences: given V = V_k and Qk = Q^k
   (mod n), compute V_{2k} and Q^{2k} (mod n). */
static void
gmp_lucas_step_k_2k (mpz_t V, mpz_t Qk, const mpz_t n)
{
  mpz_mod (Qk, Qk, n);
  /* V_{2k} <- V_k ^ 2 - 2Q^k */
  mpz_mul (V, V, V);
  mpz_submul_ui (V, Qk, 2);
  mpz_tdiv_r (V, V, n);
  /* Q^{2k} = (Q^k)^2 */
  mpz_mul (Qk, Qk, Qk);
}
+
/* Computes V_k, Q^k (mod n) for the Lucas' sequence */
/* with P=1, Q=Q; k = (n>>b0)|1. */
/* Requires an odd n > 4; b0 > 0; -2*Q must not overflow a long */
/* Returns (U_k == 0) and sets V=V_k and Qk=Q^k. */
static int
gmp_lucas_mod (mpz_t V, mpz_t Qk, long Q,
	       mp_bitcnt_t b0, const mpz_t n)
{
  mp_bitcnt_t bs;
  mpz_t U;
  int res;

  assert (b0 > 0);
  assert (Q <= - (LONG_MIN / 2));
  assert (Q >= - (LONG_MAX / 2));
  assert (mpz_cmp_ui (n, 4) > 0);
  assert (mpz_odd_p (n));

  mpz_init_set_ui (U, 1); /* U1 = 1 */
  mpz_set_ui (V, 1); /* V1 = 1 */
  mpz_set_si (Qk, Q);

  /* Walk the bits of k from just below the top bit of n down to b0:
     each iteration doubles the index, then optionally adds one. */
  for (bs = mpz_sizeinbase (n, 2) - 1; --bs >= b0;)
    {
      /* U_{2k} <- U_k * V_k */
      mpz_mul (U, U, V);
      /* V_{2k} <- V_k ^ 2 - 2Q^k */
      /* Q^{2k} = (Q^k)^2 */
      gmp_lucas_step_k_2k (V, Qk, n);

      /* A step k->k+1 is performed if the bit in $n$ is 1	*/
      /* mpz_tstbit(n,bs) or the bit is 0 in $n$ but	*/
      /* should be 1 in $n+1$ (bs == b0)			*/
      if (b0 == bs || mpz_tstbit (n, bs))
	{
	  /* Q^{k+1} <- Q^k * Q */
	  mpz_mul_si (Qk, Qk, Q);
	  /* U_{k+1} <- (U_k + V_k) / 2 */
	  mpz_swap (U, V); /* Keep in V the old value of U_k */
	  mpz_add (U, U, V);
	  /* We have to compute U/2, so we need an even value, */
	  /* equivalent (mod n) */
	  if (mpz_odd_p (U))
	    mpz_add (U, U, n);
	  mpz_tdiv_q_2exp (U, U, 1);
	  /* V_{k+1} <-(D*U_k + V_k) / 2 =
			U_{k+1} + (D-1)/2*U_k = U_{k+1} - 2Q*U_k */
	  mpz_mul_si (V, V, -2*Q);
	  mpz_add (V, U, V);
	  mpz_tdiv_r (V, V, n);
	}
      mpz_tdiv_r (U, U, n);
    }

  res = U->_mp_size == 0;	/* U_k == 0 (mod n) */
  mpz_clear (U);
  return res;
}
+
/* Performs strong Lucas' test on x, with parameters suggested */
/* for the BPSW test. Qk is only passed to recycle a variable. */
/* Requires GCD (x,6) = 1.*/
/* Returns 0 for composite, 1 for probable prime, 2 when x is
   certainly prime (the D search exhausted all candidates up to
   sqrt(x)). */
static int
gmp_stronglucas (const mpz_t x, mpz_t Qk)
{
  mp_bitcnt_t b0;
  mpz_t V, n;
  mp_limb_t maxD, D; /* The absolute value is stored. */
  long Q;
  mp_limb_t tl;

  /* Test on the absolute value. */
  mpz_roinit_normal_n (n, x->_mp_d, GMP_ABS (x->_mp_size));

  assert (mpz_odd_p (n));
  /* assert (mpz_gcd_ui (NULL, n, 6) == 1); */
  if (mpz_root (Qk, n, 2))
    return 0; /* A square is composite. */

  /* Check Ds up to square root (in case, n is prime)
     or avoid overflows */
  maxD = (Qk->_mp_size == 1) ? Qk->_mp_d [0] - 1 : GMP_LIMB_MAX;

  D = 3;
  /* Search a D such that (D/n) = -1 in the sequence 5,-7,9,-11,.. */
  /* For those Ds we have (D/n) = (n/|D|) */
  do
    {
      /* Bound reached: all D up to sqrt(n) were tried, so n is prime
	 (return 2) -- unless the bound only avoided limb overflow,
	 in which case settle for "probably prime" (return 1). */
      if (D >= maxD)
	return 1 + (D != GMP_LIMB_MAX); /* (1 + ! ~ D) */
      D += 2;
      tl = mpz_tdiv_ui (n, D);
      if (tl == 0)
	return 0;	/* D divides n: composite. */
    }
  while (gmp_jacobi_coprime (tl, D) == 1);

  mpz_init (V);

  /* n-(D/n) = n+1 = d*2^{b0}, with d = (n>>b0) | 1 */
  b0 = mpn_common_scan (~ n->_mp_d[0], 0, n->_mp_d, n->_mp_size, GMP_LIMB_MAX);
  /* b0 = mpz_scan0 (n, 0); */

  /* D= P^2 - 4Q; P = 1; Q = (1-D)/4 */
  Q = (D & 2) ? (long) (D >> 2) + 1 : -(long) (D >> 2);

  if (! gmp_lucas_mod (V, Qk, Q, b0, n))	/* If Ud != 0 */
    while (V->_mp_size != 0 && --b0 != 0)	/* while Vk != 0 */
      /* V <- V ^ 2 - 2Q^k */
      /* Q^{2k} = (Q^k)^2 */
      gmp_lucas_step_k_2k (V, Qk, n);

  mpz_clear (V);
  return (b0 != 0);
}
+
/* One Miller-Rabin round, testing n against the base stored in y,
   where nm1 = n - 1 = q * 2^k with q odd.  Returns 1 if n passes as
   a probable prime for this base, 0 if y witnesses compositeness.
   Overwrites y. */
static int
gmp_millerrabin (const mpz_t n, const mpz_t nm1, mpz_t y,
		 const mpz_t q, mp_bitcnt_t k)
{
  assert (k > 0);

  /* Caller must initialize y to the base. */
  mpz_powm (y, y, q, n);

  /* y^q = 1 or -1 (mod n): probable prime. */
  if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
    return 1;

  /* Square up to k-1 times; n passes if some power reaches -1. */
  while (--k > 0)
    {
      mpz_powm_ui (y, y, 2, n);
      if (mpz_cmp (y, nm1) == 0)
	return 1;
    }
  return 0;
}
+
+/* This product is 0xc0cfd797, and fits in 32 bits. */
+#define GMP_PRIME_PRODUCT \
+  (3UL*5UL*7UL*11UL*13UL*17UL*19UL*23UL*29UL)
+
+/* Bit (p+1)/2 is set, for each odd prime <= 61 */
+#define GMP_PRIME_MASK 0xc96996dcUL
+
/* Primality test on |n|: returns 2 when n is certainly prime, 1 when
   it passes BPSW plus (reps - 24) extra Miller-Rabin rounds, and 0
   when composite. */
int
mpz_probab_prime_p (const mpz_t n, int reps)
{
  mpz_t nm1;
  mpz_t q;
  mpz_t y;
  mp_bitcnt_t k;
  int is_prime;
  int j;

  /* Note that we use the absolute value of n only, for compatibility
     with the real GMP. */
  if (mpz_even_p (n))
    return (mpz_cmpabs_ui (n, 2) == 0) ? 2 : 0;

  /* Above test excludes n == 0 */
  assert (n->_mp_size != 0);

  /* Small odd n: look the answer up in the prime bit mask. */
  if (mpz_cmpabs_ui (n, 64) < 0)
    return (GMP_PRIME_MASK >> (n->_mp_d[0] >> 1)) & 2;

  /* Trial division by the small primes in GMP_PRIME_PRODUCT. */
  if (mpz_gcd_ui (NULL, n, GMP_PRIME_PRODUCT) != 1)
    return 0;

  /* All prime factors are >= 31. */
  if (mpz_cmpabs_ui (n, 31*31) < 0)
    return 2;

  mpz_init (nm1);
  mpz_init (q);

  /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
  mpz_abs (nm1, n);
  nm1->_mp_d[0] -= 1;
  /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */
  k = mpn_scan1 (nm1->_mp_d, 0);
  mpz_tdiv_q_2exp (q, nm1, k);

  /* BPSW test */
  mpz_init_set_ui (y, 2);
  is_prime = gmp_millerrabin (n, nm1, y, q, k) && gmp_stronglucas (n, y);
  reps -= 24; /* skip the first 24 repetitions */

  /* Use Miller-Rabin, with a deterministic sequence of bases, a[j] =
     j^2 + j + 41 using Euler's polynomial. We potentially stop early,
     if a[j] >= n - 1. Since n >= 31*31, this can happen only if reps >
     30 (a[30] == 971 > 31*31 == 961). */

  for (j = 0; is_prime & (j < reps); j++)
    {
      mpz_set_ui (y, (unsigned long) j*j+j+41);
      if (mpz_cmp (y, nm1) >= 0)
	{
	  /* Don't try any further bases. This "early" break does not affect
	     the result for any reasonable reps value (<=5000 was tested) */
	  assert (j >= 30);
	  break;
	}
      is_prime = gmp_millerrabin (n, nm1, y, q, k);
    }
  mpz_clear (nm1);
  mpz_clear (q);
  mpz_clear (y);

  return is_prime;
}
+
+
+/* Logical operations and bit manipulation. */
+
+/* Numbers are treated as if represented in two's complement (and
+   infinitely sign extended). For a negative values we get the two's
+   complement from -x = ~x + 1, where ~ is bitwise complement.
+   Negation transforms
+
+     xxxx10...0
+
+   into
+
+     yyyy10...0
+
+   where yyyy is the bitwise complement of xxxx. So least significant
+   bits, up to and including the first one bit, are unchanged, and
+   the more significant bits are all complemented.
+
+   To change a bit from zero to one in a negative number, subtract the
+   corresponding power of two from the absolute value. This can never
+   underflow. To change a bit from one to zero, add the corresponding
+   power of two, and this might overflow. E.g., if x = -001111, the
+   two's complement is 110001. Clearing the least significant bit, we
+   get two's complement 110000, and -010000. */
+
/* Returns bit bit_index of d, viewing d as two's complement with
   infinite sign extension. */
int
mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t limb_index;
  unsigned shift;
  mp_size_t ds;
  mp_size_t dn;
  mp_limb_t w;
  int bit;

  ds = d->_mp_size;
  dn = GMP_ABS (ds);
  limb_index = bit_index / GMP_LIMB_BITS;
  /* Beyond the top limb the value is pure sign extension. */
  if (limb_index >= dn)
    return ds < 0;

  shift = bit_index % GMP_LIMB_BITS;
  w = d->_mp_d[limb_index];
  bit = (w >> shift) & 1;

  if (ds < 0)
    {
      /* d < 0. Check if any of the bits below is set: If so, our bit
	 must be complemented. */
      if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0)
	return bit ^ 1;
      while (--limb_index >= 0)
	if (d->_mp_d[limb_index] > 0)
	  return bit ^ 1;
    }
  return bit;
}
+
/* Add 2^bit_index to the absolute value of d, preserving d's sign. */
static void
mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t dn, limb_index;
  mp_limb_t bit;
  mp_ptr dp;

  dn = GMP_ABS (d->_mp_size);

  limb_index = bit_index / GMP_LIMB_BITS;
  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);

  if (limb_index >= dn)
    {
      mp_size_t i;
      /* The bit should be set outside of the end of the number.
	 We have to increase the size of the number. */
      dp = MPZ_REALLOC (d, limb_index + 1);

      dp[limb_index] = bit;
      /* Zero-fill the gap between the old top limb and the new bit. */
      for (i = dn; i < limb_index; i++)
	dp[i] = 0;
      dn = limb_index + 1;
    }
  else
    {
      mp_limb_t cy;

      dp = d->_mp_d;

      /* Add the bit into the existing limbs; a carry out of the top
	 limb extends the number by one limb. */
      cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit);
      if (cy > 0)
	{
	  dp = MPZ_REALLOC (d, dn + 1);
	  dp[dn++] = cy;
	}
    }

  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
}
+
/* Subtract 2^bit_index from the absolute value of d, preserving d's
   sign.  The caller guarantees the subtraction produces no borrow
   out of the top limb (enforced by the assertion below). */
static void
mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t dn, limb_index;
  mp_ptr dp;
  mp_limb_t bit;

  dn = GMP_ABS (d->_mp_size);
  dp = d->_mp_d;

  limb_index = bit_index / GMP_LIMB_BITS;
  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);

  assert (limb_index < dn);

  gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
				 dn - limb_index, bit));
  /* The top limb may have become zero. */
  dn = mpn_normalized_size (dp, dn);
  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
}
+
+void
+mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (!mpz_tstbit (d, bit_index))
+    {
+      if (d->_mp_size >= 0)
+	mpz_abs_add_bit (d, bit_index);
+      else
+	mpz_abs_sub_bit (d, bit_index);
+    }
+}
+
+void
+mpz_clrbit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (mpz_tstbit (d, bit_index))
+    {
+      if (d->_mp_size >= 0)
+	mpz_abs_sub_bit (d, bit_index);
+      else
+	mpz_abs_add_bit (d, bit_index);
+    }
+}
+
/* Complement (toggle) bit bit_index of d. */
void
mpz_combit (mpz_t d, mp_bitcnt_t bit_index)
{
  /* On the absolute value, toggling a set bit of a non-negative d,
     or a clear bit of a negative d, is a subtraction; the other two
     combinations are an addition. */
  if (mpz_tstbit (d, bit_index) ^ (d->_mp_size < 0))
    mpz_abs_sub_bit (d, bit_index);
  else
    mpz_abs_add_bit (d, bit_index);
}
+
/* r = ~u, the one's complement: in two's complement, ~u = -(u + 1). */
void
mpz_com (mpz_t r, const mpz_t u)
{
  mpz_add_ui (r, u, 1);
  mpz_neg (r, r);
}
+
/* r = u & v, with both operands viewed as infinite two's complement.
   Negative operands are converted limb by limb on the fly via
   -x = ~x + 1: the masks ux/vx/rx select complementation and the
   carries uc/vc/rc propagate the +1. */
void
mpz_and (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, rn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  /* Make u the operand with more limbs. */
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      r->_mp_size = 0;
      return;
    }

  /* The result is negative only when both operands are. */
  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc & vc;

  ux = -uc;
  vx = -vc;
  rx = -rc;

  /* If the smaller input is positive, higher limbs don't matter. */
  rn = vx ? un : vn;

  rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = ( (ul & vl) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  assert (vc == 0);

  /* Remaining limbs of u, combined with v's sign extension vx. */
  for (; i < rn; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = ( (ul & vx) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  /* A pending carry from re-complementing needs one extra limb. */
  if (rc)
    rp[rn++] = rc;
  else
    rn = mpn_normalized_size (rp, rn);

  r->_mp_size = rx ? -rn : rn;
}
+
/* r = u | v, with both operands viewed as infinite two's complement.
   Same on-the-fly complement scheme as mpz_and: masks ux/vx/rx and
   carries uc/vc/rc implement -x = ~x + 1 per limb. */
void
mpz_ior (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, rn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  /* Make u the operand with more limbs. */
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      mpz_set (r, u);
      return;
    }

  /* The result is negative when either operand is. */
  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc | vc;

  ux = -uc;
  vx = -vc;
  rx = -rc;

  /* If the smaller input is negative, by sign extension higher limbs
     don't matter. */
  rn = vx ? vn : un;

  rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = ( (ul | vl) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  assert (vc == 0);

  /* Remaining limbs of u, combined with v's sign extension vx. */
  for (; i < rn; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = ( (ul | vx) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  /* A pending carry from re-complementing needs one extra limb. */
  if (rc)
    rp[rn++] = rc;
  else
    rn = mpn_normalized_size (rp, rn);

  r->_mp_size = rx ? -rn : rn;
}
+
/* r = u ^ v, with both operands viewed as infinite two's complement.
   Same on-the-fly complement scheme as mpz_and/mpz_ior.  The result
   is negative when exactly one operand is. */
void
mpz_xor (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  /* Make u the operand with more limbs. */
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      mpz_set (r, u);
      return;
    }

  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc ^ vc;

  ux = -uc;
  vx = -vc;
  rx = -rc;

  /* xor never shortens below the longer operand: un limbs, plus
     possibly one for the re-complement carry. */
  rp = MPZ_REALLOC (r, un + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = (ul ^ vl ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  assert (vc == 0);

  /* Remaining limbs of u; vl ^ rx here collapses to ux since
     rx = ux ^ vx. */
  for (; i < un; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = (ul ^ ux) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  /* A pending carry from re-complementing needs one extra limb. */
  if (rc)
    rp[un++] = rc;
  else
    un = mpn_normalized_size (rp, un);

  r->_mp_size = rx ? -un : un;
}
+
/* Number of 1 bits in the single limb x. */
static unsigned
gmp_popcount_limb (mp_limb_t x)
{
  unsigned c;

  /* Do 16 bits at a time, to avoid limb-sized constants. */
  int LOCAL_SHIFT_BITS = 16;
  for (c = 0; x > 0;)
    {
      /* Classic bit-slicing popcount of the low 16 bits of x. */
      unsigned w = x - ((x >> 1) & 0x5555);
      w = ((w >> 2) & 0x3333) + (w & 0x3333);
      w =  (w >> 4) + w;
      w = ((w >> 8) & 0x000f) + (w & 0x000f);
      c += w;
      if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS)
	x >>= LOCAL_SHIFT_BITS;
      else
	x = 0;	/* the shift would be undefined; x is exhausted */
    }
  return c;
}
+
+mp_bitcnt_t
+mpn_popcount (mp_srcptr p, mp_size_t n)
+{
+  mp_size_t i;
+  mp_bitcnt_t c;
+
+  for (c = 0, i = 0; i < n; i++)
+    c += gmp_popcount_limb (p[i]);
+
+  return c;
+}
+
+mp_bitcnt_t
+mpz_popcount (const mpz_t u)
+{
+  mp_size_t un;
+
+  un = u->_mp_size;
+
+  if (un < 0)
+    return ~(mp_bitcnt_t) 0;
+
+  return mpn_popcount (u->_mp_d, un);
+}
+
/* Hamming distance: the number of bit positions where u and v
   differ, in the two's complement view.  When the signs differ,
   infinitely many bits differ and ~0 is returned. */
mp_bitcnt_t
mpz_hamdist (const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, i;
  mp_limb_t uc, vc, ul, vl, comp;
  mp_srcptr up, vp;
  mp_bitcnt_t c;

  un = u->_mp_size;
  vn = v->_mp_size;

  if ( (un ^ vn) < 0)
    return ~(mp_bitcnt_t) 0;

  /* If both are negative, complement both on the fly (-x = ~x + 1):
     `comp' is the shared complement mask, uc/vc carry the +1. */
  comp = - (uc = vc = (un < 0));
  if (uc)
    {
      assert (vn < 0);
      un = -un;
      vn = -vn;
    }

  up = u->_mp_d;
  vp = v->_mp_d;

  /* Make {up, un} the longer operand. */
  if (un < vn)
    MPN_SRCPTR_SWAP (up, un, vp, vn);

  for (i = 0, c = 0; i < vn; i++)
    {
      ul = (up[i] ^ comp) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ comp) + vc;
      vc = vl < vc;

      c += gmp_popcount_limb (ul ^ vl);
    }
  assert (vc == 0);

  /* Remaining limbs of the longer operand, against the shorter
     operand's sign extension. */
  for (; i < un; i++)
    {
      ul = (up[i] ^ comp) + uc;
      uc = ul < uc;

      c += gmp_popcount_limb (ul ^ comp);
    }

  return c;
}
+
/* Index of the first 1 bit of u at or above starting_bit, in the
   two's complement view.  Returns ~0 when there is none. */
mp_bitcnt_t
mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit)
{
  mp_ptr up;
  mp_size_t us, un, i;
  mp_limb_t limb, ux;

  us = u->_mp_size;
  un = GMP_ABS (us);
  i = starting_bit / GMP_LIMB_BITS;

  /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
     for u<0. Notice this test picks up any u==0 too. */
  if (i >= un)
    return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);

  up = u->_mp_d;
  ux = 0;	/* sign-extension limb for the scan above limb i */
  limb = up[i];

  if (starting_bit != 0)
    {
      if (us < 0)
	{
	  /* Two's complement limb i on the fly: the +1 carry reaches
	     it only when all lower limbs are zero; then compute the
	     sign-extension mask for the higher limbs. */
	  ux = mpn_zero_p (up, i);
	  limb = ~ limb + ux;
	  ux = - (mp_limb_t) (limb >= ux);
	}

      /* Mask to 0 all bits before starting_bit, thus ignoring them. */
      limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);
    }

  return mpn_common_scan (limb, i, up, un, ux);
}
+
/* Index of the first 0 bit of u at or above starting_bit, in the
   two's complement view.  Returns ~0 when there is none (u < 0 with
   only 1 bits from there on). */
mp_bitcnt_t
mpz_scan0 (const mpz_t u, mp_bitcnt_t starting_bit)
{
  mp_ptr up;
  mp_size_t us, un, i;
  mp_limb_t limb, ux;

  us = u->_mp_size;
  /* ux doubles as the complement mask: scanning for a 0 bit in u is
     scanning for a 1 bit in ~u. */
  ux = - (mp_limb_t) (us >= 0);
  un = GMP_ABS (us);
  i = starting_bit / GMP_LIMB_BITS;

  /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for
     u<0.  Notice this test picks up all cases of u==0 too. */
  if (i >= un)
    return (ux ? starting_bit : ~(mp_bitcnt_t) 0);

  up = u->_mp_d;
  limb = up[i] ^ ux;

  if (ux == 0)
    limb -= mpn_zero_p (up, i); /* limb = ~(~limb + zero_p) */

  /* Mask all bits before starting_bit, thus ignoring them. */
  limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);

  return mpn_common_scan (limb, i, up, un, ux);
}
+
+
+/* MPZ base conversion. */
+
/* Number of digits needed to write |u| in the given base, 2 <= base
   <= 62.  Exact: power-of-two bases are derived from the bit count,
   other bases by repeated division.  Zero counts as one digit. */
size_t
mpz_sizeinbase (const mpz_t u, int base)
{
  mp_size_t un, tn;
  mp_srcptr up;
  mp_ptr tp;
  mp_bitcnt_t bits;
  struct gmp_div_inverse bi;
  size_t ndigits;

  assert (base >= 2);
  assert (base <= 62);

  un = GMP_ABS (u->_mp_size);
  if (un == 0)
    return 1;

  up = u->_mp_d;

  bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]);
  switch (base)
    {
    case 2:
      return bits;
    case 4:
      return (bits + 1) / 2;
    case 8:
      return (bits + 2) / 3;
    case 16:
      return (bits + 3) / 4;
    case 32:
      return (bits + 4) / 5;
      /* FIXME: Do something more clever for the common case of base
	 10. */
    }

  /* General base: divide a scratch copy by base until it is
     exhausted, counting one digit per division. */
  tp = gmp_alloc_limbs (un);
  mpn_copyi (tp, up, un);
  mpn_div_qr_1_invert (&bi, base);

  tn = un;
  ndigits = 0;
  do
    {
      ndigits++;
      mpn_div_qr_1_preinv (tp, tp, tn, &bi);
      tn -= (tp[tn-1] == 0);	/* drop a zeroed top limb */
    }
  while (tn > 0);

  gmp_free_limbs (tp, un);
  return ndigits;
}
+
/* Convert u to a string.  base 2..36 gives lowercase digits, 37..62
   mixed case, and negative bases -2..-36 give uppercase digits; base
   0, 1 and -1 are treated as 10.  Returns NULL for an unsupported
   base.  When sp is NULL a buffer of the right size is allocated. */
char *
mpz_get_str (char *sp, int base, const mpz_t u)
{
  unsigned bits;
  const char *digits;
  mp_size_t un;
  size_t i, sn, osn;

  digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  if (base > 1)
    {
      if (base <= 36)
	digits = "0123456789abcdefghijklmnopqrstuvwxyz";
      else if (base > 62)
	return NULL;
    }
  else if (base >= -1)
    base = 10;
  else
    {
      /* Negative base: uppercase digits (the default table), at most
	 base 36. */
      base = -base;
      if (base > 36)
	return NULL;
    }

  sn = 1 + mpz_sizeinbase (u, base);	/* + 1 for a possible sign */
  if (!sp)
    {
      osn = 1 + sn;
      sp = (char *) gmp_alloc (osn);
    }
  else
    osn = 0;	/* caller-supplied buffer; nothing to shrink later */
  un = GMP_ABS (u->_mp_size);

  if (un == 0)
    {
      sp[0] = '0';
      sn = 1;
      goto ret;
    }

  i = 0;

  if (u->_mp_size < 0)
    sp[i++] = '-';

  bits = mpn_base_power_of_two_p (base);

  if (bits)
    /* Not modified in this case. */
    sn = i + mpn_get_str_bits ((unsigned char *) sp + i, bits, u->_mp_d, un);
  else
    {
      struct mpn_base_info info;
      mp_ptr tp;

      /* General bases need a scratch copy: the conversion divides
	 the number in place. */
      mpn_get_base_info (&info, base);
      tp = gmp_alloc_limbs (un);
      mpn_copyi (tp, u->_mp_d, un);

      sn = i + mpn_get_str_other ((unsigned char *) sp + i, base, &info, tp, un);
      gmp_free_limbs (tp, un);
    }

  /* Map the raw digit values to characters. */
  for (; i < sn; i++)
    sp[i] = digits[(unsigned char) sp[i]];

ret:
  sp[sn] = '\0';
  /* Shrink an allocated buffer to the actual length used. */
  if (osn && osn != sn + 1)
    sp = (char*) gmp_realloc (sp, osn, sn + 1);
  return sp;
}
+
/* Set r from the string sp, in the given base (0 or 2..62).  base 0
   auto-detects 0x/0X (hex), 0b/0B (binary), a leading 0 (octal), and
   decimal otherwise.  Whitespace is skipped, a single leading '-'
   negates.  Returns 0 on success, -1 on invalid input (r set to 0). */
int
mpz_set_str (mpz_t r, const char *sp, int base)
{
  unsigned bits, value_of_a;
  mp_size_t rn, alloc;
  mp_ptr rp;
  size_t dn, sn;
  int sign;
  unsigned char *dp;

  assert (base == 0 || (base >= 2 && base <= 62));

  while (isspace( (unsigned char) *sp))
    sp++;

  sign = (*sp == '-');
  sp += sign;

  if (base == 0)
    {
      /* Auto-detect the base from the prefix. */
      if (sp[0] == '0')
	{
	  if (sp[1] == 'x' || sp[1] == 'X')
	    {
	      base = 16;
	      sp += 2;
	    }
	  else if (sp[1] == 'b' || sp[1] == 'B')
	    {
	      base = 2;
	      sp += 2;
	    }
	  else
	    base = 8;
	}
      else
	base = 10;
    }

  /* An empty digit string is invalid. */
  if (!*sp)
    {
      r->_mp_size = 0;
      return -1;
    }
  sn = strlen(sp);
  dp = (unsigned char *) gmp_alloc (sn);

  /* Bases above 36 are case-sensitive: 'a' means 36; otherwise 'a'
     and 'A' both mean 10. */
  value_of_a = (base > 36) ? 36 : 10;
  for (dn = 0; *sp; sp++)
    {
      unsigned digit;

      if (isspace ((unsigned char) *sp))
	continue;
      else if (*sp >= '0' && *sp <= '9')
	digit = *sp - '0';
      else if (*sp >= 'a' && *sp <= 'z')
	digit = *sp - 'a' + value_of_a;
      else if (*sp >= 'A' && *sp <= 'Z')
	digit = *sp - 'A' + 10;
      else
	digit = base; /* fail */

      if (digit >= (unsigned) base)
	{
	  gmp_free (dp, sn);
	  r->_mp_size = 0;
	  return -1;
	}

      dp[dn++] = digit;
    }

  /* Whitespace-only after the sign/prefix is also invalid. */
  if (!dn)
    {
      gmp_free (dp, sn);
      r->_mp_size = 0;
      return -1;
    }
  bits = mpn_base_power_of_two_p (base);

  if (bits > 0)
    {
      /* Power-of-two base: pack bits directly. */
      alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
      rp = MPZ_REALLOC (r, alloc);
      rn = mpn_set_str_bits (rp, dp, dn, bits);
    }
  else
    {
      struct mpn_base_info info;
      mpn_get_base_info (&info, base);
      alloc = (dn + info.exp - 1) / info.exp;
      rp = MPZ_REALLOC (r, alloc);
      rn = mpn_set_str_other (rp, dp, dn, base, &info);
      /* Normalization, needed for all-zero input. */
      assert (rn > 0);
      rn -= rp[rn-1] == 0;
    }
  assert (rn <= alloc);
  gmp_free (dp, sn);

  r->_mp_size = sign ? - rn : rn;

  return 0;
}
+
/* Initialize r and parse it from sp in the given base (see
   mpz_set_str).  Returns 0 on success, -1 on invalid input; r is
   initialized either way. */
int
mpz_init_set_str (mpz_t r, const char *sp, int base)
{
  mpz_init (r);
  return mpz_set_str (r, sp, base);
}
+
+size_t
+mpz_out_str (FILE *stream, int base, const mpz_t x)
+{
+  char *str;
+  size_t len, n;
+
+  str = mpz_get_str (NULL, base, x);
+  if (!str)
+    return 0;
+  len = strlen (str);
+  n = fwrite (str, 1, len, stream);
+  gmp_free (str, len + 1);
+  return n;
+}
+
+
/* Returns 1 on a big-endian machine and -1 on a little-endian one,
   matching the non-zero `endian' convention of mpz_import/export. */
static int
gmp_detect_endian (void)
{
  static const int probe = 2;
  const unsigned char *first_byte = (const unsigned char *) &probe;

  /* Little endian stores the value 2 in the first byte: 1 - 2 = -1.
     Big endian stores 0 there: 1 - 0 = 1. */
  return 1 - *first_byte;
}
+
+/* Import and export. Does not support nails. */
/* Read count words, each of size bytes, from src into r.  order is
   1 (most significant word first) or -1; endian is 1 (big), -1
   (little) or 0 (native).  nails must be 0 (unsupported). */
void
mpz_import (mpz_t r, size_t count, int order, size_t size, int endian,
	    size_t nails, const void *src)
{
  const unsigned char *p;
  ptrdiff_t word_step;
  mp_ptr rp;
  mp_size_t rn;

  /* The current (partial) limb. */
  mp_limb_t limb;
  /* The number of bytes already copied to this limb (starting from
     the low end). */
  size_t bytes;
  /* The index where the limb should be stored, when completed. */
  mp_size_t i;

  if (nails != 0)
    gmp_die ("mpz_import: Nails not supported.");

  assert (order == 1 || order == -1);
  assert (endian >= -1 && endian <= 1);

  if (endian == 0)
    endian = gmp_detect_endian ();

  p = (unsigned char *) src;

  /* Stride between words; doubled because the inner loop already
     advances p by size bytes (one byte at a time). */
  word_step = (order != endian) ? 2 * size : 0;

  /* Process bytes from the least significant end, so point p at the
     least significant word. */
  if (order == 1)
    {
      p += size * (count - 1);
      word_step = - word_step;
    }

  /* And at least significant byte of that word. */
  if (endian == 1)
    p += (size - 1);

  rn = (size * count + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t);
  rp = MPZ_REALLOC (r, rn);

  for (limb = 0, bytes = 0, i = 0; count > 0; count--, p += word_step)
    {
      size_t j;
      for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
	{
	  /* Accumulate bytes into the current limb, low byte first. */
	  limb |= (mp_limb_t) *p << (bytes++ * CHAR_BIT);
	  if (bytes == sizeof(mp_limb_t))
	    {
	      rp[i++] = limb;
	      bytes = 0;
	      limb = 0;
	    }
	}
    }
  assert (i + (bytes > 0) == rn);
  /* Store a final partial limb, or strip high zero limbs. */
  if (limb != 0)
    rp[i++] = limb;
  else
    i = mpn_normalized_size (rp, i);

  r->_mp_size = i;
}
+
/* Write |u| to r as count words of size bytes each, allocating the
   buffer when r is NULL, and store the word count in *countp when
   countp is non-NULL.  order/endian as in mpz_import; nails must be
   0.  Returns the output buffer (count == 0 for u == 0, in which
   case nothing is written). */
void *
mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
	    size_t nails, const mpz_t u)
{
  size_t count;
  mp_size_t un;

  if (nails != 0)
    gmp_die ("mpz_export: Nails not supported.");

  assert (order == 1 || order == -1);
  assert (endian >= -1 && endian <= 1);
  assert (size > 0 || u->_mp_size == 0);

  un = u->_mp_size;
  count = 0;
  if (un != 0)
    {
      size_t k;
      unsigned char *p;
      ptrdiff_t word_step;
      /* The current (partial) limb. */
      mp_limb_t limb;
      /* The number of bytes left to do in this limb. */
      size_t bytes;
      /* The index where the limb was read. */
      mp_size_t i;

      un = GMP_ABS (un);

      /* Count bytes in top limb. */
      limb = u->_mp_d[un-1];
      assert (limb != 0);

      k = (GMP_LIMB_BITS <= CHAR_BIT);
      if (!k)
	{
	  do {
	    int LOCAL_CHAR_BIT = CHAR_BIT;
	    k++; limb >>= LOCAL_CHAR_BIT;
	  } while (limb != 0);
	}
      /* else limb = 0; */

      /* Words needed for k bytes of top limb plus un-1 full limbs. */
      count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size;

      if (!r)
	r = gmp_alloc (count * size);

      if (endian == 0)
	endian = gmp_detect_endian ();

      p = (unsigned char *) r;

      /* Stride between words; doubled because the inner loop already
	 advances p by size bytes. */
      word_step = (order != endian) ? 2 * size : 0;

      /* Process bytes from the least significant end, so point p at the
	 least significant word. */
      if (order == 1)
	{
	  p += size * (count - 1);
	  word_step = - word_step;
	}

      /* And at least significant byte of that word. */
      if (endian == 1)
	p += (size - 1);

      for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step)
	{
	  size_t j;
	  for (j = 0; j < size; ++j, p -= (ptrdiff_t) endian)
	    {
	      if (sizeof (mp_limb_t) == 1)
		{
		  /* One byte per limb; zero-pad past the top limb. */
		  if (i < un)
		    *p = u->_mp_d[i++];
		  else
		    *p = 0;
		}
	      else
		{
		  int LOCAL_CHAR_BIT = CHAR_BIT;
		  /* Refill the byte pipeline from the next limb; past
		     the top limb, the remaining shifts emit zeros. */
		  if (bytes == 0)
		    {
		      if (i < un)
			limb = u->_mp_d[i++];
		      bytes = sizeof (mp_limb_t);
		    }
		  *p = limb;
		  limb >>= LOCAL_CHAR_BIT;
		  bytes--;
		}
	    }
	}
      assert (i == un);
      assert (k == count);
    }

  if (countp)
    *countp = count;

  return r;
}
diff --git a/mini-gmp/mini-gmp.h b/mini-gmp/mini-gmp.h
new file mode 100644
index 0000000..59c24cf
--- /dev/null
+++ b/mini-gmp/mini-gmp.h
@@ -0,0 +1,310 @@
+/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
+
+Copyright 2011-2015, 2017, 2019-2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* About mini-gmp: This is a minimal implementation of a subset of the
+   GMP interface. It is intended for inclusion into applications which
+   have modest bignums needs, as a fallback when the real GMP library
+   is not installed.
+
+   This file defines the public interface. */
+
+#ifndef __MINI_GMP_H__
+#define __MINI_GMP_H__
+
+/* For size_t */
+#include <stddef.h>
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+void mp_set_memory_functions (void *(*) (size_t),
+			      void *(*) (void *, size_t, size_t),
+			      void (*) (void *, size_t));
+
+void mp_get_memory_functions (void *(**) (size_t),
+			      void *(**) (void *, size_t, size_t),
+			      void (**) (void *, size_t));
+
+#ifndef MINI_GMP_LIMB_TYPE
+#define MINI_GMP_LIMB_TYPE long
+#endif
+
+typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t;
+typedef long mp_size_t;
+typedef unsigned long mp_bitcnt_t;
+
+typedef mp_limb_t *mp_ptr;
+typedef const mp_limb_t *mp_srcptr;
+
+typedef struct
+{
+  int _mp_alloc;		/* Number of *limbs* allocated and pointed
+				   to by the _mp_d field.  */
+  int _mp_size;			/* abs(_mp_size) is the number of limbs the
+				   last field points to.  If _mp_size is
+				   negative, the represented number is
+				   negative.  */
+  mp_limb_t *_mp_d;		/* Pointer to the limbs.  */
+} __mpz_struct;
+
+typedef __mpz_struct mpz_t[1];
+
+typedef __mpz_struct *mpz_ptr;
+typedef const __mpz_struct *mpz_srcptr;
+
+extern const int mp_bits_per_limb;
+
+void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+void mpn_zero (mp_ptr, mp_size_t);
+
+int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t);
+int mpn_zero_p (mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
+int mpn_perfect_square_p (mp_srcptr, mp_size_t);
+mp_size_t mpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+
+mp_bitcnt_t mpn_scan0 (mp_srcptr, mp_bitcnt_t);
+mp_bitcnt_t mpn_scan1 (mp_srcptr, mp_bitcnt_t);
+
+void mpn_com (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_neg (mp_ptr, mp_srcptr, mp_size_t);
+
+mp_bitcnt_t mpn_popcount (mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t);
+#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0)
+
+size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
+mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int);
+
+void mpz_init (mpz_t);
+void mpz_init2 (mpz_t, mp_bitcnt_t);
+void mpz_clear (mpz_t);
+
+#define mpz_odd_p(z)   (((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
+#define mpz_even_p(z)  (! mpz_odd_p (z))
+
+int mpz_sgn (const mpz_t);
+int mpz_cmp_si (const mpz_t, long);
+int mpz_cmp_ui (const mpz_t, unsigned long);
+int mpz_cmp (const mpz_t, const mpz_t);
+int mpz_cmpabs_ui (const mpz_t, unsigned long);
+int mpz_cmpabs (const mpz_t, const mpz_t);
+int mpz_cmp_d (const mpz_t, double);
+int mpz_cmpabs_d (const mpz_t, double);
+
+void mpz_abs (mpz_t, const mpz_t);
+void mpz_neg (mpz_t, const mpz_t);
+void mpz_swap (mpz_t, mpz_t);
+
+void mpz_add_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_add (mpz_t, const mpz_t, const mpz_t);
+void mpz_sub_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_ui_sub (mpz_t, unsigned long, const mpz_t);
+void mpz_sub (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_mul_si (mpz_t, const mpz_t, long int);
+void mpz_mul_ui (mpz_t, const mpz_t, unsigned long int);
+void mpz_mul (mpz_t, const mpz_t, const mpz_t);
+void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_addmul_ui (mpz_t, const mpz_t, unsigned long int);
+void mpz_addmul (mpz_t, const mpz_t, const mpz_t);
+void mpz_submul_ui (mpz_t, const mpz_t, unsigned long int);
+void mpz_submul (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_cdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_cdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_cdiv_r (mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_r (mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_r (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_cdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_fdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_tdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_cdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_fdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_tdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+
+void mpz_mod (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_divexact (mpz_t, const mpz_t, const mpz_t);
+
+int mpz_divisible_p (const mpz_t, const mpz_t);
+int mpz_congruent_p (const mpz_t, const mpz_t, const mpz_t);
+
+unsigned long mpz_cdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_ui (const mpz_t, unsigned long);
+unsigned long mpz_fdiv_ui (const mpz_t, unsigned long);
+unsigned long mpz_tdiv_ui (const mpz_t, unsigned long);
+
+unsigned long mpz_mod_ui (mpz_t, const mpz_t, unsigned long);
+
+void mpz_divexact_ui (mpz_t, const mpz_t, unsigned long);
+
+int mpz_divisible_ui_p (const mpz_t, unsigned long);
+
+unsigned long mpz_gcd_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_gcd (mpz_t, const mpz_t, const mpz_t);
+void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_lcm_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_lcm (mpz_t, const mpz_t, const mpz_t);
+int mpz_invert (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_sqrtrem (mpz_t, mpz_t, const mpz_t);
+void mpz_sqrt (mpz_t, const mpz_t);
+int mpz_perfect_square_p (const mpz_t);
+
+void mpz_pow_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_ui_pow_ui (mpz_t, unsigned long, unsigned long);
+void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t);
+void mpz_powm_ui (mpz_t, const mpz_t, unsigned long, const mpz_t);
+
+void mpz_rootrem (mpz_t, mpz_t, const mpz_t, unsigned long);
+int mpz_root (mpz_t, const mpz_t, unsigned long);
+
+void mpz_fac_ui (mpz_t, unsigned long);
+void mpz_2fac_ui (mpz_t, unsigned long);
+void mpz_mfac_uiui (mpz_t, unsigned long, unsigned long);
+void mpz_bin_uiui (mpz_t, unsigned long, unsigned long);
+
+int mpz_probab_prime_p (const mpz_t, int);
+
+int mpz_tstbit (const mpz_t, mp_bitcnt_t);
+void mpz_setbit (mpz_t, mp_bitcnt_t);
+void mpz_clrbit (mpz_t, mp_bitcnt_t);
+void mpz_combit (mpz_t, mp_bitcnt_t);
+
+void mpz_com (mpz_t, const mpz_t);
+void mpz_and (mpz_t, const mpz_t, const mpz_t);
+void mpz_ior (mpz_t, const mpz_t, const mpz_t);
+void mpz_xor (mpz_t, const mpz_t, const mpz_t);
+
+mp_bitcnt_t mpz_popcount (const mpz_t);
+mp_bitcnt_t mpz_hamdist (const mpz_t, const mpz_t);
+mp_bitcnt_t mpz_scan0 (const mpz_t, mp_bitcnt_t);
+mp_bitcnt_t mpz_scan1 (const mpz_t, mp_bitcnt_t);
+
+int mpz_fits_slong_p (const mpz_t);
+int mpz_fits_ulong_p (const mpz_t);
+int mpz_fits_sint_p (const mpz_t);
+int mpz_fits_uint_p (const mpz_t);
+int mpz_fits_sshort_p (const mpz_t);
+int mpz_fits_ushort_p (const mpz_t);
+long int mpz_get_si (const mpz_t);
+unsigned long int mpz_get_ui (const mpz_t);
+double mpz_get_d (const mpz_t);
+size_t mpz_size (const mpz_t);
+mp_limb_t mpz_getlimbn (const mpz_t, mp_size_t);
+
+void mpz_realloc2 (mpz_t, mp_bitcnt_t);
+mp_srcptr mpz_limbs_read (mpz_srcptr);
+mp_ptr mpz_limbs_modify (mpz_t, mp_size_t);
+mp_ptr mpz_limbs_write (mpz_t, mp_size_t);
+void mpz_limbs_finish (mpz_t, mp_size_t);
+mpz_srcptr mpz_roinit_n (mpz_t, mp_srcptr, mp_size_t);
+
+#define MPZ_ROINIT_N(xp, xs) {{0, (xs),(xp) }}
+
+void mpz_set_si (mpz_t, signed long int);
+void mpz_set_ui (mpz_t, unsigned long int);
+void mpz_set (mpz_t, const mpz_t);
+void mpz_set_d (mpz_t, double);
+
+void mpz_init_set_si (mpz_t, signed long int);
+void mpz_init_set_ui (mpz_t, unsigned long int);
+void mpz_init_set (mpz_t, const mpz_t);
+void mpz_init_set_d (mpz_t, double);
+
+size_t mpz_sizeinbase (const mpz_t, int);
+char *mpz_get_str (char *, int, const mpz_t);
+int mpz_set_str (mpz_t, const char *, int);
+int mpz_init_set_str (mpz_t, const char *, int);
+
+/* This long list taken from gmp.h. */
+/* For reference, "defined(EOF)" cannot be used here.  In g++ 2.95.4,
+   <iostream> defines EOF but not FILE.  */
+#if defined (FILE)                                              \
+  || defined (H_STDIO)                                          \
+  || defined (_H_STDIO)               /* AIX */                 \
+  || defined (_STDIO_H)               /* glibc, Sun, SCO */     \
+  || defined (_STDIO_H_)              /* BSD, OSF */            \
+  || defined (__STDIO_H)              /* Borland */             \
+  || defined (__STDIO_H__)            /* IRIX */                \
+  || defined (_STDIO_INCLUDED)        /* HPUX */                \
+  || defined (__dj_include_stdio_h_)  /* DJGPP */               \
+  || defined (_FILE_DEFINED)          /* Microsoft */           \
+  || defined (__STDIO__)              /* Apple MPW MrC */       \
+  || defined (_MSL_STDIO_H)           /* Metrowerks */          \
+  || defined (_STDIO_H_INCLUDED)      /* QNX4 */		\
+  || defined (_ISO_STDIO_ISO_H)       /* Sun C++ */		\
+  || defined (__STDIO_LOADED)         /* VMS */			\
+  || defined (_STDIO)                 /* HPE NonStop */         \
+  || defined (__DEFINED_FILE)         /* musl */
+size_t mpz_out_str (FILE *, int, const mpz_t);
+#endif
+
+void mpz_import (mpz_t, size_t, int, size_t, int, size_t, const void *);
+void *mpz_export (void *, size_t *, int, size_t, int, size_t, const mpz_t);
+
+#if defined (__cplusplus)
+}
+#endif
+#endif /* __MINI_GMP_H__ */
diff --git a/mini-gmp/mini-mpq.c b/mini-gmp/mini-mpq.c
new file mode 100644
index 0000000..58ce37f
--- /dev/null
+++ b/mini-gmp/mini-mpq.c
@@ -0,0 +1,556 @@
+/* mini-mpq, a minimalistic implementation of a GNU GMP subset.
+
+   Contributed to the GNU project by Marco Bodrato
+
+   Acknowledgment: special thanks to Bradley Lucier for his comments
+   to the preliminary version of this code.
+
+Copyright 2018-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mini-mpq.h"
+
+#ifndef GMP_LIMB_HIGHBIT
+/* Define macros and static functions already defined by mini-gmp.c */
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))
+#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0)
+/* Compute -x in the unsigned type T without overflowing even for the
+   most negative signed value: (x+1) is representable, so negate via
+   -((T)(x+1) - 1).  */
+#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))
+#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/* Initialize x as a read-only mpz aliasing the xs limbs at xp.  No
+   storage is allocated (_mp_alloc == 0), so x must never be cleared
+   or resized.  The sign of xs carries the sign of the number.  */
+static mpz_srcptr
+mpz_roinit_normal_n (mpz_t x, mp_srcptr xp, mp_size_t xs)
+{
+  x->_mp_alloc = 0;
+  x->_mp_d = (mp_ptr) xp;
+  x->_mp_size = xs;
+  return x;
+}
+
+/* Print msg to stderr and abort; used for unrecoverable errors such
+   as a zero denominator.  */
+static void
+gmp_die (const char *msg)
+{
+  fprintf (stderr, "%s\n", msg);
+  abort();
+}
+#endif
+
+
+/* MPQ helper functions */
+
+/* Initialize x as a read-only mpq whose numerator aliases the ns
+   limbs at np and whose denominator aliases the ds limbs at dp.  No
+   storage is allocated; x must not be cleared or modified.  */
+static mpq_srcptr
+mpq_roinit_normal_nn (mpq_t x, mp_srcptr np, mp_size_t ns,
+		     mp_srcptr dp, mp_size_t ds)
+{
+  mpz_roinit_normal_n (mpq_numref(x), np, ns);
+  mpz_roinit_normal_n (mpq_denref(x), dp, ds);
+  return x;
+}
+
+/* Initialize x as a read-only mpq view of the fraction n/d, sharing
+   the limb data of n and d.  */
+static mpq_srcptr
+mpq_roinit_zz (mpq_t x, mpz_srcptr n, mpz_srcptr d)
+{
+  return mpq_roinit_normal_nn (x, n->_mp_d, n->_mp_size,
+			       d->_mp_d, d->_mp_size);
+}
+
+/* Initialize x to the invalid value 0/0 ("NaN"); the caller must
+   store a proper value before x is used in arithmetic.  */
+static void
+mpq_nan_init (mpq_t x)
+{
+  mpz_init (mpq_numref (x));
+  mpz_init (mpq_denref (x));
+}
+
+/* Initialize x to 0/1.  */
+void
+mpq_init (mpq_t x)
+{
+  mpz_init (mpq_numref (x));
+  mpz_init_set_ui (mpq_denref (x), 1);
+}
+
+/* Free all storage owned by x.  */
+void
+mpq_clear (mpq_t x)
+{
+  mpz_clear (mpq_numref (x));
+  mpz_clear (mpq_denref (x));
+}
+
+/* Move any sign from the denominator to the numerator, so that the
+   denominator ends up positive.  Dies if the denominator is zero.  */
+static void
+mpq_canonical_sign (mpq_t r)
+{
+  mp_size_t ds = mpq_denref (r)->_mp_size;
+  if (ds <= 0)
+    {
+      if (ds == 0)
+	gmp_die("mpq: Fraction with zero denominator.");
+      mpz_neg (mpq_denref (r), mpq_denref (r));
+      mpz_neg (mpq_numref (r), mpq_numref (r));
+    }
+}
+
+/* Store num/den in r reduced to lowest terms with a positive
+   denominator.  num and den may alias parts of r.  Note: a zero
+   numerator yields 0/1 without examining den.  */
+static void
+mpq_helper_canonicalize (mpq_t r, const mpz_t num, const mpz_t den)
+{
+  if (num->_mp_size == 0)
+    mpq_set_ui (r, 0, 1);
+  else
+    {
+      mpz_t g;
+
+      mpz_init (g);
+      mpz_gcd (g, num, den);
+      mpz_tdiv_q (mpq_numref (r), num, g);
+      mpz_tdiv_q (mpq_denref (r), den, g);
+      mpz_clear (g);
+      mpq_canonical_sign (r);
+    }
+}
+
+/* Put r in canonical form: lowest terms, positive denominator.  */
+void
+mpq_canonicalize (mpq_t r)
+{
+  mpq_helper_canonicalize (r, mpq_numref (r), mpq_denref (r));
+}
+
+/* Exchange the values of a and b without copying limb data.  */
+void
+mpq_swap (mpq_t a, mpq_t b)
+{
+  mpz_swap (mpq_numref (a), mpq_numref (b));
+  mpz_swap (mpq_denref (a), mpq_denref (b));
+}
+
+
+/* MPQ assignment and conversions. */
+
+/* r = q truncated toward zero.  */
+void
+mpz_set_q (mpz_t r, const mpq_t q)
+{
+  mpz_tdiv_q (r, mpq_numref (q), mpq_denref (q));
+}
+
+/* r = q.  */
+void
+mpq_set (mpq_t r, const mpq_t q)
+{
+  mpz_set (mpq_numref (r), mpq_numref (q));
+  mpz_set (mpq_denref (r), mpq_denref (q));
+}
+
+/* r = n/d.  The result is not canonicalized.  */
+void
+mpq_set_ui (mpq_t r, unsigned long n, unsigned long d)
+{
+  mpz_set_ui (mpq_numref (r), n);
+  mpz_set_ui (mpq_denref (r), d);
+}
+
+/* r = n/d with signed numerator.  The result is not canonicalized.  */
+void
+mpq_set_si (mpq_t r, signed long n, unsigned long d)
+{
+  mpz_set_si (mpq_numref (r), n);
+  mpz_set_ui (mpq_denref (r), d);
+}
+
+/* r = n/1.  */
+void
+mpq_set_z (mpq_t r, const mpz_t n)
+{
+  mpz_set_ui (mpq_denref (r), 1);
+  mpz_set (mpq_numref (r), n);
+}
+
+/* Replace the numerator of r with z; the denominator is untouched.  */
+void
+mpq_set_num (mpq_t r, const mpz_t z)
+{
+  mpz_set (mpq_numref (r), z);
+}
+
+/* Replace the denominator of r with z; the numerator is untouched.  */
+void
+mpq_set_den (mpq_t r, const mpz_t z)
+{
+  mpz_set (mpq_denref (r), z);
+}
+
+/* r = numerator of q.  */
+void
+mpq_get_num (mpz_t r, const mpq_t q)
+{
+  mpz_set (r, mpq_numref (q));
+}
+
+/* r = denominator of q.  */
+void
+mpq_get_den (mpz_t r, const mpq_t q)
+{
+  mpz_set (r, mpq_denref (q));
+}
+
+
+/* MPQ comparisons and the like. */
+/* Return the sign of a - b, by comparing the cross products
+   num(a)*den(b) and num(b)*den(a).  Requires positive denominators
+   (canonical sign); otherwise the sign of the result would flip.  */
+int
+mpq_cmp (const mpq_t a, const mpq_t b)
+{
+  mpz_t t1, t2;
+  int res;
+
+  mpz_init (t1);
+  mpz_init (t2);
+  mpz_mul (t1, mpq_numref (a), mpq_denref (b));
+  mpz_mul (t2, mpq_numref (b), mpq_denref (a));
+  res = mpz_cmp (t1, t2);
+  mpz_clear (t1);
+  mpz_clear (t2);
+
+  return res;
+}
+
+/* Return the sign of a - b for integer b, comparing num(a) with
+   b*den(a).  */
+int
+mpq_cmp_z (const mpq_t a, const mpz_t b)
+{
+  mpz_t t;
+  int res;
+
+  mpz_init (t);
+  mpz_mul (t, b, mpq_denref (a));
+  res = mpz_cmp (mpq_numref (a), t);
+  mpz_clear (t);
+
+  return res;
+}
+
+/* Return non-zero iff a == b, comparing numerator and denominator
+   componentwise; both operands must be in canonical form.  */
+int
+mpq_equal (const mpq_t a, const mpq_t b)
+{
+  return (mpz_cmp (mpq_numref (a), mpq_numref (b)) == 0) &&
+    (mpz_cmp (mpq_denref (a), mpq_denref (b)) == 0);
+}
+
+/* Compare q with n/d (d must be non-zero).  When unsigned long fits
+   in one limb, build a cheap read-only one-limb fraction on the
+   stack; otherwise fall back to an allocated temporary.  */
+int
+mpq_cmp_ui (const mpq_t q, unsigned long n, unsigned long d)
+{
+  mpq_t t;
+  assert (d != 0);
+  if (ULONG_MAX <= GMP_LIMB_MAX) {
+    mp_limb_t nl = n, dl = d;
+    /* Numerator size is 0 when n == 0, else 1 limb.  */
+    return mpq_cmp (q, mpq_roinit_normal_nn (t, &nl, n != 0, &dl, 1));
+  } else {
+    int ret;
+
+    mpq_nan_init (t);
+    mpq_set_ui (t, n, d);
+    ret = mpq_cmp (q, t);
+    mpq_clear (t);
+
+    return ret;
+  }
+}
+
+/* Compare q with n/d for signed n (d must be non-zero).  */
+int
+mpq_cmp_si (const mpq_t q, signed long n, unsigned long d)
+{
+  assert (d != 0);
+
+  if (n >= 0)
+    return mpq_cmp_ui (q, n, d);
+  else
+    {
+      mpq_t t;
+
+      if (ULONG_MAX <= GMP_LIMB_MAX)
+	{
+	  /* |n| fits in one limb: compare against a stack-allocated
+	     read-only fraction whose numerator has size -1, i.e. one
+	     negative limb.  */
+	  mp_limb_t nl = GMP_NEG_CAST (unsigned long, n), dl = d;
+	  return mpq_cmp (q, mpq_roinit_normal_nn (t, &nl, -1, &dl, 1));
+	}
+      else
+	{
+	  unsigned long l_n = GMP_NEG_CAST (unsigned long, n);
+
+	  /* Compare -q with |n|/d through a read-only view of q with
+	     the numerator sign flipped, then negate the result.  */
+	  mpq_roinit_normal_nn (t, mpq_numref (q)->_mp_d, - mpq_numref (q)->_mp_size,
+				mpq_denref (q)->_mp_d, mpq_denref (q)->_mp_size);
+	  return - mpq_cmp_ui (t, l_n, d);
+	}
+    }
+}
+
+/* Return -1, 0 or +1 according to the sign of a (the sign of the
+   numerator, assuming canonical positive denominator).  */
+int
+mpq_sgn (const mpq_t a)
+{
+  return mpz_sgn (mpq_numref (a));
+}
+
+
+/* MPQ arithmetic. */
+
+/* r = |q|.  */
+void
+mpq_abs (mpq_t r, const mpq_t q)
+{
+  mpz_abs (mpq_numref (r), mpq_numref (q));
+  mpz_set (mpq_denref (r), mpq_denref (q));
+}
+
+/* r = -q.  */
+void
+mpq_neg (mpq_t r, const mpq_t q)
+{
+  mpz_neg (mpq_numref (r), mpq_numref (q));
+  mpz_set (mpq_denref (r), mpq_denref (q));
+}
+
+/* r = a + b, canonical provided a and b are canonical.  Uses the gcd
+   of the denominators to keep intermediate products small.  */
+void
+mpq_add (mpq_t r, const mpq_t a, const mpq_t b)
+{
+  mpz_t t;
+
+  mpz_init (t);
+  mpz_gcd (t, mpq_denref (a), mpq_denref (b));
+  if (mpz_cmp_ui (t, 1) == 0)
+    {
+      /* Coprime denominators: no cancellation is possible; add
+	 directly over the product of the denominators.  */
+      mpz_mul (t, mpq_numref (a), mpq_denref (b));
+      mpz_addmul (t, mpq_numref (b), mpq_denref (a));
+      mpz_mul (mpq_denref (r), mpq_denref (a), mpq_denref (b));
+      mpz_swap (mpq_numref (r), t);
+    }
+  else
+    {
+      /* With t = gcd(den(a), den(b)), x = den(b)/t and y = den(a)/t:
+	 the numerator is num(a)*x + num(b)*y over denominator
+	 den(a)*x.  Any remaining common factor divides t, so one more
+	 gcd against t finishes the reduction.  */
+      mpz_t x, y;
+      mpz_init (x);
+      mpz_init (y);
+
+      mpz_tdiv_q (x, mpq_denref (b), t);
+      mpz_tdiv_q (y, mpq_denref (a), t);
+      mpz_mul (x, mpq_numref (a), x);
+      mpz_addmul (x, mpq_numref (b), y);
+
+      mpz_gcd (t, x, t);
+      mpz_tdiv_q (mpq_numref (r), x, t);
+      mpz_tdiv_q (x, mpq_denref (b), t);
+      mpz_mul (mpq_denref (r), x, y);
+
+      mpz_clear (x);
+      mpz_clear (y);
+    }
+  mpz_clear (t);
+}
+
+/* r = a - b, implemented as a + (-b) where -b is a read-only view of
+   b with the numerator sign flipped (no copy of b's limbs).  */
+void
+mpq_sub (mpq_t r, const mpq_t a, const mpq_t b)
+{
+  mpq_t t;
+
+  mpq_roinit_normal_nn (t, mpq_numref (b)->_mp_d, - mpq_numref (b)->_mp_size,
+			mpq_denref (b)->_mp_d, mpq_denref (b)->_mp_size);
+  mpq_add (r, a, t);
+}
+
+/* r = a / b, implemented as a * (1/b) where 1/b is a read-only view
+   of b with numerator and denominator exchanged.  b must be
+   non-zero.  */
+void
+mpq_div (mpq_t r, const mpq_t a, const mpq_t b)
+{
+  mpq_t t;
+  mpq_mul (r, a, mpq_roinit_zz (t, mpq_denref (b), mpq_numref (b)));
+}
+
+/* r = a * b, canonical provided a and b are canonical.  Cross-cancels
+   gcd(num(a), den(b)) and gcd(num(b), den(a)) first, so the final
+   products are already in lowest terms.  When a == b (squaring) each
+   numerator/denominator pair is already coprime and the cancellation
+   step is skipped.  */
+void
+mpq_mul (mpq_t r, const mpq_t a, const mpq_t b)
+{
+  mpq_t t;
+  mpq_nan_init (t);
+
+  if (a != b) {
+    /* t = num(a)/den(b) and r = num(b)/den(a), each reduced; their
+       product equals a*b.  r may alias a or b, so a and b are
+       repointed at the reduced halves below.  */
+    mpq_helper_canonicalize (t, mpq_numref (a), mpq_denref (b));
+    mpq_helper_canonicalize (r, mpq_numref (b), mpq_denref (a));
+
+    a = r;
+    b = t;
+  }
+
+  mpz_mul (mpq_numref (r), mpq_numref (a), mpq_numref (b));
+  mpz_mul (mpq_denref (r), mpq_denref (a), mpq_denref (b));
+  mpq_clear (t);
+}
+
+/* Store (qn/qd) * 2^e in rn/rd: cancel up to e factors of two against
+   the trailing zero bits of qd, and shift the remaining factors into
+   qn.  Shared by mpq_mul_2exp (num/den order) and mpq_div_2exp
+   (den/num order).  */
+static void
+mpq_helper_2exp (mpz_t rn, mpz_t rd, const mpz_t qn, const mpz_t qd, mp_bitcnt_t e)
+{
+  mp_bitcnt_t z = mpz_scan1 (qd, 0);
+  z = GMP_MIN (z, e);
+  mpz_mul_2exp (rn, qn, e - z);
+  mpz_tdiv_q_2exp (rd, qd, z);
+}
+
+/* r = q / 2^e.  */
+void
+mpq_div_2exp (mpq_t r, const mpq_t q, mp_bitcnt_t e)
+{
+  mpq_helper_2exp (mpq_denref (r), mpq_numref (r), mpq_denref (q), mpq_numref (q), e);
+}
+
+/* r = q * 2^e.  */
+void
+mpq_mul_2exp (mpq_t r, const mpq_t q, mp_bitcnt_t e)
+{
+  mpq_helper_2exp (mpq_numref (r), mpq_denref (r), mpq_numref (q), mpq_denref (q), e);
+}
+
+/* r = 1 / q: swap numerator and denominator, then restore the
+   positive-denominator convention.  Dies if q is zero.  */
+void
+mpq_inv (mpq_t r, const mpq_t q)
+{
+  mpq_set (r, q);
+  mpz_swap (mpq_denref (r), mpq_numref (r));
+  mpq_canonical_sign (r);
+}
+
+
+/* MPQ to/from double. */
+/* Set r to the exact value of the double x (every finite double is a
+   dyadic rational).  NaN, zero and infinity all map to 0.  */
+void
+mpq_set_d (mpq_t r, double x)
+{
+  mpz_set_ui (mpq_denref (r), 1);
+
+  /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is
+     zero or infinity. */
+  if (x == x * 0.5 || x != x)
+    mpq_numref (r)->_mp_size = 0;
+  else
+    {
+      double B;
+      mp_bitcnt_t e;
+
+      /* B = 2^GMP_LIMB_BITS.  Scale x up by whole limbs until adding
+	 0.5 no longer changes it, i.e. until x is certainly an
+	 integer; e counts the scaling bits, undone below by dividing
+	 by 2^e.  */
+      B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
+      for (e = 0; x != x + 0.5; e += GMP_LIMB_BITS)
+	x *= B;
+
+      mpz_set_d (mpq_numref (r), x);
+      mpq_div_2exp (r, r, e);
+    }
+}
+
+/* Convert u to a double.  Computes the truncated quotient of
+   num * 2^(ee*GMP_LIMB_BITS) by den, so that it carries at least as
+   many bits as a double mantissa, then scales the converted result
+   back down one limb at a time.  */
+double
+mpq_get_d (const mpq_t u)
+{
+  mp_bitcnt_t ne, de, ee;
+  mpz_t z;
+  double B, ret;
+
+  ne = mpz_sizeinbase (mpq_numref (u), 2);
+  de = mpz_sizeinbase (mpq_denref (u), 2);
+
+  /* No scaling needed when the denominator is 1 or the quotient is
+     already wider than a double; otherwise pick enough extra limbs to
+     cover the mantissa width.  */
+  ee = CHAR_BIT * sizeof (double);
+  if (de == 1 || ne > de + ee)
+    ee = 0;
+  else
+    ee = (ee + de - ne) / GMP_LIMB_BITS + 1;
+
+  mpz_init (z);
+  mpz_mul_2exp (z, mpq_numref (u), ee * GMP_LIMB_BITS);
+  mpz_tdiv_q (z, z, mpq_denref (u));
+  ret = mpz_get_d (z);
+  mpz_clear (z);
+
+  /* B = 2^-GMP_LIMB_BITS; undo the scaling ee times.  */
+  B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
+  for (B = 1 / B; ee != 0; --ee)
+    ret *= B;
+
+  return ret;
+}
+
+
+/* MPQ and strings/streams. */
+/* Write q in the given base as "num/den" (just "num" when the
+   denominator is 1).  If sp is non-NULL it must be large enough for
+   the result; otherwise a buffer is obtained from the configured
+   allocator.  Returns NULL if mpz_get_str failed on the numerator.  */
+char *
+mpq_get_str (char *sp, int base, const mpq_t q)
+{
+  char *res;
+  char *rden;
+  size_t len;
+
+  res = mpz_get_str (sp, base, mpq_numref (q));
+  if (res == NULL || mpz_cmp_ui (mpq_denref (q), 1) == 0)
+    return res;
+
+  /* len counts the numerator digits plus the terminating NUL, which
+     is overwritten with the '/' separator at the end.  */
+  len = strlen (res) + 1;
+  rden = sp ? sp + len : NULL;
+  rden = mpz_get_str (rden, base, mpq_denref (q));
+  assert (rden != NULL);
+
+  if (sp == NULL) {
+    /* Two separate buffers were allocated: grow the numerator buffer,
+       append the denominator string, and release the latter.  */
+    void * (*gmp_reallocate_func) (void *, size_t, size_t);
+    void (*gmp_free_func) (void *, size_t);
+    size_t lden;
+
+    mp_get_memory_functions (NULL, &gmp_reallocate_func, &gmp_free_func);
+    lden = strlen (rden) + 1;
+    res = (char *) gmp_reallocate_func (res, len, (lden + len) * sizeof (char));
+    memcpy (res + len, rden, lden);
+    gmp_free_func (rden, lden);
+  }
+
+  res [len - 1] = '/';
+  return res;
+}
+
+/* Write x to stream in the given base; returns the number of bytes
+   written, or 0 if the conversion failed.  */
+size_t
+mpq_out_str (FILE *stream, int base, const mpq_t x)
+{
+  char * str;
+  size_t len, n;
+  void (*gmp_free_func) (void *, size_t);
+
+  str = mpq_get_str (NULL, base, x);
+  if (!str)
+    return 0;
+  len = strlen (str);
+  n = fwrite (str, 1, len, stream);
+  /* Release the buffer with the allocator that produced it.  */
+  mp_get_memory_functions (NULL, NULL, &gmp_free_func);
+  gmp_free_func (str, len + 1);
+  return n;
+}
+
+/* Parse "num" or "num/den" from sp in the given base.  Returns 0 on
+   success, or mpz_set_str's error value on failure (in which case r
+   may be partially overwritten).  The result is not canonicalized.  */
+int
+mpq_set_str (mpq_t r, const char *sp, int base)
+{
+  const char *slash;
+
+  slash = strchr (sp, '/');
+  if (slash == NULL) {
+    mpz_set_ui (mpq_denref(r), 1);
+    return mpz_set_str (mpq_numref(r), sp, base);
+  } else {
+    char *num;
+    size_t numlen;
+    int ret;
+    void * (*gmp_allocate_func) (size_t);
+    void (*gmp_free_func) (void *, size_t);
+
+    /* Copy the numerator part into a NUL-terminated scratch buffer so
+       mpz_set_str can parse it in isolation.  */
+    mp_get_memory_functions (&gmp_allocate_func, NULL, &gmp_free_func);
+    numlen = slash - sp;
+    num = (char *) gmp_allocate_func (numlen + 1);
+    memcpy (num, sp, numlen);
+    num[numlen] = '\0';
+    ret = mpz_set_str (mpq_numref(r), num, base);
+    gmp_free_func (num, numlen + 1);
+
+    if (ret != 0)
+      return ret;
+
+    return mpz_set_str (mpq_denref(r), slash + 1, base);
+  }
+}
diff --git a/mini-gmp/mini-mpq.h b/mini-gmp/mini-mpq.h
new file mode 100644
index 0000000..8eabcec
--- /dev/null
+++ b/mini-gmp/mini-mpq.h
@@ -0,0 +1,114 @@
+/* mini-mpq, a minimalistic implementation of a GNU GMP subset.
+
+Copyright 2018, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* Header */
+
+#ifndef __MINI_MPQ_H__
+#define __MINI_MPQ_H__
+
+#include "mini-gmp.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+typedef struct
+{
+  __mpz_struct _mp_num;
+  __mpz_struct _mp_den;
+} __mpq_struct;
+
+typedef __mpq_struct mpq_t[1];
+
+typedef const __mpq_struct *mpq_srcptr;
+typedef __mpq_struct *mpq_ptr;
+
+#define mpq_numref(Q) (&((Q)->_mp_num))
+#define mpq_denref(Q) (&((Q)->_mp_den))
+
+void mpq_abs (mpq_t, const mpq_t);
+void mpq_add (mpq_t, const mpq_t, const mpq_t);
+void mpq_canonicalize (mpq_t);
+void mpq_clear (mpq_t);
+int mpq_cmp (const mpq_t, const mpq_t);
+int mpq_cmp_si (const mpq_t, signed long, unsigned long);
+int mpq_cmp_ui (const mpq_t, unsigned long, unsigned long);
+int mpq_cmp_z (const mpq_t, const mpz_t);
+void mpq_div (mpq_t, const mpq_t, const mpq_t);
+void mpq_div_2exp (mpq_t, const mpq_t, mp_bitcnt_t);
+int mpq_equal (const mpq_t, const mpq_t);
+double mpq_get_d (const mpq_t);
+void mpq_get_den (mpz_t, const mpq_t);
+void mpq_get_num (mpz_t, const mpq_t);
+char * mpq_get_str (char *, int, const mpq_t q);
+void mpq_init (mpq_t);
+void mpq_inv (mpq_t, const mpq_t);
+void mpq_mul (mpq_t, const mpq_t, const mpq_t);
+void mpq_mul_2exp (mpq_t, const mpq_t, mp_bitcnt_t);
+void mpq_neg (mpq_t, const mpq_t);
+void mpq_set (mpq_t, const mpq_t);
+void mpq_set_d (mpq_t, double);
+void mpq_set_den (mpq_t, const mpz_t);
+void mpq_set_num (mpq_t, const mpz_t);
+void mpq_set_si (mpq_t, signed long, unsigned long);
+int mpq_set_str (mpq_t, const char *, int);
+void mpq_set_ui (mpq_t, unsigned long, unsigned long);
+void mpq_set_z (mpq_t, const mpz_t);
+int mpq_sgn (const mpq_t);
+void mpq_sub (mpq_t, const mpq_t, const mpq_t);
+void mpq_swap (mpq_t, mpq_t);
+
+/* This long list taken from gmp.h.  NOTE(review): this copy is shorter
+   than the list guarding mpz_out_str in mini-gmp.h, which also tests
+   _STDIO (HPE NonStop) and __DEFINED_FILE (musl); confirm whether the
+   two lists should be kept in sync so musl users get mpq_out_str.  */
+/* For reference, "defined(EOF)" cannot be used here.  In g++ 2.95.4,
+   <iostream> defines EOF but not FILE.  */
+#if defined (FILE)                                              \
+  || defined (H_STDIO)                                          \
+  || defined (_H_STDIO)               /* AIX */                 \
+  || defined (_STDIO_H)               /* glibc, Sun, SCO */     \
+  || defined (_STDIO_H_)              /* BSD, OSF */            \
+  || defined (__STDIO_H)              /* Borland */             \
+  || defined (__STDIO_H__)            /* IRIX */                \
+  || defined (_STDIO_INCLUDED)        /* HPUX */                \
+  || defined (__dj_include_stdio_h_)  /* DJGPP */               \
+  || defined (_FILE_DEFINED)          /* Microsoft */           \
+  || defined (__STDIO__)              /* Apple MPW MrC */       \
+  || defined (_MSL_STDIO_H)           /* Metrowerks */          \
+  || defined (_STDIO_H_INCLUDED)      /* QNX4 */                \
+  || defined (_ISO_STDIO_ISO_H)       /* Sun C++ */             \
+  || defined (__STDIO_LOADED)         /* VMS */
+size_t mpq_out_str (FILE *, int, const mpq_t);
+#endif
+
+void mpz_set_q (mpz_t, const mpq_t);
+
+#if defined (__cplusplus)
+}
+#endif
+#endif /* __MINI_MPQ_H__ */
diff --git a/mp_bpl.c b/mp_bpl.c
new file mode 100644
index 0000000..a13fb15
--- /dev/null
+++ b/mp_bpl.c
@@ -0,0 +1,34 @@
+/*
+Copyright 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Public constant exposing the number of bits per mp_limb_t.  */
+const int mp_bits_per_limb = GMP_LIMB_BITS;
+/* NOTE(review): __gmp_0 and __gmp_junk appear to be dummy globals
+   referenced elsewhere in the library (e.g. as assignment sinks);
+   their users are not visible in this file — confirm before removing.  */
+const int __gmp_0 = 0;
+int __gmp_junk;
diff --git a/mp_clz_tab.c b/mp_clz_tab.c
new file mode 100644
index 0000000..fc7cb0b
--- /dev/null
+++ b/mp_clz_tab.c
@@ -0,0 +1,48 @@
+/* __clz_tab -- support for longlong.h
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE
+   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+/* Lookup table for the generic count_leading_zeros in longlong.h:
+   __clz_tab[i] is one more than the bit width of i (1 for i == 0),
+   i.e. floor(log2(i)) + 2 for 1 <= i <= 128.  */
+const
+unsigned char __clz_tab[129] =
+{
+  1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  9
+};
+#endif
diff --git a/mp_dv_tab.c b/mp_dv_tab.c
new file mode 100644
index 0000000..cd09d5d
--- /dev/null
+++ b/mp_dv_tab.c
@@ -0,0 +1,77 @@
+/* __gmp_digit_value_tab -- support for mp*_set_str
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE
+   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Table to be indexed by character, to get its numerical value.  Assumes ASCII
+   character set.
+
+   First part of table supports common usages, where 'A' and 'a' have the same
+   value; this supports bases 2..36
+
+   At offset 208, values for bases 37..62 start.  Here, 'A' has the value 10
+   (in decimal) and 'a' has the value 36.  */
+
+/* X marks a character that is not a valid digit in any supported base.  */
+#define X 0xff
+const unsigned char __gmp_digit_value_tab[] =
+{
+  /* Part 1, offsets 0..207: case-insensitive digit values for bases
+     2..36 ('A'..'Z' and 'a'..'z' both map to 10..35).  */
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,
+  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
+  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  /* Part 2, offsets 208..463: case-sensitive digit values for bases
+     37..62 ('A'..'Z' map to 10..35, 'a'..'z' to 36..61).  */
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,
+  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
+  X,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,
+  51,52,53,54,55,56,57,58,59,60,61,X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X
+};
diff --git a/mp_get_fns.c b/mp_get_fns.c
new file mode 100644
index 0000000..70ed96d
--- /dev/null
+++ b/mp_get_fns.c
@@ -0,0 +1,47 @@
+/* mp_get_memory_functions -- Get the allocate, reallocate, and free functions.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp-impl.h"
+
+/* Report the currently installed allocation functions through the
+   out-parameters.  Any of the three pointers may be NULL, meaning the
+   caller does not want that value.  */
+void
+mp_get_memory_functions (void *(**alloc_func) (size_t),
+			 void *(**realloc_func) (void *, size_t, size_t),
+			 void (**free_func) (void *, size_t)) __GMP_NOTHROW
+{
+  if (alloc_func != NULL)
+    *alloc_func = __gmp_allocate_func;
+
+  if (realloc_func != NULL)
+    *realloc_func = __gmp_reallocate_func;
+
+  if (free_func != NULL)
+    *free_func = __gmp_free_func;
+}
diff --git a/mp_minv_tab.c b/mp_minv_tab.c
new file mode 100644
index 0000000..833d4c1
--- /dev/null
+++ b/mp_minv_tab.c
@@ -0,0 +1,58 @@
+/* A table of data supporting binvert_limb().
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE
+   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.  */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* binvert_limb_table[i] is the multiplicative inverse of 2*i+1 mod 256,
+   ie. (binvert_limb_table[i] * (2*i+1)) % 256 == 1 */
+
+/* NOTE(review): binvert_limb() lives in gmp-impl.h (not in this view);
+   presumably it indexes this table as binvert_limb_table[(n >> 1) & 0x7F]
+   for odd n to seed a Newton iteration -- confirm there.  Each entry
+   satisfies the invariant stated above:
+   (binvert_limb_table[i] * (2*i+1)) & 0xFF == 1.  */
+const unsigned char  binvert_limb_table[128] = {
+  0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
+  0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
+  0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
+  0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
+  0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
+  0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
+  0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
+  0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
+  0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
+  0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
+  0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
+  0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
+  0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
+  0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
+  0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
+  0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
+};
diff --git a/mp_set_fns.c b/mp_set_fns.c
new file mode 100644
index 0000000..b7bcd5b
--- /dev/null
+++ b/mp_set_fns.c
@@ -0,0 +1,49 @@
+/* mp_set_memory_functions -- Set the allocate, reallocate, and free functions
+   for use by the mp package.
+
+Copyright 1991, 1993, 1994, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Install user-supplied allocate/reallocate/free routines for all GMP
+   memory management.  Passing 0 (NULL) for any of the three arguments
+   reinstates the corresponding GMP default routine for that slot.  */
+void
+mp_set_memory_functions (void *(*alloc_func) (size_t),
+			 void *(*realloc_func) (void *, size_t, size_t),
+			 void (*free_func) (void *, size_t)) __GMP_NOTHROW
+{
+  if (alloc_func == 0)
+    alloc_func = __gmp_default_allocate;
+  if (realloc_func == 0)
+    realloc_func = __gmp_default_reallocate;
+  if (free_func == 0)
+    free_func = __gmp_default_free;
+
+  __gmp_allocate_func = alloc_func;
+  __gmp_reallocate_func = realloc_func;
+  __gmp_free_func = free_func;
+}
diff --git a/mpf/abs.c b/mpf/abs.c
new file mode 100644
index 0000000..1642a46
--- /dev/null
+++ b/mpf/abs.c
@@ -0,0 +1,58 @@
+/* mpf_abs -- Compute the absolute value of a float.
+
+Copyright 1993-1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* r = |u|.  When r and u are distinct objects, at most PREC(r)+1 of u's
+   most significant limbs are copied; dropping any low excess limbs cannot
+   change the value beyond r's precision.  When r == u only the size
+   (sign) field is updated, in place.  */
+void
+mpf_abs (mpf_ptr r, mpf_srcptr u)
+{
+  mp_size_t size;
+
+  size = ABS (u->_mp_size);
+  if (r != u)
+    {
+      mp_size_t prec;
+      mp_ptr rp, up;
+
+      prec = r->_mp_prec + 1;	/* lie not to lose precision in assignment */
+      rp = r->_mp_d;
+      up = u->_mp_d;
+
+      if (size > prec)
+	{
+	  /* u has more limbs than r can hold: keep only the high ones.  */
+	  up += size - prec;
+	  size = prec;
+	}
+
+      MPN_COPY (rp, up, size);
+      r->_mp_exp = u->_mp_exp;
+    }
+  r->_mp_size = size;  /* non-negative size field <=> non-negative value */
+}
diff --git a/mpf/add.c b/mpf/add.c
new file mode 100644
index 0000000..77a9e47
--- /dev/null
+++ b/mpf/add.c
@@ -0,0 +1,183 @@
+/* mpf_add -- Add two floats.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* r = u + v, truncated to r's precision.
+   Strategy: dispose of zero operands first; mixed signs are delegated to
+   mpf_sub.  With equal signs, make u the operand with the larger
+   exponent, clip both operands to r's precision, add the overlapping
+   limb ranges into a temporary, then copy the result (plus any carry
+   limb) into r.  */
+void
+mpf_add (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_srcptr up, vp;
+  mp_ptr rp, tp;
+  mp_size_t usize, vsize, rsize;
+  mp_size_t prec;
+  mp_exp_t uexp;
+  mp_size_t ediff;
+  mp_limb_t cy;
+  int negate;
+  TMP_DECL;
+
+  usize = u->_mp_size;
+  vsize = v->_mp_size;
+
+  /* Handle special cases that don't work in generic code below.  */
+  if (usize == 0)
+    {
+    set_r_v_maybe:
+      if (r != v)
+        mpf_set (r, v);
+      return;
+    }
+  /* u + 0: alias v to u and reuse the copy-or-return path above.  */
+  if (vsize == 0)
+    {
+      v = u;
+      goto set_r_v_maybe;
+    }
+
+  /* If signs of U and V are different, perform subtraction.  */
+  if ((usize ^ vsize) < 0)
+    {
+      /* Stack-local alias of v with the sign flipped; the limb array is
+	 shared, not copied.  */
+      __mpf_struct v_negated;
+      v_negated._mp_size = -vsize;
+      v_negated._mp_exp = v->_mp_exp;
+      v_negated._mp_d = v->_mp_d;
+      mpf_sub (r, u, &v_negated);
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Signs are now known to be the same.  */
+  negate = usize < 0;
+
+  /* Make U be the operand with the largest exponent.  */
+  if (u->_mp_exp < v->_mp_exp)
+    {
+      mpf_srcptr t;
+      t = u; u = v; v = t;
+      usize = u->_mp_size;
+      vsize = v->_mp_size;
+    }
+
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+  up = u->_mp_d;
+  vp = v->_mp_d;
+  rp = r->_mp_d;
+  prec = r->_mp_prec;
+  uexp = u->_mp_exp;
+  ediff = u->_mp_exp - v->_mp_exp;	/* limb offset between u and v; >= 0 here */
+
+  /* If U extends beyond PREC, ignore the part that does.  */
+  if (usize > prec)
+    {
+      up += usize - prec;
+      usize = prec;
+    }
+
+  /* If V extends beyond PREC, ignore the part that does.
+     Note that this may make vsize negative.  */
+  if (vsize + ediff > prec)
+    {
+      vp += vsize + ediff - prec;
+      vsize = prec - ediff;
+    }
+
+#if 0
+  /* Locate the least significant non-zero limb in (the needed parts
+     of) U and V, to simplify the code below.  */
+  while (up[0] == 0)
+    up++, usize--;
+  while (vp[0] == 0)
+    vp++, vsize--;
+#endif
+
+  /* Allocate temp space for the result.  Allocate
+     just vsize + ediff later???  */
+  tp = TMP_ALLOC_LIMBS (prec);
+
+  if (ediff >= prec)
+    {
+      /* V completely cancelled.  */
+      if (rp != up)
+	MPN_COPY_INCR (rp, up, usize);
+      rsize = usize;
+    }
+  else
+    {
+      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
+      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */
+
+      if (usize > ediff)
+	{
+	  /* U and V partially overlaps.  */
+	  if (vsize + ediff <= usize)
+	    {
+	      /* uuuu     */
+	      /*   v      */
+	      mp_size_t size;
+	      size = usize - ediff - vsize;
+	      MPN_COPY (tp, up, size);
+	      cy = mpn_add (tp + size, up + size, usize - size, vp, vsize);
+	      rsize = usize;
+	    }
+	  else
+	    {
+	      /* uuuu     */
+	      /*   vvvvv  */
+	      mp_size_t size;
+	      size = vsize + ediff - usize;
+	      MPN_COPY (tp, vp, size);
+	      cy = mpn_add (tp + size, up, usize, vp + size, usize - ediff);
+	      rsize = vsize + ediff;
+	    }
+	}
+      else
+	{
+	  /* uuuu     */
+	  /*      vv  */
+	  mp_size_t size;
+	  size = vsize + ediff - usize;
+	  MPN_COPY (tp, vp, vsize);
+	  MPN_ZERO (tp + vsize, ediff - usize);
+	  MPN_COPY (tp + size, up, usize);
+	  cy = 0;
+	  rsize = size + usize;
+	}
+
+      MPN_COPY (rp, tp, rsize);
+      rp[rsize] = cy;	/* append carry limb if the add overflowed */
+      rsize += cy;
+      uexp += cy;	/* carry moves the radix point up one limb */
+    }
+
+  r->_mp_size = negate ? -rsize : rsize;
+  r->_mp_exp = uexp;
+  TMP_FREE;
+}
diff --git a/mpf/add_ui.c b/mpf/add_ui.c
new file mode 100644
index 0000000..1e2a94b
--- /dev/null
+++ b/mpf/add_ui.c
@@ -0,0 +1,152 @@
+/* mpf_add_ui -- Add a float and an unsigned integer.
+
+Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* sum = u + v for an unsigned long v, truncated to sum's precision.
+   Negative u is handled by negating, delegating to mpf_sub_ui, and then
+   flipping the result's sign; thereafter u > 0 and the code splits on
+   where v's limb lands relative to u's limbs (uexp counts limbs above
+   the radix point).
+   NOTE(review): v is stored directly into a single limb below, which
+   assumes an unsigned long fits in one mp_limb_t -- confirm the
+   nails/small-limb configurations are handled elsewhere.  */
+void
+mpf_add_ui (mpf_ptr sum, mpf_srcptr u, unsigned long int v)
+{
+  mp_srcptr up = u->_mp_d;
+  mp_ptr sump = sum->_mp_d;
+  mp_size_t usize, sumsize;
+  mp_size_t prec = sum->_mp_prec;
+  mp_exp_t uexp = u->_mp_exp;
+
+  usize = u->_mp_size;
+  if (usize <= 0)
+    {
+      if (usize == 0)
+	{
+	  mpf_set_ui (sum, v);
+	  return;
+	}
+      else
+	{
+	  /* u < 0: compute v - |u| and negate.  The alias shares u's
+	     limb array, no copy is made.  */
+	  __mpf_struct u_negated;
+	  u_negated._mp_size = -usize;
+	  u_negated._mp_exp = u->_mp_exp;
+	  u_negated._mp_d = u->_mp_d;
+	  mpf_sub_ui (sum, &u_negated, v);
+	  sum->_mp_size = -(sum->_mp_size);
+	  return;
+	}
+    }
+
+  /* Adding zero: just copy a (possibly clipped) u into sum.  */
+  if (v == 0)
+    {
+    sum_is_u:
+      if (u != sum)
+	{
+	  sumsize = MIN (usize, prec + 1);
+	  MPN_COPY (sum->_mp_d, up + usize - sumsize, sumsize);
+	  sum->_mp_size = sumsize;
+	  sum->_mp_exp = u->_mp_exp;
+	}
+      return;
+    }
+
+  if (uexp > 0)
+    {
+      /* U >= 1.  */
+      if (uexp > prec)
+	{
+	  /* U >> V, V is not part of final result.  */
+	  goto sum_is_u;
+	}
+      else
+	{
+	  /* U's "limb point" is somewhere between the first limb
+	     and the PREC:th limb.
+	     Both U and V are part of the final result.  */
+	  if (uexp > usize)
+	    {
+	      /*   uuuuuu0000. */
+	      /* +          v. */
+	      /* We begin with moving U to the top of SUM, to handle
+		 samevar(U,SUM).  */
+	      MPN_COPY_DECR (sump + uexp - usize, up, usize);
+	      sump[0] = v;
+	      MPN_ZERO (sump + 1, uexp - usize - 1);
+#if 0 /* What is this??? */
+	      if (sum == u)
+		MPN_COPY (sum->_mp_d, sump, uexp);
+#endif
+	      sum->_mp_size = uexp;
+	      sum->_mp_exp = uexp;
+	    }
+	  else
+	    {
+	      /*   uuuuuu.uuuu */
+	      /* +      v.     */
+	      mp_limb_t cy_limb;
+	      if (usize > prec)
+		{
+		  /* Ignore excess limbs in U.  */
+		  up += usize - prec;
+		  usize -= usize - prec; /* Eq. usize = prec */
+		}
+	      if (sump != up)
+		MPN_COPY_INCR (sump, up, usize - uexp);
+	      /* Add v into the limb at the radix point; a carry can ripple
+		 all the way up and extend the result by one limb.  */
+	      cy_limb = mpn_add_1 (sump + usize - uexp, up + usize - uexp,
+				   uexp, (mp_limb_t) v);
+	      sump[usize] = cy_limb;
+	      sum->_mp_size = usize + cy_limb;
+	      sum->_mp_exp = uexp + cy_limb;
+	    }
+	}
+    }
+  else
+    {
+      /* U < 1, so V > U for sure.  */
+      /* v.         */
+      /*  .0000uuuu */
+      if ((-uexp) >= prec)
+	{
+	  /* u lies entirely below sum's precision: result is just v.  */
+	  sump[0] = v;
+	  sum->_mp_size = 1;
+	  sum->_mp_exp = 1;
+	}
+      else
+	{
+	  if (usize + (-uexp) + 1 > prec)
+	    {
+	      /* Ignore excess limbs in U.  */
+	      up += usize + (-uexp) + 1 - prec;
+	      usize -= usize + (-uexp) + 1 - prec;
+	    }
+	  /* Layout: v limb on top, a gap of zero limbs, then u's limbs.  */
+	  if (sump != up)
+	    MPN_COPY_INCR (sump, up, usize);
+	  MPN_ZERO (sump + usize, -uexp);
+	  sump[usize + (-uexp)] = v;
+	  sum->_mp_size = usize + (-uexp) + 1;
+	  sum->_mp_exp = 1;
+	}
+    }
+}
diff --git a/mpf/ceilfloor.c b/mpf/ceilfloor.c
new file mode 100644
index 0000000..9bb6638
--- /dev/null
+++ b/mpf/ceilfloor.c
@@ -0,0 +1,125 @@
+/* mpf_ceil, mpf_floor -- round an mpf to an integer.
+
+Copyright 2001, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* dir==1 for ceil, dir==-1 for floor
+
+   Notice the use of prec+1 ensures mpf_ceil and mpf_floor are equivalent to
+   mpf_set if u is already an integer.  */
+
+/* REGPARM_2_1 / REGPARM_ATTR are calling-convention helpers from gmp-impl.h
+   (definitions not in this view); the #define below routes internal calls
+   to the decorated symbol.  */
+static void __gmpf_ceil_or_floor (REGPARM_2_1 (mpf_ptr, mpf_srcptr, int)) REGPARM_ATTR (1);
+#define mpf_ceil_or_floor(r,u,dir)  __gmpf_ceil_or_floor (REGPARM_2_1 (r, u, dir))
+
+REGPARM_ATTR (1) static void
+mpf_ceil_or_floor (mpf_ptr r, mpf_srcptr u, int dir)
+{
+  mp_ptr     rp, up, p;
+  mp_size_t  size, asize, prec;
+  mp_exp_t   exp;
+
+  size = SIZ(u);
+  if (size == 0)
+    {
+    zero:
+      SIZ(r) = 0;
+      EXP(r) = 0;
+      return;
+    }
+
+  rp = PTR(r);
+  exp = EXP(u);
+  if (exp <= 0)
+    {
+      /* u is only a fraction */
+      /* Sign opposite to the rounding direction: rounds to zero.
+	 Otherwise the result is +/-1 with the sign taken from dir.  */
+      if ((size ^ dir) < 0)
+        goto zero;
+      rp[0] = 1;
+      EXP(r) = 1;
+      SIZ(r) = dir;
+      return;
+    }
+  EXP(r) = exp;
+
+  up = PTR(u);
+  asize = ABS (size);
+  up += asize;
+
+  /* skip fraction part of u */
+  asize = MIN (asize, exp);
+
+  /* don't lose precision in the copy */
+  prec = PREC (r) + 1;
+
+  /* skip excess over target precision */
+  asize = MIN (asize, prec);
+
+  up -= asize;	/* up now points at the integer-part limbs to keep */
+
+  if ((size ^ dir) >= 0)
+    {
+      /* rounding direction matches sign, must increment if ignored part is
+         non-zero */
+      for (p = PTR(u); p != up; p++)
+        {
+          if (*p != 0)
+            {
+              if (mpn_add_1 (rp, up, asize, CNST_LIMB(1)))
+                {
+                  /* was all 0xFF..FFs, which have become zeros, giving just
+                     a carry */
+                  rp[0] = 1;
+                  asize = 1;
+                  EXP(r)++;
+                }
+              SIZ(r) = (size >= 0 ? asize : -asize);
+              return;
+            }
+        }
+    }
+
+  /* Ignored part was zero (or rounding is towards zero): plain copy.  */
+  SIZ(r) = (size >= 0 ? asize : -asize);
+  if (rp != up)
+    MPN_COPY_INCR (rp, up, asize);
+}
+
+
+/* r = ceil(u): round u up to the nearest integer.  */
+void
+mpf_ceil (mpf_ptr r, mpf_srcptr u)
+{
+  mpf_ceil_or_floor (r, u, 1);
+}
+
+/* r = floor(u): round u down to the nearest integer.  */
+void
+mpf_floor (mpf_ptr r, mpf_srcptr u)
+{
+  mpf_ceil_or_floor (r, u, -1);
+}
diff --git a/mpf/clear.c b/mpf/clear.c
new file mode 100644
index 0000000..0939e03
--- /dev/null
+++ b/mpf/clear.c
@@ -0,0 +1,38 @@
+/* mpf_clear -- de-allocate the space occupied by the dynamic digit space of
+   an integer.
+
+Copyright 1993-1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Release x's limb block through the installed free function.  PREC(x)+1
+   limbs are freed, matching the PREC+1 allocation made at initialization
+   (the init code is not in this view).  x must not be used again until
+   re-initialized.  */
+void
+mpf_clear (mpf_ptr x)
+{
+  __GMP_FREE_FUNC_LIMBS (PTR(x), PREC(x) + 1);
+}
diff --git a/mpf/clears.c b/mpf/clears.c
new file mode 100644
index 0000000..115fa19
--- /dev/null
+++ b/mpf/clears.c
@@ -0,0 +1,49 @@
+/* mpf_clears() -- Clear multiple mpf_t variables.
+
+Copyright 2009, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include "gmp-impl.h"
+
+/* Free a varargs list of mpf_t variables.  The first argument is always
+   freed; the caller must terminate the list with a NULL mpf_ptr, which
+   is how the loop below detects the end.  */
+void
+mpf_clears (mpf_ptr x, ...)
+{
+  va_list  ap;
+
+  va_start (ap, x);
+
+  do
+    {
+      __GMP_FREE_FUNC_LIMBS (PTR(x), PREC(x) + 1);
+      x = va_arg (ap, mpf_ptr);
+    }
+  while (x != NULL);
+
+  va_end (ap);
+}
diff --git a/mpf/cmp.c b/mpf/cmp.c
new file mode 100644
index 0000000..3518b51
--- /dev/null
+++ b/mpf/cmp.c
@@ -0,0 +1,113 @@
+/* mpf_cmp -- Compare two floats.
+
+Copyright 1993, 1994, 1996, 2001, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Compare u and v: return positive if u > v, zero if equal, negative if
+   u < v.  Tests signs first, then exponents, then compares the
+   overlapping high limbs, after stripping low-end zero limbs so that
+   non-normalized operands still compare correctly.  */
+int
+mpf_cmp (mpf_srcptr u, mpf_srcptr v) __GMP_NOTHROW
+{
+  mp_srcptr up, vp;
+  mp_size_t usize, vsize;
+  mp_exp_t uexp, vexp;
+  int cmp;
+  int usign;
+
+  usize = SIZ(u);
+  vsize = SIZ(v);
+  usign = usize >= 0 ? 1 : -1;
+
+  /* 1. Are the signs different?  */
+  if ((usize ^ vsize) >= 0)
+    {
+      /* U and V are both non-negative or both negative.  */
+      if (usize == 0)
+	/* vsize >= 0 */
+	return -(vsize != 0);
+      if (vsize == 0)
+	/* usize >= 0 */
+	return usize != 0;
+      /* Fall out.  */
+    }
+  else
+    {
+      /* Either U or V is negative, but not both.  */
+      return usign;
+    }
+
+  /* U and V have the same sign and are both non-zero.  */
+
+  uexp = EXP(u);
+  vexp = EXP(v);
+
+  /* 2. Are the exponents different?  */
+  if (uexp > vexp)
+    return usign;
+  if (uexp < vexp)
+    return -usign;
+
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+
+  up = PTR (u);
+  vp = PTR (v);
+
+#define STRICT_MPF_NORMALIZATION 0
+#if ! STRICT_MPF_NORMALIZATION
+  /* Ignore zeroes at the low end of U and V.  */
+  do {
+    mp_limb_t tl;
+    tl = up[0];
+    MPN_STRIP_LOW_ZEROS_NOT_ZERO (up, usize, tl);
+    tl = vp[0];
+    MPN_STRIP_LOW_ZEROS_NOT_ZERO (vp, vsize, tl);
+  } while (0);
+#endif
+
+  if (usize > vsize)
+    {
+      cmp = mpn_cmp (up + usize - vsize, vp, vsize);
+      /* if (cmp == 0) */
+      /*	return usign; */
+      /* cmp == 0 means the common high limbs are equal, but u still has
+	 extra non-zero low limbs (low zeros were stripped above), so
+	 |u| > |v|; the increment turns the tie into "greater".  A
+	 negative cmp stays non-positive, preserving "less".  */
+      ++cmp;
+    }
+  else if (vsize > usize)
+    {
+      cmp = mpn_cmp (up, vp + vsize - usize, usize);
+      /* if (cmp == 0) */
+      /*	return -usign; */
+      /* A tie (cmp == 0) here means v has extra non-zero low limbs, so
+	 the final expression correctly yields -usign.  */
+    }
+  else
+    {
+      cmp = mpn_cmp (up, vp, usize);
+      if (cmp == 0)
+	return 0;
+    }
+  return cmp > 0 ? usign : -usign;
+}
diff --git a/mpf/cmp_d.c b/mpf/cmp_d.c
new file mode 100644
index 0000000..3fa099b
--- /dev/null
+++ b/mpf/cmp_d.c
@@ -0,0 +1,59 @@
+/* mpf_cmp_d -- compare mpf and double.
+
+Copyright 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp-impl.h"
+
+/* Compare f with the double d: return positive if f > d, zero if equal,
+   negative if f < d.  d is unpacked into a small stack-backed mpf (limbs
+   in darray) and the comparison is delegated to mpf_cmp.  */
+int
+mpf_cmp_d (mpf_srcptr f, double d)
+{
+  mp_limb_t  darray[LIMBS_PER_DOUBLE];
+  mpf_t      df;
+
+  /* d=NaN has no sensible return value, so raise an exception.
+     d=+Inf compares above any f (return -1), d=-Inf below (return 1).  */
+  DOUBLE_NAN_INF_ACTION (d,
+                         __gmp_invalid_operation (),
+                         return (d < 0.0 ? 1 : -1));
+
+  /* d == 0: the answer is just the sign of f (SIZ carries the sign).  */
+  if (d == 0.0)
+    return SIZ(f);
+
+  /* Build the temporary: magnitude limbs from __gmp_extract_double,
+     sign carried in the size field.  */
+  PTR(df) = darray;
+  SIZ(df) = (d >= 0.0 ? LIMBS_PER_DOUBLE : -LIMBS_PER_DOUBLE);
+  EXP(df) = __gmp_extract_double (darray, ABS(d));
+
+  return mpf_cmp (f, df);
+}
diff --git a/mpf/cmp_si.c b/mpf/cmp_si.c
new file mode 100644
index 0000000..d8d9880
--- /dev/null
+++ b/mpf/cmp_si.c
@@ -0,0 +1,109 @@
+/* mpf_cmp_si -- Compare a float with a signed integer.
+
+Copyright 1993-1995, 1999-2002, 2004, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Compare the float U with the signed integer VVAL.  Returns a negative
+   value, zero, or a positive value according as U < VVAL, U == VVAL,
+   or U > VVAL.  */
+int
+mpf_cmp_si (mpf_srcptr u, long int vval) __GMP_NOTHROW
+{
+  mp_srcptr up;
+  mp_size_t usize;
+  mp_exp_t uexp;
+  mp_limb_t ulimb;
+  int usign;
+  unsigned long abs_vval;
+
+  usize = SIZ (u);
+
+  /* 1. Are the signs different?  */
+  if ((usize < 0) == (vval < 0)) /* don't use xor, type size may differ */
+    {
+      /* U and V are both non-negative or both negative.  */
+      if (usize == 0)
+	/* vval >= 0 */
+	return -(vval != 0);
+      if (vval == 0)
+	/* usize >= 0 */
+	return usize != 0;
+      /* Fall out.  */
+    }
+  else
+    {
+      /* Either U or V is negative, but not both.  */
+      return usize >= 0 ? 1 : -1;
+    }
+
+  /* U and V have the same sign and are both non-zero.  */
+
+  /* 2. Are the exponents different (V's exponent == 1)?  */
+  uexp = EXP (u);
+  usign = usize >= 0 ? 1 : -1;
+  usize = ABS (usize);
+  abs_vval = ABS_CAST (unsigned long, vval);
+
+#if GMP_NAIL_BITS != 0
+  /* With nails, |vval| may need two limbs, making V's effective exponent 2
+     rather than 1.  */
+  if (uexp != 1 + (abs_vval > GMP_NUMB_MAX))
+    return (uexp < 1 + (abs_vval > GMP_NUMB_MAX)) ? -usign : usign;
+#else
+  if (uexp != 1)
+    return (uexp < 1) ? -usign : usign;
+#endif
+
+  up = PTR (u);
+
+  /* Fetch the most significant limb of U's mantissa.  */
+  ASSERT (usize > 0);
+  ulimb = up[--usize];
+#if GMP_NAIL_BITS != 0
+  if (uexp == 2)
+    {
+      /* V spans two numb-sized pieces: reassemble U's top two limbs to
+	 match.  A non-zero nail part of the top limb means U is bigger.  */
+      if ((ulimb >> GMP_NAIL_BITS) != 0)
+	return usign;
+      ulimb = (ulimb << GMP_NUMB_BITS);
+      if (usize != 0) ulimb |= up[--usize];
+    }
+#endif
+
+  /* 3. Compare the most significant mantissa limb with V.  */
+  if (ulimb != abs_vval)
+    return (ulimb < abs_vval) ? -usign : usign;
+
+  /* Ignore zeroes at the low end of U.  */
+  for (; *up == 0; ++up)
+    --usize;
+
+  /* 4. Now, if the number of limbs is different, we have a difference
+     since we have made sure the trailing limbs are not zero.  */
+  if (usize > 0)
+    return usign;
+
+  /* Wow, we got zero even if we tried hard to avoid it.  */
+  return 0;
+}
diff --git a/mpf/cmp_ui.c b/mpf/cmp_ui.c
new file mode 100644
index 0000000..a9a6036
--- /dev/null
+++ b/mpf/cmp_ui.c
@@ -0,0 +1,87 @@
+/* mpf_cmp_ui -- Compare a float with an unsigned integer.
+
+Copyright 1993-1995, 1999, 2001, 2002, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Compare the float U with the unsigned integer VVAL.  Returns a negative
+   value, zero, or a positive value according as U < VVAL, U == VVAL,
+   or U > VVAL.  */
+int
+mpf_cmp_ui (mpf_srcptr u, unsigned long int vval) __GMP_NOTHROW
+{
+  mp_srcptr up;
+  mp_size_t usize;
+  mp_exp_t uexp;
+  mp_limb_t ulimb;
+
+  usize = SIZ (u);
+
+  /* 1. Is U negative?  */
+  if (usize < 0)
+    return -1;
+  /* We rely on usize being non-negative in the code that follows.  */
+
+  if (vval == 0)
+    return usize != 0;
+
+  /* 2. Are the exponents different (V's exponent == 1)?  */
+  uexp = EXP (u);
+
+#if GMP_NAIL_BITS != 0
+  /* With nails, vval may need two limbs, making V's effective exponent 2
+     rather than 1.  */
+  if (uexp != 1 + (vval > GMP_NUMB_MAX))
+    return (uexp < 1 + (vval > GMP_NUMB_MAX)) ? -1 : 1;
+#else
+  if (uexp != 1)
+    return (uexp < 1) ? -1 : 1;
+#endif
+
+  up = PTR (u);
+
+  /* Fetch the most significant limb of U's mantissa.  */
+  ASSERT (usize > 0);
+  ulimb = up[--usize];
+#if GMP_NAIL_BITS != 0
+  if (uexp == 2)
+    {
+      /* V spans two numb-sized pieces: reassemble U's top two limbs to
+	 match.  A non-zero nail part of the top limb means U is bigger.  */
+      if ((ulimb >> GMP_NAIL_BITS) != 0)
+	return 1;
+      ulimb = (ulimb << GMP_NUMB_BITS);
+      if (usize != 0) ulimb |= up[--usize];
+    }
+#endif
+
+  /* 3. Compare the most significant mantissa limb with V.  */
+  if (ulimb != vval)
+    return (ulimb < vval) ? -1 : 1;
+
+  /* Ignore zeroes at the low end of U.  */
+  for (; *up == 0; ++up)
+    --usize;
+
+  /* 4. Now, if the number of limbs is different, we have a difference
+     since we have made sure the trailing limbs are not zero.  */
+  return (usize > 0);
+}
diff --git a/mpf/cmp_z.c b/mpf/cmp_z.c
new file mode 100644
index 0000000..279980f
--- /dev/null
+++ b/mpf/cmp_z.c
@@ -0,0 +1,45 @@
+/* mpf_cmp_z -- Compare a float with an integer.
+
+Copyright 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Compare the float U with the integer V.  Returns a negative value, zero,
+   or a positive value according as U < V, U == V, or U > V.  */
+int
+mpf_cmp_z (mpf_srcptr u, mpz_srcptr v) __GMP_NOTHROW
+{
+  mpf_t vf;
+  mp_size_t size;
+
+  /* View V as a float in place, without copying any limb data: an mpz with
+     |size| limbs is exactly an mpf with the same limbs, the sign carried in
+     the size field, and exponent equal to the limb count.  */
+  SIZ (vf) = size = SIZ (v);
+  EXP (vf) = size = ABS (size);
+  /* PREC (vf) = size; */  /* precision is left unset; mpf_cmp doesn't read it */
+  PTR (vf) = PTR (v);
+
+  return mpf_cmp (u, vf);
+}
diff --git a/mpf/div.c b/mpf/div.c
new file mode 100644
index 0000000..d13af75
--- /dev/null
+++ b/mpf/div.c
@@ -0,0 +1,137 @@
+/* mpf_div -- Divide two floats.
+
+Copyright 1993, 1994, 1996, 2000-2002, 2004, 2005, 2010, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Not done:
+
+   No attempt is made to identify an overlap u==v.  The result will be
+   correct (1.0), but a full actual division is done whereas of course
+   x/x==1 needs no work.  Such a call is not a sensible thing to make, and
+   it's left to an application to notice and optimize if it might arise
+   somehow through pointer aliasing or whatever.
+
+   Enhancements:
+
+   The high quotient limb is non-zero when high{up,vsize} >= {vp,vsize}.  We
+   could make that comparison and use qsize==prec instead of qsize==prec+1,
+   to save one limb in the division.
+
+   If r==u but the size is enough bigger than prec that there won't be an
+   overlap between quotient and dividend in mpn_div_q, then we can avoid
+   copying up,usize.  This would only arise from a prec reduced with
+   mpf_set_prec_raw and will be pretty unusual, but might be worthwhile if
+   it could be worked into the copy_u decision cleanly.  */
+
+/* Set R = U / V, computed to R's precision.  Raises DIVIDE_BY_ZERO when
+   V == 0.  R may alias U and/or V; both overlaps are handled below.  */
+void
+mpf_div (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_srcptr up, vp;
+  mp_ptr rp, tp, new_vp;
+  mp_size_t usize, vsize, rsize, prospective_rsize, tsize, zeros;
+  mp_size_t sign_quotient, prec, high_zero, chop;
+  mp_exp_t rexp;
+  int copy_u;
+  TMP_DECL;
+
+  usize = SIZ(u);
+  vsize = SIZ(v);
+
+  if (UNLIKELY (vsize == 0))
+    DIVIDE_BY_ZERO;
+
+  if (usize == 0)
+    {
+      /* 0 / V = 0 */
+      SIZ(r) = 0;
+      EXP(r) = 0;
+      return;
+    }
+
+  /* The xor is negative exactly when the operand signs differ, which is the
+     sign of the quotient.  */
+  sign_quotient = usize ^ vsize;
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+  prec = PREC(r);
+
+  TMP_MARK;
+  rexp = EXP(u) - EXP(v) + 1;
+
+  rp = PTR(r);
+  up = PTR(u);
+  vp = PTR(v);
+
+  prospective_rsize = usize - vsize + 1; /* quot from using given u,v sizes */
+  rsize = prec + 1;			 /* desired quot */
+
+  zeros = rsize - prospective_rsize;	 /* padding u to give rsize */
+  copy_u = (zeros > 0 || rp == up);	 /* copy u if overlap or padding */
+
+  chop = MAX (-zeros, 0);		 /* negative zeros means shorten u */
+  up += chop;
+  usize -= chop;
+  zeros += chop;			 /* now zeros >= 0 */
+
+  tsize = usize + zeros;		 /* size for possible copy of u */
+
+  /* copy and possibly extend u if necessary */
+  if (copy_u)
+    {
+      tp = TMP_ALLOC_LIMBS (tsize + 1);	/* +1 for mpn_div_q's scratch needs */
+      MPN_ZERO (tp, zeros);		/* low zero padding of the dividend */
+      MPN_COPY (tp+zeros, up, usize);
+      up = tp;
+      usize = tsize;
+    }
+  else
+    {
+      /* u is used in place; tp serves only as mpn_div_q scratch.  */
+      tp = TMP_ALLOC_LIMBS (usize + 1);
+    }
+
+  /* ensure divisor doesn't overlap quotient */
+  if (rp == vp)
+    {
+      new_vp = TMP_ALLOC_LIMBS (vsize);
+      MPN_COPY (new_vp, vp, vsize);
+      vp = new_vp;
+    }
+
+  ASSERT (usize-vsize+1 == rsize);
+  mpn_div_q (rp, up, usize, vp, vsize, tp);
+
+  /* strip possible zero high limb */
+  high_zero = (rp[rsize-1] == 0);
+  rsize -= high_zero;
+  rexp -= high_zero;
+
+  SIZ(r) = sign_quotient >= 0 ? rsize : -rsize;
+  EXP(r) = rexp;
+  TMP_FREE;
+}
diff --git a/mpf/div_2exp.c b/mpf/div_2exp.c
new file mode 100644
index 0000000..ad552c1
--- /dev/null
+++ b/mpf/div_2exp.c
@@ -0,0 +1,138 @@
+/* mpf_div_2exp -- Divide a float by 2^n.
+
+Copyright 1993, 1994, 1996, 2000-2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Multiples of GMP_NUMB_BITS in exp simply mean an amount subtracted from
+   EXP(u) to set EXP(r).  The remainder exp%GMP_NUMB_BITS is then a right
+   shift for the limb data.
+
+   If exp%GMP_NUMB_BITS == 0 then there's no shifting, we effectively just
+   do an mpz_set with changed EXP(r).  Like mpz_set we take prec+1 limbs in
+   this case.  Although just prec would suffice, it's nice to have
+   mpf_div_2exp with exp==0 come out the same as mpz_set.
+
+   When shifting we take up to prec many limbs from the input.  Our shift is
+   cy = mpn_rshift (PTR(r)+1, PTR(u)+k, ...), where k is the number of low
+   limbs dropped from u, and the carry out is stored to PTR(r)[0].  We don't
+   try to work extra bits from PTR(u)[k-1] (when k>=1 makes it available)
+   into that low carry limb.  Just prec limbs (with the high non-zero) from
+   the input is enough bits for the application requested precision, no need
+   to do extra work.
+
+   If r==u the shift will have overlapping operands.  When k>=1 (ie. when
+   usize > prec), the overlap is in the style supported by rshift (ie. dst
+   <= src).
+
+   But when r==u and k==0 (ie. usize <= prec), we would have an invalid
+   overlap (mpn_rshift (rp+1, rp, ...)).  In this case we must instead use
+   mpn_lshift (PTR(r), PTR(u), size, NUMB-shift).  An lshift by NUMB-shift
+   bits gives identical data of course, it's just its overlap restrictions
+   which differ.
+
+   In both shift cases, the resulting data is abs_usize+1 limbs.  "adj" is
+   used to add +1 to that size if the high is non-zero (it may of course
+   have become zero by the shifting).  EXP(u) is the exponent just above
+   those abs_usize+1 limbs, so it gets -1+adj, which means -1 if the high is
+   zero, or no change if the high is non-zero.
+
+   Enhancements:
+
+   The way mpn_lshift is used means successive mpf_div_2exp calls on the
+   same operand will accumulate low zero limbs, until prec+1 limbs is
+   reached.  This is wasteful for subsequent operations.  When abs_usize <=
+   prec, we should test the low exp%GMP_NUMB_BITS many bits of PTR(u)[0],
+   ie. those which would be shifted out by an mpn_rshift.  If they're zero
+   then use that mpn_rshift.  */
+
+/* Set R = U / 2^EXP, to R's precision.  R may alias U; see the block
+   comment above for the overlap analysis.  */
+void
+mpf_div_2exp (mpf_ptr r, mpf_srcptr u, mp_bitcnt_t exp)
+{
+  mp_srcptr up;
+  mp_ptr rp = r->_mp_d;
+  mp_size_t usize;
+  mp_size_t abs_usize;
+  mp_size_t prec = r->_mp_prec;
+  mp_exp_t uexp = u->_mp_exp;
+
+  usize = u->_mp_size;
+
+  if (UNLIKELY (usize == 0))
+    {
+      /* 0 / 2^exp = 0 */
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+  abs_usize = ABS (usize);
+  up = u->_mp_d;
+
+  if (exp % GMP_NUMB_BITS == 0)
+    {
+      /* The shift is a whole number of limbs: no limb data changes, only the
+	 exponent, plus a copy (truncated to prec+1 limbs) when r != u.  */
+      prec++;			/* retain more precision here as we don't need
+				   to account for carry-out here */
+      if (abs_usize > prec)
+	{
+	  up += abs_usize - prec;
+	  abs_usize = prec;
+	}
+      if (rp != up)
+	MPN_COPY_INCR (rp, up, abs_usize);
+      r->_mp_exp = uexp - exp / GMP_NUMB_BITS;
+    }
+  else
+    {
+      mp_limb_t cy_limb;
+      mp_size_t adj;
+      if (abs_usize > prec)
+	{
+	  up += abs_usize - prec;
+	  abs_usize = prec;
+	  /* Use mpn_rshift since mpn_lshift operates downwards, and we
+	     therefore would clobber part of U before using that part, in case
+	     R is the same variable as U.  */
+	  cy_limb = mpn_rshift (rp + 1, up, abs_usize, exp % GMP_NUMB_BITS);
+	  rp[0] = cy_limb;	/* bits shifted out become the new low limb */
+	  adj = rp[abs_usize] != 0;
+	}
+      else
+	{
+	  /* Equivalent data via a left shift by the complementary count,
+	     whose overlap rules permit rp == up here.  */
+	  cy_limb = mpn_lshift (rp, up, abs_usize,
+				GMP_NUMB_BITS - exp % GMP_NUMB_BITS);
+	  rp[abs_usize] = cy_limb;
+	  adj = cy_limb != 0;
+	}
+
+      /* adj: keep the extra high limb only if it is non-zero.  */
+      abs_usize += adj;
+      r->_mp_exp = uexp - exp / GMP_NUMB_BITS - 1 + adj;
+    }
+  r->_mp_size = usize >= 0 ? abs_usize : -abs_usize;
+}
diff --git a/mpf/div_ui.c b/mpf/div_ui.c
new file mode 100644
index 0000000..e1b0112
--- /dev/null
+++ b/mpf/div_ui.c
@@ -0,0 +1,110 @@
+/* mpf_div_ui -- Divide a float with an unsigned integer.
+
+Copyright 1993, 1994, 1996, 2000-2002, 2004, 2005, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Set R = U / V for an unsigned long V, to R's precision.  Raises
+   DIVIDE_BY_ZERO when V == 0.  */
+void
+mpf_div_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
+{
+  mp_srcptr up;
+  mp_ptr rp, tp, rtp;
+  mp_size_t usize;
+  mp_size_t rsize, tsize;
+  mp_size_t sign_quotient;
+  mp_size_t prec;
+  mp_limb_t q_limb;
+  mp_exp_t rexp;
+  TMP_DECL;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (v > GMP_NUMB_MAX)
+    {
+      /* V doesn't fit one limb: build a two-limb stack mpf and fall back to
+	 the general mpf_div.  */
+      mpf_t vf;
+      mp_limb_t vl[2];
+      SIZ(vf) = 2;
+      EXP(vf) = 2;
+      PTR(vf) = vl;
+      vl[0] = v & GMP_NUMB_MASK;
+      vl[1] = v >> GMP_NUMB_BITS;
+      mpf_div (r, u, vf);
+      return;
+    }
+#endif
+
+  if (UNLIKELY (v == 0))
+    DIVIDE_BY_ZERO;
+
+  usize = u->_mp_size;
+
+  if (usize == 0)
+    {
+      /* 0 / V = 0 */
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+  sign_quotient = usize;	/* sign of the quotient is the sign of u */
+  usize = ABS (usize);
+  prec = r->_mp_prec;
+
+  TMP_MARK;
+
+  rp = r->_mp_d;
+  up = u->_mp_d;
+
+  tsize = 1 + prec;
+  tp = TMP_ALLOC_LIMBS (tsize + 1);
+
+  /* Truncate u to tsize high limbs, or pad it with low zero limbs up to
+     tsize, so the division always produces exactly tsize quotient limbs.  */
+  if (usize > tsize)
+    {
+      up += usize - tsize;
+      usize = tsize;
+      rtp = tp;
+    }
+  else
+    {
+      MPN_ZERO (tp, tsize - usize);
+      rtp = tp + (tsize - usize);
+    }
+
+  /* Move the dividend to the remainder.  */
+  MPN_COPY (rtp, up, usize);
+
+  mpn_divmod_1 (rp, tp, tsize, (mp_limb_t) v);
+  q_limb = rp[tsize - 1];
+
+  /* Drop a zero high quotient limb, adjusting the exponent to suit.  */
+  rsize = tsize - (q_limb == 0);
+  rexp = u->_mp_exp - (q_limb == 0);
+  r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;
+  r->_mp_exp = rexp;
+  TMP_FREE;
+}
diff --git a/mpf/dump.c b/mpf/dump.c
new file mode 100644
index 0000000..cd37dab
--- /dev/null
+++ b/mpf/dump.c
@@ -0,0 +1,52 @@
+/* mpf_dump -- Dump a float to stdout.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO
+   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h> /* for strlen */
+#include "gmp-impl.h"
+
+/* Print U to stdout as "0.<digits>e<exp>" (with a leading '-' for
+   negatives), base 10, followed by a newline.  Debugging aid only; see the
+   mutable-interface warning at the top of this file.  */
+void
+mpf_dump (mpf_srcptr u)
+{
+  mp_exp_t exp;
+  char *str;
+
+  /* 0 digits requested means all significant digits.  */
+  str = mpf_get_str (0, &exp, 10, 0, u);
+  if (str[0] == '-')
+    printf ("-0.%se%ld\n", str + 1, exp);
+  else
+    printf ("0.%se%ld\n", str, exp);
+  /* mpf_get_str allocated str with the GMP allocator; free it the same
+     way.  */
+  (*__gmp_free_func) (str, strlen (str) + 1);
+}
diff --git a/mpf/eq.c b/mpf/eq.c
new file mode 100644
index 0000000..cddb9d5
--- /dev/null
+++ b/mpf/eq.c
@@ -0,0 +1,149 @@
+/* mpf_eq -- Compare two floats up to a specified bit #.
+
+Copyright 1993, 1995, 1996, 2001, 2002, 2008, 2009, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Return non-zero iff the first N_BITS significant bits of U and V agree,
+   i.e. both operands truncated to N_BITS bits are equal.  */
+int
+mpf_eq (mpf_srcptr u, mpf_srcptr v, mp_bitcnt_t n_bits)
+{
+  mp_srcptr up, vp, p;
+  mp_size_t usize, vsize, minsize, maxsize, n_limbs, i, size;
+  mp_exp_t uexp, vexp;
+  mp_limb_t diff;
+  int cnt;
+
+  uexp = u->_mp_exp;
+  vexp = v->_mp_exp;
+
+  usize = u->_mp_size;
+  vsize = v->_mp_size;
+
+  /* 1. Are the signs different?  */
+  if ((usize ^ vsize) >= 0)
+    {
+      /* U and V are both non-negative or both negative.  */
+      if (usize == 0)
+	return vsize == 0;
+      if (vsize == 0)
+	return 0;
+
+      /* Fall out.  */
+    }
+  else
+    {
+      /* Either U or V is negative, but not both.  */
+      return 0;
+    }
+
+  /* U and V have the same sign and are both non-zero.  */
+
+  /* 2. Are the exponents different?  */
+  if (uexp != vexp)
+    return 0;
+
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  up += usize;			/* point just above most significant limb */
+  vp += vsize;			/* point just above most significant limb */
+
+  /* The top limbs must have their most significant 1 bit in the same
+     position: shift v's top limb so only the bit at u's msb position
+     remains, and require exactly that bit to be set.  */
+  count_leading_zeros (cnt, up[-1]);
+  if ((vp[-1] >> (GMP_LIMB_BITS - 1 - cnt)) != 1)
+    return 0;			/* msb positions different */
+
+  /* Adjust for the leading zero bits of the top limb, so the limb count
+     below covers n_bits significant bits.  */
+  n_bits += cnt - GMP_NAIL_BITS;
+  n_limbs = (n_bits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
+
+  usize = MIN (usize, n_limbs);
+  vsize = MIN (vsize, n_limbs);
+
+#if 0
+  /* Ignore zeros at the low end of U and V.  */
+  while (up[0] == 0)
+    up++, usize--;
+  while (vp[0] == 0)
+    vp++, vsize--;
+#endif
+
+  minsize = MIN (usize, vsize);
+  maxsize = usize + vsize - minsize;
+
+  up -= minsize;		/* point at most significant common limb */
+  vp -= minsize;		/* point at most significant common limb */
+
+  /* Compare the most significant part which has explicit limbs for U and V. */
+  for (i = minsize - 1; i > 0; i--)
+    {
+      if (up[i] != vp[i])
+	return 0;
+    }
+
+  /* Bits of interest remaining in the least significant limb examined.  */
+  n_bits -= (maxsize - 1) * GMP_NUMB_BITS;
+
+  size = maxsize - minsize;
+  if (size != 0)
+    {
+      if (up[0] != vp[0])
+	return 0;
+
+      /* Now either U or V has its limbs consumed, i.e, continues with an
+	 infinite number of implicit zero limbs.  Check that the other operand
+	 has just zeros in the corresponding, relevant part.  */
+
+      if (usize > vsize)
+	p = up - size;
+      else
+	p = vp - size;
+
+      for (i = size - 1; i > 0; i--)
+	{
+	  if (p[i] != 0)
+	    return 0;
+	}
+
+      diff = p[0];
+    }
+  else
+    {
+      /* Both U and V have their limbs consumed.  */
+
+      diff = up[0] ^ vp[0];
+    }
+
+  /* Discard bits below the n_bits boundary before the final test.  */
+  if (n_bits < GMP_NUMB_BITS)
+    diff >>= GMP_NUMB_BITS - n_bits;
+
+  return diff == 0;
+}
diff --git a/mpf/fits_s.h b/mpf/fits_s.h
new file mode 100644
index 0000000..80e74be
--- /dev/null
+++ b/mpf/fits_s.h
@@ -0,0 +1,71 @@
+/* mpf_fits_s*_p -- test whether an mpf fits a C signed type.
+
+Copyright 2001, 2002, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Notice this is equivalent to mpz_set_f + mpz_fits_s*_p.  */
+
+/* Return non-zero iff F, truncated toward zero, fits the signed range
+   [MINIMUM, MAXIMUM].  This is a template body: FUNCTION, MAXIMUM and
+   MINIMUM are defined by the including fits_s*.c file.  */
+int
+FUNCTION (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_size_t  fs, fn;
+  mp_srcptr  fp;
+  mp_exp_t   exp;
+  mp_limb_t  fl;
+
+  exp = EXP(f);
+  if (exp < 1)
+    return 1;  /* -1 < f < 1 truncates to zero, so fits */
+
+  fs = SIZ (f);
+  fp = PTR(f);
+  fn = ABS (fs);
+
+  if (exp == 1)
+    {
+      /* Integer part is a single limb.  */
+      fl = fp[fn-1];
+    }
+#if GMP_NAIL_BITS != 0
+  else if (exp == 2 && MAXIMUM > GMP_NUMB_MAX)
+    {
+      /* With nails the target type may span two limbs; reassemble them.
+	 A non-zero nail part of the top limb means f is too big.  */
+      fl = fp[fn-1];
+      if ((fl >> GMP_NAIL_BITS) != 0)
+	return 0;
+      fl = (fl << GMP_NUMB_BITS);
+      if (fn >= 2)
+        fl |= fp[fn-2];
+    }
+#endif
+  else
+    return 0;  /* integer part wider than the target type */
+
+  /* Non-negative values compare against MAXIMUM, negatives against
+     |MINIMUM| (which may be one larger, as in two's complement).  */
+  return fl <= (fs >= 0 ? (mp_limb_t) MAXIMUM : NEG_CAST (mp_limb_t, MINIMUM));
+}
diff --git a/mpf/fits_sint.c b/mpf/fits_sint.c
new file mode 100644
index 0000000..26ace07
--- /dev/null
+++ b/mpf/fits_sint.c
@@ -0,0 +1,36 @@
+/* mpf_fits_sint_p -- test whether an mpf fits an int.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION   mpf_fits_sint_p
+#define MAXIMUM    INT_MAX
+#define MINIMUM    INT_MIN
+
+#include "fits_s.h"
diff --git a/mpf/fits_slong.c b/mpf/fits_slong.c
new file mode 100644
index 0000000..25db68c
--- /dev/null
+++ b/mpf/fits_slong.c
@@ -0,0 +1,36 @@
+/* mpf_fits_slong_p -- test whether an mpf fits a long.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION   mpf_fits_slong_p
+#define MAXIMUM    LONG_MAX
+#define MINIMUM    LONG_MIN
+
+#include "fits_s.h"
diff --git a/mpf/fits_sshort.c b/mpf/fits_sshort.c
new file mode 100644
index 0000000..3bfc5a4
--- /dev/null
+++ b/mpf/fits_sshort.c
@@ -0,0 +1,36 @@
+/* mpf_fits_sshort_p -- test whether an mpf fits a short.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION   mpf_fits_sshort_p
+#define MAXIMUM    SHRT_MAX
+#define MINIMUM    SHRT_MIN
+
+#include "fits_s.h"
diff --git a/mpf/fits_u.h b/mpf/fits_u.h
new file mode 100644
index 0000000..bd7ca78
--- /dev/null
+++ b/mpf/fits_u.h
@@ -0,0 +1,73 @@
+/* mpf_fits_u*_p -- test whether an mpf fits a C unsigned type.
+
+Copyright 2001, 2002, 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Notice this is equivalent to mpz_set_f + mpz_fits_u*_p.  */
+
+/* Return non-zero iff F, truncated toward zero, is >= 0 and <= MAXIMUM.
+   This is a template body: FUNCTION and MAXIMUM are defined by the
+   including fits_u*.c file.  */
+int
+FUNCTION (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_size_t  fn;
+  mp_srcptr  fp;
+  mp_exp_t   exp;
+  mp_limb_t  fl;
+
+  exp = EXP(f);
+  if (exp < 1)
+    return 1;  /* -1 < f < 1 truncates to zero, so fits */
+
+  fn = SIZ(f);
+  if (fn < 0) /* zero already caught by exp < 1 above */
+    return 0; /* negatives don't fit */
+
+  fp = PTR(f);
+
+  if (exp == 1)
+    {
+      /* Integer part is a single limb.  */
+      fl = fp[fn-1];
+    }
+#if GMP_NAIL_BITS != 0
+  else if (exp == 2 && MAXIMUM > GMP_NUMB_MAX)
+    {
+      /* With nails the target type may span two limbs; reassemble them.
+	 A non-zero nail part of the top limb means f is too big.  */
+      fl = fp[fn-1];
+      if ((fl >> GMP_NAIL_BITS) != 0)
+	return 0;
+      fl = (fl << GMP_NUMB_BITS);
+      if (fn >= 2)
+        fl |= fp[fn-2];
+    }
+#endif
+  else
+    return 0;  /* integer part wider than the target type */
+
+  return fl <= MAXIMUM;
+}
diff --git a/mpf/fits_uint.c b/mpf/fits_uint.c
new file mode 100644
index 0000000..4b107b0
--- /dev/null
+++ b/mpf/fits_uint.c
@@ -0,0 +1,35 @@
+/* mpf_fits_uint_p -- test whether an mpf fits an unsigned int.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* Instantiate the shared implementation in fits_u.h for unsigned int.  */
+#define FUNCTION  mpf_fits_uint_p
+#define MAXIMUM   UINT_MAX
+
+#include "fits_u.h"
diff --git a/mpf/fits_ulong.c b/mpf/fits_ulong.c
new file mode 100644
index 0000000..1db688c
--- /dev/null
+++ b/mpf/fits_ulong.c
@@ -0,0 +1,35 @@
+/* mpf_fits_ulong_p -- test whether an mpf fits an unsigned long.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* Instantiate the shared implementation in fits_u.h for unsigned long.  */
+#define FUNCTION  mpf_fits_ulong_p
+#define MAXIMUM   ULONG_MAX
+
+#include "fits_u.h"
diff --git a/mpf/fits_ushort.c b/mpf/fits_ushort.c
new file mode 100644
index 0000000..76a3fd9
--- /dev/null
+++ b/mpf/fits_ushort.c
@@ -0,0 +1,35 @@
+/* mpf_fits_ushort_p -- test whether an mpf fits an unsigned short.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* Instantiate the shared implementation in fits_u.h for unsigned short.  */
+#define FUNCTION  mpf_fits_ushort_p
+#define MAXIMUM   USHRT_MAX
+
+#include "fits_u.h"
diff --git a/mpf/get_d.c b/mpf/get_d.c
new file mode 100644
index 0000000..34826fb
--- /dev/null
+++ b/mpf/get_d.c
@@ -0,0 +1,46 @@
+/* double mpf_get_d (mpf_t src) -- return SRC truncated to a double.
+
+Copyright 1996, 2001-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Return SRC truncated to a double.  EXP(src) counts limbs above the
+   radix point, so (EXP - abs_size) * GMP_NUMB_BITS is the bit exponent
+   of the lowest stored limb; the sign is conveyed to mpn_get_d via the
+   signed size.  */
+double
+mpf_get_d (mpf_srcptr src)
+{
+  mp_size_t  size, abs_size;
+  long       exp;
+
+  size = SIZ (src);
+  if (UNLIKELY (size == 0))
+    return 0.0;		/* zero is represented with size == 0 */
+
+  abs_size = ABS (size);
+  exp = (EXP (src) - abs_size) * GMP_NUMB_BITS;
+  return mpn_get_d (PTR (src), abs_size, size, exp);
+}
diff --git a/mpf/get_d_2exp.c b/mpf/get_d_2exp.c
new file mode 100644
index 0000000..440a753
--- /dev/null
+++ b/mpf/get_d_2exp.c
@@ -0,0 +1,56 @@
+/* double mpf_get_d_2exp (signed long int *exp, mpf_t src).
+
+Copyright 2001-2004, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return d and set *EXPPTR such that SRC = d * 2^(*EXPPTR) with
+   0.5 <= |d| < 1 (the mpf analogue of frexp).  For SRC == 0 both the
+   result and *EXPPTR are 0.  */
+double
+mpf_get_d_2exp (signed long int *expptr, mpf_srcptr src)
+{
+  mp_size_t size, abs_size;
+  mp_srcptr ptr;
+  int cnt;
+
+  size = SIZ(src);
+  if (UNLIKELY (size == 0))
+    {
+      *expptr = 0;
+      return 0.0;
+    }
+
+  ptr = PTR(src);
+  abs_size = ABS (size);
+  /* cnt = unused high bits in the most significant limb, not counting
+     the nail bits.  */
+  count_leading_zeros (cnt, ptr[abs_size - 1]);
+  cnt -= GMP_NAIL_BITS;
+
+  /* EXP(src) counts limbs above the radix point, so this is the bit
+     position of the top set bit of |src|.  */
+  *expptr = EXP(src) * GMP_NUMB_BITS - cnt;
+  return mpn_get_d (ptr, abs_size, size, -(abs_size * GMP_NUMB_BITS - cnt));
+}
diff --git a/mpf/get_dfl_prec.c b/mpf/get_dfl_prec.c
new file mode 100644
index 0000000..13fc514
--- /dev/null
+++ b/mpf/get_dfl_prec.c
@@ -0,0 +1,38 @@
+/* mpf_get_default_prec -- return default precision in bits.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Return the current default mpf precision.  The default is stored as a
+   limb count (__gmp_default_fp_limb_precision); convert it to bits.  */
+mp_bitcnt_t
+mpf_get_default_prec (void) __GMP_NOTHROW
+{
+  return __GMPF_PREC_TO_BITS (__gmp_default_fp_limb_precision);
+}
diff --git a/mpf/get_prc.c b/mpf/get_prc.c
new file mode 100644
index 0000000..8dee99e
--- /dev/null
+++ b/mpf/get_prc.c
@@ -0,0 +1,37 @@
+/* mpf_get_prec(x) -- Return the precision in bits of x.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Return the precision of X in bits; _mp_prec is stored as a limb count.  */
+mp_bitcnt_t
+mpf_get_prec (mpf_srcptr x) __GMP_NOTHROW
+{
+  return __GMPF_PREC_TO_BITS (x->_mp_prec);
+}
diff --git a/mpf/get_si.c b/mpf/get_si.c
new file mode 100644
index 0000000..6ac4d44
--- /dev/null
+++ b/mpf/get_si.c
@@ -0,0 +1,86 @@
+/* mpf_get_si -- mpf to long conversion
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Any fraction bits are truncated, meaning simply discarded.
+
+   For values bigger than a long, the low bits are returned, like
+   mpz_get_si, but this isn't documented.
+
+   Notice this is equivalent to mpz_set_f + mpz_get_si.
+
+
+   Implementation:
+
+   fl is established in basically the same way as for mpf_get_ui, see that
+   code for explanations of the conditions.
+
+   However unlike mpf_get_ui we need an explicit return 0 for exp<=0.  When
+   f is a negative fraction (ie. size<0 and exp<=0) we can't let fl==0 go
+   through to the zany final "~ ((fl - 1) & LONG_MAX)", that would give
+   -0x80000000 instead of the desired 0.  */
+
+long
+mpf_get_si (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_exp_t exp;
+  mp_size_t size, abs_size;
+  mp_srcptr fp;
+  mp_limb_t fl;
+
+  exp = EXP (f);
+  size = SIZ (f);
+  fp = PTR (f);
+
+  /* fraction alone truncates to zero
+     this also covers zero, since we have exp==0 for zero */
+  if (exp <= 0)
+    return 0L;
+
+  /* there are some limbs above the radix point */
+
+  fl = 0;
+  abs_size = ABS (size);
+  /* The limb just above the radix point is fp[abs_size-exp]; the test
+     abs_size >= exp (with exp > 0 from above) keeps that index within
+     the stored limbs without risking overflow in the comparison.  */
+  if (abs_size >= exp)
+    fl = fp[abs_size-exp];
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* With nails a long can span two limbs; fetch the second limb above
+     the radix point when it exists (see mpf_get_ui for the full
+     explanation of these range tests).  */
+  if (exp > 1 && abs_size+1 >= exp)
+    fl |= fp[abs_size - exp + 1] << GMP_NUMB_BITS;
+#endif
+
+  if (size > 0)
+    return fl & LONG_MAX;
+  else
+    /* this form necessary to correctly handle -0x80..00 */
+    return -1 - (long) ((fl - 1) & LONG_MAX);
+}
diff --git a/mpf/get_str.c b/mpf/get_str.c
new file mode 100644
index 0000000..946c4ae
--- /dev/null
+++ b/mpf/get_str.c
@@ -0,0 +1,320 @@
+/* mpf_get_str (digit_ptr, exp, base, n_digits, a) -- Convert the floating
+   point number A to a base BASE number and store N_DIGITS raw digits at
+   DIGIT_PTR, and the base BASE exponent in the word pointed to by EXP.  For
+   example, the number 3.1416 would be returned as "31416" in DIGIT_PTR and
+   1 in EXP.
+
+Copyright 1993-1997, 2000-2003, 2005, 2006, 2011, 2015, 2017 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+#include "longlong.h"		/* for count_leading_zeros */
+
+/* Could use some more work.
+
+   1. Allocation is excessive.  Try to combine areas.  Perhaps use result
+      string area for temp limb space?
+   2. We generate up to two limbs of extra digits.  This is because we don't
+      check the exact number of bits in the input operand, and from that
+      compute an accurate exponent (variable e in the code).  It would be
+      cleaner and probably somewhat faster to change this.
+*/
+
+/* Compute base^exp and return the most significant prec limbs in rp[].
+   Put the count of omitted low limbs in *ign.
+   Return the actual size (which might be less than prec).
+   Allocation of rp[] and the temporary tp[] should be 2*prec+2 limbs.  */
+static mp_size_t
+mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
+		    mp_limb_t base, unsigned long exp,
+		    mp_size_t prec, mp_ptr tp)
+{
+  mp_size_t ign;		/* counts number of ignored low limbs in r */
+  mp_size_t off;		/* keeps track of offset where value starts */
+  mp_ptr passed_rp = rp;
+  mp_size_t rn;
+  int cnt;
+  int i;
+
+  if (exp == 0)
+    {
+      rp[0] = 1;		/* base^0 == 1 */
+      *ignp = 0;
+      return 1;
+    }
+
+  /* Left-to-right binary exponentiation: the top exponent bit is
+     accounted for by the initial rp[0] = base, then each remaining bit
+     squares the accumulator and multiplies by base when the bit is set.
+     Only the prec most significant limbs are kept; truncated low limbs
+     are tallied in ign.  */
+  rp[0] = base;
+  rn = 1;
+  off = 0;
+  ign = 0;
+  count_leading_zeros (cnt, exp);
+  for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
+    {
+      mpn_sqr (tp, rp + off, rn);
+      rn = 2 * rn;
+      rn -= tp[rn - 1] == 0;	/* strip a zero high limb, if any */
+      ign <<= 1;		/* squaring doubles the ignored low part */
+
+      off = 0;
+      if (rn > prec)
+	{
+	  /* keep only the prec most significant limbs */
+	  ign += rn - prec;
+	  off = rn - prec;
+	  rn = prec;
+	}
+      MP_PTR_SWAP (rp, tp);
+
+      if (((exp >> i) & 1) != 0)
+	{
+	  mp_limb_t cy;
+	  cy = mpn_mul_1 (rp, rp + off, rn, base);
+	  rp[rn] = cy;
+	  rn += cy != 0;
+	  off = 0;
+	}
+    }
+
+  /* The final mul_1 can push the size one limb past prec.  */
+  if (rn > prec)
+    {
+      ASSERT (rn == prec + 1);
+
+      ign += rn - prec;
+      rp += rn - prec;
+      rn = prec;
+    }
+
+  /* With somewhat less than 50% probability, we can skip this copy.  */
+  if (passed_rp != rp + off)
+    MPN_COPY_INCR (passed_rp, rp + off, rn);
+  *ignp = ign;
+  return rn;
+}
+
+char *
+mpf_get_str (char *dbuf, mp_exp_t *exp, int base, size_t n_digits, mpf_srcptr u)
+{
+  mp_exp_t ue;
+  mp_size_t n_limbs_needed;
+  size_t max_digits;
+  mp_ptr up, pp, tp;
+  mp_size_t un, pn, tn;
+  unsigned char *tstr;
+  mp_exp_t exp_in_base;
+  size_t n_digits_computed;
+  mp_size_t i;
+  const char *num_to_text;
+  size_t alloc_size = 0;
+  char *dp;
+  TMP_DECL;
+
+  up = PTR(u);
+  un = ABSIZ(u);
+  ue = EXP(u);
+
+  /* Select the digit alphabet.  base 2..36 uses lower case, 37..62 uses
+     the mixed-case alphabet below; base 0, 1 and -1 mean decimal;
+     negative base -2..-36 requests upper case.  Out-of-range bases
+     return NULL.  */
+  num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+  if (base > 1)
+    {
+      if (base <= 36)
+	num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+      else if (UNLIKELY (base > 62))
+	    return NULL;
+    }
+  else if (base > -2)
+    {
+      base = 10;
+    }
+  else
+    {
+      base = -base;
+      if (UNLIKELY (base > 36))
+	return NULL;
+    }
+
+  /* Cap the requested digit count at what u's precision can support;
+     n_digits == 0 means "as many as are significant".  */
+  MPF_SIGNIFICANT_DIGITS (max_digits, base, PREC(u));
+  if (n_digits == 0 || n_digits > max_digits)
+    n_digits = max_digits;
+
+  if (dbuf == 0)
+    {
+      /* We didn't get a string from the user.  Allocate one (and return
+	 a pointer to it) with space for `-' and terminating null.  */
+      alloc_size = n_digits + 2;
+      dbuf = __GMP_ALLOCATE_FUNC_TYPE (n_digits + 2, char);
+    }
+
+  if (un == 0)
+    {
+      *exp = 0;
+      *dbuf = 0;
+      n_digits = 0;
+      goto done;
+    }
+
+  TMP_MARK;
+
+  /* Allocate temporary digit space.  We can't put digits directly in the user
+     area, since we generate more digits than requested.  (We allocate
+     2 * GMP_LIMB_BITS extra bytes because of the digit block nature of the
+     conversion.)  */
+  tstr = (unsigned char *) TMP_ALLOC (n_digits + 2 * GMP_LIMB_BITS + 3);
+
+  LIMBS_PER_DIGIT_IN_BASE (n_limbs_needed, n_digits, base);
+
+  /* Only the n_limbs_needed most significant limbs can influence the
+     requested digits; drop the rest.  */
+  if (un > n_limbs_needed)
+    {
+      up += un - n_limbs_needed;
+      un = n_limbs_needed;
+    }
+
+  TMP_ALLOC_LIMBS_2 (pp, 2 * n_limbs_needed + 4,
+		     tp, 2 * n_limbs_needed + 4);
+
+  if (ue <= n_limbs_needed)
+    {
+      /* We need to multiply number by base^n to get an n_digits integer part.  */
+      mp_size_t n_more_limbs_needed, ign, off;
+      unsigned long e;
+
+      n_more_limbs_needed = n_limbs_needed - ue;
+      DIGITS_IN_BASE_PER_LIMB (e, n_more_limbs_needed, base);
+
+      pn = mpn_pow_1_highpart (pp, &ign, (mp_limb_t) base, e, n_limbs_needed + 1, tp);
+      /* mpn_mul wants the longer operand first.  */
+      if (un > pn)
+	mpn_mul (tp, up, un, pp, pn);	/* FIXME: mpn_mul_highpart */
+      else
+	mpn_mul (tp, pp, pn, up, un);	/* FIXME: mpn_mul_highpart */
+      tn = un + pn;
+      tn -= tp[tn - 1] == 0;
+      off = un - ue - ign;
+      if (off < 0)
+	{
+	  /* shift the product up, zero-filling the vacated low limbs */
+	  MPN_COPY_DECR (tp - off, tp, tn);
+	  MPN_ZERO (tp, -off);
+	  tn -= off;
+	  off = 0;
+	}
+      n_digits_computed = mpn_get_str (tstr, base, tp + off, tn - off);
+
+      exp_in_base = n_digits_computed - e;
+    }
+  else
+    {
+      /* We need to divide number by base^n to get an n_digits integer part.  */
+      mp_size_t n_less_limbs_needed, ign, off, xn;
+      unsigned long e;
+      mp_ptr dummyp, xp;
+
+      n_less_limbs_needed = ue - n_limbs_needed;
+      DIGITS_IN_BASE_PER_LIMB (e, n_less_limbs_needed, base);
+
+      pn = mpn_pow_1_highpart (pp, &ign, (mp_limb_t) base, e, n_limbs_needed + 1, tp);
+
+      /* Build the dividend, zero-padded at the low end to compensate for
+	 the low limbs the truncated divisor power dropped (ign).  */
+      xn = n_limbs_needed + (n_less_limbs_needed-ign);
+      xp = TMP_ALLOC_LIMBS (xn);
+      off = xn - un;
+      MPN_ZERO (xp, off);
+      MPN_COPY (xp + off, up, un);
+
+      dummyp = TMP_ALLOC_LIMBS (pn);	/* remainder, unused */
+      mpn_tdiv_qr (tp, dummyp, (mp_size_t) 0, xp, xn, pp, pn);
+      tn = xn - pn + 1;
+      tn -= tp[tn - 1] == 0;
+      n_digits_computed = mpn_get_str (tstr, base, tp, tn);
+
+      exp_in_base = n_digits_computed + e;
+    }
+
+  /* We should normally have computed too many digits.  Round the result
+     at the point indicated by n_digits.  */
+  if (n_digits_computed > n_digits)
+    {
+      size_t i;
+      /* Round the result.  */
+      if (tstr[n_digits] * 2 >= base)
+	{
+	  n_digits_computed = n_digits;
+	  for (i = n_digits - 1;; i--)
+	    {
+	      unsigned int x;
+	      x = ++(tstr[i]);
+	      if (x != base)
+		break;
+	      n_digits_computed--;	/* digit wrapped to 0; carry on */
+	      if (i == 0)
+		{
+		  /* We had something like `bbbbbbb...bd', where 2*d >= base
+		     and `b' denotes digit with significance base - 1.
+		     This rounds up to `1', increasing the exponent.  */
+		  tstr[0] = 1;
+		  n_digits_computed = 1;
+		  exp_in_base++;
+		  break;
+		}
+	    }
+	}
+    }
+
+  /* We might have fewer digits than requested as a result of rounding above,
+     (i.e. 0.999999 => 1.0) or because we have a number that simply doesn't
+     need many digits in this base (e.g., 0.125 in base 10).  */
+  if (n_digits > n_digits_computed)
+    n_digits = n_digits_computed;
+
+  /* Remove trailing 0.  There can be many zeros.  */
+  while (n_digits != 0 && tstr[n_digits - 1] == 0)
+    n_digits--;
+
+  dp = dbuf + (SIZ(u) < 0);	/* leave room for a '-' sign */
+
+  /* Translate to ASCII and copy to result string.  */
+  for (i = 0; i < n_digits; i++)
+    dp[i] = num_to_text[tstr[i]];
+  dp[n_digits] = 0;
+
+  *exp = exp_in_base;
+
+  if (SIZ(u) < 0)
+    {
+      dbuf[0] = '-';
+      n_digits++;
+    }
+
+  TMP_FREE;
+
+ done:
+  /* If the string was alloced then resize it down to the actual space
+     required.  */
+  if (alloc_size != 0)
+    {
+      __GMP_REALLOCATE_FUNC_MAYBE_TYPE (dbuf, alloc_size, n_digits + 1, char);
+    }
+
+  return dbuf;
+}
diff --git a/mpf/get_ui.c b/mpf/get_ui.c
new file mode 100644
index 0000000..e7b9333
--- /dev/null
+++ b/mpf/get_ui.c
@@ -0,0 +1,101 @@
+/* mpf_get_ui -- mpf to ulong conversion
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Any fraction bits are truncated, meaning simply discarded.
+
+   For values bigger than a ulong, the low bits are returned (the low
+   absolute value bits actually), like mpz_get_ui, but this isn't
+   documented.
+
+   Notice this is equivalent to mpz_set_f + mpz_get_ui.
+
+
+   Implementation:
+
+   The limb just above the radix point for us to extract is ptr[size-exp].
+
+   We need to check that the size-exp index falls in our available data
+   range, 0 to size-1 inclusive.  We test this without risk of an overflow
+   involving exp by requiring size>=exp (giving size-exp >= 0) and exp>0
+   (giving size-exp <= size-1).
+
+   Notice if size==0 there's no fetch, since of course size>=exp and exp>0
+   can only be true if size>0.  So there's no special handling for size==0,
+   it comes out as 0 the same as any other time we have no data at our
+   target index.
+
+   For nails, the second limb above the radix point is also required, this
+   is ptr[size-exp+1].
+
+   Again we need to check that size-exp+1 falls in our data range, 0 to
+   size-1 inclusive.  We test without risk of overflow by requiring
+   size+1>=exp (giving size-exp+1 >= 0) and exp>1 (giving size-exp+1 <=
+   size-1).
+
+   And again if size==0 these second fetch conditions are not satisfied
+   either since size+1>=exp and exp>1 are only true if size>0.
+
+   The code is arranged with exp>0 wrapping the exp>1 test since exp>1 is
+   mis-compiled by alpha gcc prior to version 3.4.  It re-writes it as
+   exp-1>0, which is incorrect when exp==MP_EXP_T_MIN.  By having exp>0
+   tested first we ensure MP_EXP_T_MIN doesn't reach exp>1.  */
+
+unsigned long
+mpf_get_ui (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  mp_srcptr fp;
+  mp_limb_t fl;
+
+  exp = EXP (f);
+  size = SIZ (f);
+  fp = PTR (f);
+
+  fl = 0;
+  if (exp > 0)
+    {
+      /* there are some limbs above the radix point */
+
+      size = ABS (size);
+      /* fp[size-exp] is the limb just above the radix point; the tests
+	 keep the index within 0..size-1 (see the long comment above) */
+      if (size >= exp)
+        fl = fp[size-exp];
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+      /* with nails a ulong can span two limbs; add the next one up */
+      if (exp > 1 && size+1 >= exp)
+        fl += (fp[size-exp+1] << GMP_NUMB_BITS);
+#endif
+    }
+
+  return (unsigned long) fl;
+}
diff --git a/mpf/init.c b/mpf/init.c
new file mode 100644
index 0000000..26ab262
--- /dev/null
+++ b/mpf/init.c
@@ -0,0 +1,41 @@
+/* mpf_init() -- Make a new multiple precision number with value 0.
+
+Copyright 1993-1995, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize R to 0 with the current default precision.  prec + 1 limbs
+   are allocated for the mantissa (one beyond the stored precision).  */
+void
+mpf_init (mpf_ptr r)
+{
+  mp_size_t prec = __gmp_default_fp_limb_precision;
+  r->_mp_size = 0;	/* size 0 together with exp 0 represents zero */
+  r->_mp_exp = 0;
+  r->_mp_prec = prec;
+  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);
+}
diff --git a/mpf/init2.c b/mpf/init2.c
new file mode 100644
index 0000000..b90a08a
--- /dev/null
+++ b/mpf/init2.c
@@ -0,0 +1,43 @@
+/* mpf_init2() -- Make a new multiple precision number with value 0.
+
+Copyright 1993-1995, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize R to 0 with at least PREC_IN_BITS bits of precision.  The
+   bit count is converted to a limb count, and prec + 1 limbs are
+   allocated for the mantissa (one beyond the stored precision).  */
+void
+mpf_init2 (mpf_ptr r, mp_bitcnt_t prec_in_bits)
+{
+  mp_size_t prec;
+
+  prec = __GMPF_BITS_TO_PREC (prec_in_bits);
+  r->_mp_size = 0;	/* size 0 together with exp 0 represents zero */
+  r->_mp_exp = 0;
+  r->_mp_prec = prec;
+  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);
+}
diff --git a/mpf/inits.c b/mpf/inits.c
new file mode 100644
index 0000000..b6d054f
--- /dev/null
+++ b/mpf/inits.c
@@ -0,0 +1,49 @@
+/* mpf_inits() -- Initialize multiple mpf_t variables and set them to 0.
+
+Copyright 2009, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include "gmp-impl.h"
+
+/* Initialize each mpf_t in the argument list to 0 with the default
+   precision.  The list must be terminated by a NULL pointer.  */
+void
+mpf_inits (mpf_ptr x, ...)
+{
+  va_list  ap;
+
+  va_start (ap, x);
+
+  /* The first argument is required, so a do-while suits: init it, then
+     keep pulling pointers until the NULL sentinel.  */
+  do
+    {
+      mpf_init (x);
+      x = va_arg (ap, mpf_ptr);
+    }
+  while (x != NULL);
+
+  va_end (ap);
+}
diff --git a/mpf/inp_str.c b/mpf/inp_str.c
new file mode 100644
index 0000000..c661a79
--- /dev/null
+++ b/mpf/inp_str.c
@@ -0,0 +1,92 @@
+/* mpf_inp_str(dest_float, stream, base) -- Input a number in base
+   BASE from stdio stream STREAM and store the result in DEST_FLOAT.
+
+Copyright 1996, 2000-2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "gmp-impl.h"
+
+/* Read a float in base BASE from STREAM (stdin if STREAM is null) into
+   ROP.  Returns the number of characters read (whitespace skipped plus
+   the token itself), or 0 if mpf_set_str rejects the token.  */
+size_t
+mpf_inp_str (mpf_ptr rop, FILE *stream, int base)
+{
+  char *str;
+  size_t alloc_size, str_size;
+  int c;
+  int res;
+  size_t nread;
+
+  if (stream == 0)
+    stream = stdin;
+
+  alloc_size = 100;
+  str = __GMP_ALLOCATE_FUNC_TYPE (alloc_size, char);
+  str_size = 0;
+  nread = 0;
+
+  /* Skip whitespace.  */
+  do
+    {
+      c = getc (stream);
+      nread++;
+    }
+  while (isspace (c));
+
+  /* Collect the token up to the next whitespace or EOF, growing the
+     buffer by 3/2 when full.  */
+  for (;;)
+    {
+      if (str_size >= alloc_size)
+	{
+	  size_t old_alloc_size = alloc_size;
+	  alloc_size = alloc_size * 3 / 2;
+	  str = __GMP_REALLOCATE_FUNC_TYPE (str, old_alloc_size, alloc_size, char);
+	}
+      if (c == EOF || isspace (c))
+	break;
+      str[str_size++] = c;
+      c = getc (stream);
+    }
+  ungetc (c, stream);	/* terminator is not part of the token */
+  nread--;		/* ...and must not be counted as read */
+
+  /* Ensure room for the terminating null.  */
+  if (str_size >= alloc_size)
+    {
+      size_t old_alloc_size = alloc_size;
+      alloc_size = alloc_size * 3 / 2;
+      str = __GMP_REALLOCATE_FUNC_TYPE (str, old_alloc_size, alloc_size, char);
+    }
+  str[str_size] = 0;
+
+  res = mpf_set_str (rop, str, base);
+  (*__gmp_free_func) (str, alloc_size);
+
+  if (res == -1)
+    return 0;			/* error */
+
+  return str_size + nread;
+}
diff --git a/mpf/int_p.c b/mpf/int_p.c
new file mode 100644
index 0000000..024cfb5
--- /dev/null
+++ b/mpf/int_p.c
@@ -0,0 +1,55 @@
+/* mpf_integer_p -- test whether an mpf is an integer */
+
+/*
+Copyright 2001, 2002, 2014-2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+int
+mpf_integer_p (mpf_srcptr f) __GMP_NOTHROW	/* return non-zero iff F has no fractional part */
+{
+  mp_srcptr fp;
+  mp_exp_t exp;
+  mp_size_t size;
+
+  size = SIZ (f);
+  exp = EXP (f);
+  if (exp <= 0)
+    return (size == 0);  /* zero is an integer,
+			    others have only fraction limbs */
+  size = ABS (size);
+
+  /* Ignore zeroes at the low end of F.  */
+  for (fp = PTR (f); *fp == 0; ++fp)
+    --size;		/* terminates: a non-zero mpf's high limb is non-zero */
+
+  /* no fraction limbs */
+  return size <= exp;	/* all remaining limbs sit at or above the radix point */
+}
diff --git a/mpf/iset.c b/mpf/iset.c
new file mode 100644
index 0000000..07f9006
--- /dev/null
+++ b/mpf/iset.c
@@ -0,0 +1,61 @@
+/* mpf_init_set -- Initialize a float and assign it from another float.
+
+Copyright 1993-1995, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_init_set (mpf_ptr r, mpf_srcptr s)	/* initialize R at default precision and copy S into it */
+{
+  mp_ptr rp, sp;
+  mp_size_t ssize, size;
+  mp_size_t prec;
+
+  prec = __gmp_default_fp_limb_precision;
+  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);	/* one slack limb, as mpf_init allocates */
+  r->_mp_prec = prec;
+
+  prec++;		/* lie not to lose precision in assignment */
+  ssize = s->_mp_size;
+  size = ABS (ssize);	/* limb count; the sign stays in ssize */
+
+  rp = r->_mp_d;
+  sp = s->_mp_d;
+
+  if (size > prec)
+    {
+      sp += size - prec;	/* keep only the most significant PREC limbs of S */
+      size = prec;
+    }
+
+  r->_mp_exp = s->_mp_exp;
+  r->_mp_size = ssize >= 0 ? size : -size;	/* re-apply the sign of S */
+
+  MPN_COPY (rp, sp, size);
+}
diff --git a/mpf/iset_d.c b/mpf/iset_d.c
new file mode 100644
index 0000000..2f36240
--- /dev/null
+++ b/mpf/iset_d.c
@@ -0,0 +1,41 @@
+/* mpf_init_set_d -- Initialize a float and assign it from a double.
+
+Copyright 1993-1995, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_init_set_d (mpf_ptr r, double val)	/* initialize R at default precision and assign it from VAL */
+{
+  mp_size_t prec = __gmp_default_fp_limb_precision;
+  r->_mp_prec = prec;
+  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);	/* one slack limb, as mpf_init allocates */
+
+  mpf_set_d (r, val);	/* the double conversion itself is delegated to mpf_set_d */
+}
diff --git a/mpf/iset_si.c b/mpf/iset_si.c
new file mode 100644
index 0000000..65abb9a
--- /dev/null
+++ b/mpf/iset_si.c
@@ -0,0 +1,57 @@
+/* mpf_init_set_si() -- Initialize a float and assign it from a signed int.
+
+Copyright 1993-1995, 2000, 2001, 2003, 2004, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_init_set_si (mpf_ptr r, long int val)	/* initialize R at default precision and assign it from VAL */
+{
+  mp_size_t prec = __gmp_default_fp_limb_precision;
+  mp_size_t size;
+  mp_limb_t vl;
+
+  r->_mp_prec = prec;
+  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);	/* one slack limb, as mpf_init allocates */
+
+  vl = (mp_limb_t) ABS_CAST (unsigned long int, val);	/* |val| without overflow on LONG_MIN */
+
+  r->_mp_d[0] = vl & GMP_NUMB_MASK;
+  size = vl != 0;		/* zero is represented with size 0 */
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  vl >>= GMP_NUMB_BITS;		/* with nails a long may need a second limb */
+  r->_mp_d[1] = vl;
+  size += (vl != 0);
+#endif
+
+  r->_mp_exp = size;		/* an integer's exponent equals its limb count */
+  r->_mp_size = val >= 0 ? size : -size;
+}
diff --git a/mpf/iset_str.c b/mpf/iset_str.c
new file mode 100644
index 0000000..10acda9
--- /dev/null
+++ b/mpf/iset_str.c
@@ -0,0 +1,43 @@
+/* mpf_init_set_str -- Initialize a float and assign it from a string.
+
+Copyright 1995, 1996, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+int
+mpf_init_set_str (mpf_ptr r, const char *s, int base)	/* initialize R, then parse S; returns mpf_set_str's result */
+{
+  mp_size_t prec = __gmp_default_fp_limb_precision;
+  r->_mp_size = 0;	/* start from a valid zero so R is usable even if parsing fails */
+  r->_mp_exp = 0;
+  r->_mp_prec = prec;
+  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);	/* one slack limb, as mpf_init allocates */
+
+  return mpf_set_str (r, s, base);	/* 0 on success, -1 if S is not a valid number */
+}
diff --git a/mpf/iset_ui.c b/mpf/iset_ui.c
new file mode 100644
index 0000000..2c426bf
--- /dev/null
+++ b/mpf/iset_ui.c
@@ -0,0 +1,52 @@
+/* mpf_init_set_ui() -- Initialize a float and assign it from an unsigned int.
+
+Copyright 1993-1995, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_init_set_ui (mpf_ptr r, unsigned long int val)	/* initialize R at default precision and assign it from VAL */
+{
+  mp_size_t prec = __gmp_default_fp_limb_precision;
+  mp_size_t size;
+
+  r->_mp_prec = prec;
+  r->_mp_d = __GMP_ALLOCATE_FUNC_LIMBS (prec + 1);	/* one slack limb, as mpf_init allocates */
+  r->_mp_d[0] = val & GMP_NUMB_MASK;
+  size = (val != 0);	/* zero is represented with size 0 */
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  val >>= GMP_NUMB_BITS;	/* with nails an unsigned long may need a second limb */
+  r->_mp_d[1] = val;
+  size += (val != 0);
+#endif
+
+  r->_mp_size = size;
+  r->_mp_exp = size;	/* an integer's exponent equals its limb count */
+}
diff --git a/mpf/mul.c b/mpf/mul.c
new file mode 100644
index 0000000..518c060
--- /dev/null
+++ b/mpf/mul.c
@@ -0,0 +1,134 @@
+/* mpf_mul -- Multiply two floats.
+
+Copyright 1993, 1994, 1996, 2001, 2005, 2019, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_mul (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)	/* R = U * V, truncated to R's precision */
+{
+  mp_size_t sign_product;
+  mp_size_t prec = PREC (r);
+  mp_size_t rsize;
+  mp_limb_t cy_limb;
+  mp_ptr rp, tp;
+  mp_size_t adj;
+  TMP_DECL;
+
+  if (u == v)		/* squaring: one truncation suffices and mpn_sqr applies */
+    {
+      mp_srcptr up;
+      mp_size_t usize;
+
+      sign_product = 0;	/* u*u is never negative */
+
+      usize = ABSIZ (u);
+
+      up = PTR (u);
+      if (usize > prec)
+	{
+	  up += usize - prec;	/* use only the PREC most significant limbs */
+	  usize = prec;
+	}
+
+      if (usize == 0)
+	{
+	  SIZ (r) = 0;
+	  EXP (r) = 0;		/* ??? */
+	  return;
+	}
+      else
+	{
+	  TMP_MARK;
+	  rsize = 2 * usize;
+	  tp = TMP_ALLOC_LIMBS (rsize);
+
+	  mpn_sqr (tp, up, usize);
+	  cy_limb = tp[rsize - 1];	/* top limb decides whether the result shrinks by one */
+	}
+    }
+  else
+    {
+      mp_srcptr up, vp;
+      mp_size_t usize, vsize;
+
+      usize = SIZ (u);
+      vsize = SIZ (v);
+      sign_product = usize ^ vsize;	/* negative iff exactly one operand is negative */
+
+      usize = ABS (usize);
+      vsize = ABS (vsize);
+
+      up = PTR (u);
+      vp = PTR (v);
+      if (usize > prec)
+	{
+	  up += usize - prec;	/* truncate each operand to the result precision */
+	  usize = prec;
+	}
+      if (vsize > prec)
+	{
+	  vp += vsize - prec;
+	  vsize = prec;
+	}
+
+      if (usize == 0 || vsize == 0)
+	{
+	  SIZ (r) = 0;
+	  EXP (r) = 0;
+	  return;
+	}
+      else
+	{
+	  TMP_MARK;
+	  rsize = usize + vsize;
+	  tp = TMP_ALLOC_LIMBS (rsize);
+	  cy_limb = (usize >= vsize	/* mpn_mul requires the longer operand first */
+		     ? mpn_mul (tp, up, usize, vp, vsize)
+		     : mpn_mul (tp, vp, vsize, up, usize));
+
+	}
+    }
+
+  adj = cy_limb == 0;	/* zero high limb: result is one limb shorter than maximal */
+  rsize -= adj;
+  prec++;		/* keep an extra limb so a later assignment loses no precision */
+  if (rsize > prec)
+    {
+      tp += rsize - prec;	/* keep only the PREC+1 most significant product limbs */
+      rsize = prec;
+    }
+  rp = PTR (r);
+  MPN_COPY (rp, tp, rsize);
+  EXP (r) = EXP (u) + EXP (v) - adj;
+  SIZ (r) = sign_product >= 0 ? rsize : -rsize;
+
+  TMP_FREE;
+}
diff --git a/mpf/mul_2exp.c b/mpf/mul_2exp.c
new file mode 100644
index 0000000..5de7363
--- /dev/null
+++ b/mpf/mul_2exp.c
@@ -0,0 +1,132 @@
+/* mpf_mul_2exp -- Multiply a float by 2^n.
+
+Copyright 1993, 1994, 1996, 2000-2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Multiples of GMP_NUMB_BITS in exp simply mean an amount added to EXP(u)
+   to set EXP(r).  The remainder exp%GMP_NUMB_BITS is then a left shift for
+   the limb data.
+
+   If exp%GMP_NUMB_BITS == 0 then there's no shifting, we effectively just
+   do an mpz_set with changed EXP(r).  Like mpz_set we take prec+1 limbs in
+   this case.  Although just prec would suffice, it's nice to have
+   mpf_mul_2exp with exp==0 come out the same as mpz_set.
+
+   When shifting we take up to prec many limbs from the input.  Our shift is
+   cy = mpn_lshift (PTR(r), PTR(u)+k, size, ...), where k is the number of
+   low limbs dropped from u, and the carry out is stored to PTR(r)[size].
+
+   It may be noted that the low limb PTR(r)[0] doesn't incorporate bits from
+   PTR(u)[k-1] (when k>=1 makes that limb available).  Taking just prec
+   limbs from the input (with the high non-zero) is enough bits for the
+   application requested precision, there's no need for extra work.
+
+   If r==u the shift will have overlapping operands.  When k==0 (ie. when
+   usize <= prec), the overlap is supported by lshift (ie. dst == src).
+
+   But when r==u and k>=1 (ie. usize > prec), we would have an invalid
+   overlap (ie. mpn_lshift (rp, rp+k, ...)).  In this case we must instead
+   use mpn_rshift (PTR(r)+1, PTR(u)+k, size, NUMB-shift) with the carry out
+   stored to PTR(r)[0].  An rshift by NUMB-shift bits like this gives
+   identical data, it's just its overlap restrictions which differ.
+
+   Enhancements:
+
+   The way mpn_lshift is used means successive mpf_mul_2exp calls on the
+   same operand will accumulate low zero limbs, until prec+1 limbs is
+   reached.  This is wasteful for subsequent operations.  When abs_usize <=
+   prec, we should test the low exp%GMP_NUMB_BITS many bits of PTR(u)[0],
+   ie. those which would be shifted out by an mpn_rshift.  If they're zero
+   then use that mpn_rshift.  */
+
+void
+mpf_mul_2exp (mpf_ptr r, mpf_srcptr u, mp_bitcnt_t exp)	/* R = U * 2^exp; see the block comment above for the strategy */
+{
+  mp_srcptr up;
+  mp_ptr rp = r->_mp_d;
+  mp_size_t usize;
+  mp_size_t abs_usize;
+  mp_size_t prec = r->_mp_prec;
+  mp_exp_t uexp = u->_mp_exp;
+
+  usize = u->_mp_size;
+
+  if (UNLIKELY (usize == 0))
+    {
+      r->_mp_size = 0;	/* 0 * 2^exp is still 0 */
+      r->_mp_exp = 0;
+      return;
+    }
+
+  abs_usize = ABS (usize);
+  up = u->_mp_d;
+
+  if (exp % GMP_NUMB_BITS == 0)	/* whole-limb shift: only the exponent changes */
+    {
+      prec++;			/* retain more precision here as we don't need
+				   to account for carry-out here */
+      if (abs_usize > prec)
+	{
+	  up += abs_usize - prec;	/* keep the most significant limbs only */
+	  abs_usize = prec;
+	}
+      if (rp != up)
+	MPN_COPY_INCR (rp, up, abs_usize);	/* INCR variant is safe for this rp < up overlap */
+      r->_mp_exp = uexp + exp / GMP_NUMB_BITS;
+    }
+  else
+    {
+      mp_limb_t cy_limb;
+      mp_size_t adj;
+      if (abs_usize > prec)
+	{
+	  up += abs_usize - prec;
+	  abs_usize = prec;
+	  /* Use mpn_rshift since mpn_lshift operates downwards, and we
+	     therefore would clobber part of U before using that part, in case
+	     R is the same variable as U.  */
+	  cy_limb = mpn_rshift (rp + 1, up, abs_usize,
+				GMP_NUMB_BITS - exp % GMP_NUMB_BITS);
+	  rp[0] = cy_limb;	/* bits shifted out the bottom become the new low limb */
+	  adj = rp[abs_usize] != 0;	/* non-zero top limb means the result grew by one limb */
+	}
+      else
+	{
+	  cy_limb = mpn_lshift (rp, up, abs_usize, exp % GMP_NUMB_BITS);
+	  rp[abs_usize] = cy_limb;	/* carry-out stored above the shifted limbs */
+	  adj = cy_limb != 0;
+	}
+
+      abs_usize += adj;
+      r->_mp_exp = uexp + exp / GMP_NUMB_BITS + adj;
+    }
+  r->_mp_size = usize >= 0 ? abs_usize : -abs_usize;	/* preserve the sign of U */
+}
diff --git a/mpf/mul_ui.c b/mpf/mul_ui.c
new file mode 100644
index 0000000..30da6ae
--- /dev/null
+++ b/mpf/mul_ui.c
@@ -0,0 +1,181 @@
+/* mpf_mul_ui -- Multiply a float and an unsigned integer.
+
+Copyright 1993, 1994, 1996, 2001, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The core operation is a multiply of PREC(r) limbs from u by v, producing
+   either PREC(r) or PREC(r)+1 result limbs.  If u is shorter than PREC(r),
+   then we take only as much as it has.  If u is longer we incorporate a
+   carry from the lower limbs.
+
+   If u has just 1 extra limb, then the carry to add is high(up[0]*v).  That
+   is of course what mpn_mul_1 would do if it was called with PREC(r)+1
+   limbs of input.
+
+   If u has more than 1 extra limb, then there can be a further carry bit
+   out of lower uncalculated limbs (the way the low of one product adds to
+   the high of the product below it).  This is of course what an mpn_mul_1
+   would do if it was called with the full u operand.  But we instead work
+   downwards explicitly, until a carry occurs or until a value other than
+   GMP_NUMB_MAX occurs (that being the only value a carry bit can propagate
+   across).
+
+   The carry determination normally requires two umul_ppmm's, only rarely
+   will GMP_NUMB_MAX occur and require further products.
+
+   The carry limb is conveniently added into the mul_1 using mpn_mul_1c when
+   that function exists, otherwise a subsequent mpn_add_1 is needed.
+
+   Clearly when mpn_mul_1c is used the carry must be calculated first.  But
+   this is also the case when add_1 is used, since if r==u and ABSIZ(r) >
+   PREC(r) then the mpn_mul_1 overwrites the low part of the input.
+
+   A reuse r==u with size > prec can occur from a size PREC(r)+1 in the
+   usual way, or it can occur from an mpf_set_prec_raw leaving a bigger
+   sized value.  In both cases we can end up calling mpn_mul_1 with
+   overlapping src and dst regions, but this will be with dst < src and such
+   an overlap is permitted.
+
+   Not done:
+
+   No attempt is made to determine in advance whether the result will be
+   PREC(r) or PREC(r)+1 limbs.  If it's going to be PREC(r)+1 then we could
+   take one less limb from u and generate just PREC(r), that of course
+   satisfying application requested precision.  But any test counting bits
+   or forming the high product would almost certainly take longer than the
+   incremental cost of an extra limb in mpn_mul_1.
+
+   Enhancements:
+
+   Repeated mpf_mul_ui's with an even v will accumulate low zero bits on the
+   result, leaving low zero limbs after a while, which it might be nice to
+   strip to save work in subsequent operations.  Calculating the low limb
+   explicitly would let us direct mpn_mul_1 to put the balance at rp when
+   the low is zero (instead of normally rp+1).  But it's not clear whether
+   this would be worthwhile.  Explicit code for the low limb will probably
+   be slower than having it done in mpn_mul_1, so we need to consider how
+   often a zero will be stripped and how much that's likely to save
+   later.  */
+
+void
+mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)	/* R = U * V; see the block comment above for the carry scheme */
+{
+  mp_srcptr up;
+  mp_size_t usize;
+  mp_size_t size;
+  mp_size_t prec, excess;
+  mp_limb_t cy_limb, vl, cbit, cin;
+  mp_ptr rp;
+
+  usize = u->_mp_size;
+  if (UNLIKELY (v == 0) || UNLIKELY (usize == 0))
+    {
+      r->_mp_size = 0;	/* either factor zero makes the product zero */
+      r->_mp_exp = 0;
+      return;
+    }
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (v > GMP_NUMB_MAX)
+    {
+      mpf_t     vf;
+      mp_limb_t vp[2];
+      vp[0] = v & GMP_NUMB_MASK;
+      vp[1] = v >> GMP_NUMB_BITS;
+      PTR(vf) = vp;	/* stack-local two-limb mpf wrapping V */
+      SIZ(vf) = 2;
+      ASSERT_CODE (PREC(vf) = 2);
+      EXP(vf) = 2;
+      mpf_mul (r, u, vf);	/* fall back to full multiply when V needs two limbs */
+      return;
+    }
+#endif
+
+  size = ABS (usize);
+  prec = r->_mp_prec;
+  up = u->_mp_d;
+  vl = v;
+  excess = size - prec;
+  cin = 0;
+
+  if (excess > 0)
+    {
+      /* up is bigger than desired rp, shorten it to prec limbs and
+         determine a carry-in */
+
+      mp_limb_t  vl_shifted = vl << GMP_NAIL_BITS;
+      mp_limb_t  hi, lo, next_lo, sum;
+      mp_size_t  i;
+
+      /* high limb of top product */
+      i = excess - 1;
+      umul_ppmm (cin, lo, up[i], vl_shifted);
+
+      /* and carry bit out of products below that, if any */
+      for (;;)
+        {
+          i--;
+          if (i < 0)
+            break;
+
+          umul_ppmm (hi, next_lo, up[i], vl_shifted);
+          lo >>= GMP_NAIL_BITS;
+          ADDC_LIMB (cbit, sum, hi, lo);	/* add with carry-out captured in cbit */
+          cin += cbit;
+          lo = next_lo;
+
+          /* Continue only if the sum is GMP_NUMB_MAX.  GMP_NUMB_MAX is the
+             only value a carry from below can propagate across.  If we've
+             just seen the carry out (ie. cbit!=0) then sum!=GMP_NUMB_MAX,
+             so this test stops us for that case too.  */
+          if (LIKELY (sum != GMP_NUMB_MAX))
+            break;
+        }
+
+      up += excess;	/* multiply just the top PREC limbs, with CIN from below */
+      size = prec;
+    }
+
+  rp = r->_mp_d;
+#if HAVE_NATIVE_mpn_mul_1c
+  cy_limb = mpn_mul_1c (rp, up, size, vl, cin);
+#else
+  cy_limb = mpn_mul_1 (rp, up, size, vl);
+  __GMPN_ADD_1 (cbit, rp, rp, size, cin);	/* no mul_1c available: add the carry-in afterwards */
+  cy_limb += cbit;
+#endif
+  rp[size] = cy_limb;
+  cy_limb = cy_limb != 0;	/* a non-zero carry-out lengthens the result by one limb */
+  r->_mp_exp = u->_mp_exp + cy_limb;
+  size += cy_limb;
+  r->_mp_size = usize >= 0 ? size : -size;	/* sign of R follows the sign of U */
+}
diff --git a/mpf/neg.c b/mpf/neg.c
new file mode 100644
index 0000000..d294815
--- /dev/null
+++ b/mpf/neg.c
@@ -0,0 +1,61 @@
+/* mpf_neg -- Negate a float.
+
+Copyright 1993-1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_neg (mpf_ptr r, mpf_srcptr u)	/* R = -U */
+{
+  mp_size_t size;
+
+  size = -u->_mp_size;	/* negation just flips the sign of the size field */
+  if (r != u)		/* a distinct destination must also receive the limb data */
+    {
+      mp_size_t prec;
+      mp_size_t asize;
+      mp_ptr rp, up;
+
+      prec = r->_mp_prec + 1;	/* lie not to lose precision in assignment */
+      asize = ABS (size);
+      rp = r->_mp_d;
+      up = u->_mp_d;
+
+      if (asize > prec)
+	{
+	  up += asize - prec;	/* keep only the most significant PREC limbs */
+	  asize = prec;
+	}
+
+      MPN_COPY (rp, up, asize);
+      r->_mp_exp = u->_mp_exp;
+      size = size >= 0 ? asize : -asize;	/* re-apply the negated sign to the copied length */
+    }
+  r->_mp_size = size;
+}
diff --git a/mpf/out_str.c b/mpf/out_str.c
new file mode 100644
index 0000000..1802d0f
--- /dev/null
+++ b/mpf/out_str.c
@@ -0,0 +1,116 @@
+/* mpf_out_str (stream, base, n_digits, op) -- Print N_DIGITS digits from
+   the float OP to STREAM in base BASE.  Return the number of characters
+   written, or 0 if an error occurred.
+
+Copyright 1996, 1997, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for localeconv */
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+size_t
+mpf_out_str (FILE *stream, int base, size_t n_digits, mpf_srcptr op)	/* print OP as "0.mantissa e exp"; returns chars written, 0 on error */
+{
+  char *str;
+  mp_exp_t exp;
+  size_t written;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (base == 0)
+    base = 10;		/* base 0 means default decimal */
+  if (n_digits == 0)
+    MPF_SIGNIFICANT_DIGITS (n_digits, base, op->_mp_prec);	/* default: all significant digits */
+
+  if (stream == 0)
+    stream = stdout;	/* NULL stream means standard output */
+
+  /* Consider these changes:
+     * Don't allocate memory here for huge n_digits; pass NULL to mpf_get_str.
+     * Make mpf_get_str allocate extra space when passed NULL, to avoid
+       allocating two huge string buffers.
+     * Implement more/other allocation reductions tricks.  */
+
+  str = (char *) TMP_ALLOC (n_digits + 2); /* extra for minus sign and \0 */
+
+  mpf_get_str (str, &exp, base, n_digits, op);
+  n_digits = strlen (str);	/* may come back shorter than requested */
+
+  written = 0;
+
+  /* Write sign */
+  if (str[0] == '-')
+    {
+      str++;		/* step past the sign so the mantissa write below is digits only */
+      fputc ('-', stream);
+      written = 1;
+      n_digits--;
+    }
+
+  {
+    const char  *point = GMP_DECIMAL_POINT;	/* locale-dependent decimal point string */
+    size_t      pointlen = strlen (point);
+    putc ('0', stream);
+    fwrite (point, 1, pointlen, stream);
+    written += pointlen + 1;
+  }
+
+  /* Write mantissa */
+  {
+    size_t fwret;
+    fwret = fwrite (str, 1, n_digits, stream);
+    written += fwret;
+  }
+
+  /* Write exponent */
+  {
+    int fpret;
+    fpret = fprintf (stream, (base <= 10 ? "e%ld" : "@%ld"), exp);	/* '@' avoids clashing with digit 'e' in bases > 10 */
+    written += fpret;
+  }
+
+  TMP_FREE;
+  return ferror (stream) ? 0 : written;	/* 0 signals a stream error */
+}
diff --git a/mpf/pow_ui.c b/mpf/pow_ui.c
new file mode 100644
index 0000000..8d54dc0
--- /dev/null
+++ b/mpf/pow_ui.c
@@ -0,0 +1,83 @@
+/* mpf_pow_ui -- Compute b^e.
+
+Copyright 1998, 1999, 2001, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This uses a plain left-to-right square-and-multiply algorithm.
+
+   FIXME: When popcount(e) is not too small, it would probably speed things up
+   to use a k-ary sliding window algorithm.  */
+
+void
+mpf_pow_ui (mpf_ptr r, mpf_srcptr b, unsigned long int e)	/* R = B^E by binary exponentiation */
+{
+  mpf_t t;
+  int cnt;
+
+  if (e <= 1)
+    {
+      if (e == 0)
+	mpf_set_ui (r, 1);	/* b^0 == 1, including for b == 0 */
+      else
+	mpf_set (r, b);		/* b^1 == b */
+      return;
+    }
+
+  count_leading_zeros (cnt, (mp_limb_t) e);
+  cnt = GMP_LIMB_BITS - 1 - cnt;	/* cnt = position of E's most significant set bit */
+
+  /* Increase computation precision as a function of the exponent.  Adding
+     log2(popcount(e) + log2(e)) bits should be sufficient, but we add log2(e),
+     i.e. much more.  With mpf's rounding of precision to whole limbs, this
+     will be excessive only when limbs are artificially small.  */
+  mpf_init2 (t, mpf_get_prec (r) + cnt);
+
+  mpf_set (t, b);		/* consume most significant bit */
+  while (--cnt > 0)
+    {
+      mpf_mul (t, t, t);	/* square once per exponent bit, scanning high to low */
+      if ((e >> cnt) & 1)
+	mpf_mul (t, t, b);	/* multiply in B when this bit of E is set */
+    }
+
+  /* Do the last iteration specially in order to save a copy operation.  */
+  if (e & 1)
+    {
+      mpf_mul (t, t, t);
+      mpf_mul (r, t, b);	/* final multiply writes straight into R */
+    }
+  else
+    {
+      mpf_mul (r, t, t);	/* final square writes straight into R */
+    }
+
+  mpf_clear (t);
+}
diff --git a/mpf/random2.c b/mpf/random2.c
new file mode 100644
index 0000000..2e0163c
--- /dev/null
+++ b/mpf/random2.c
@@ -0,0 +1,66 @@
+/* mpf_random2 -- Generate a positive random mpf_t of specified size, with
+   long runs of consecutive ones and zeros in the binary representation.
+   Intended for testing of other MP routines.
+
+Copyright 1995, 1996, 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+void
+mpf_random2 (mpf_ptr x, mp_size_t xs, mp_exp_t exp)
+{
+  mp_size_t xn;
+  mp_size_t prec;
+  mp_limb_t elimb;
+
+  xn = ABS (xs);
+  prec = PREC(x);
+
+  /* A zero-limb request means the value zero.  */
+  if (xn == 0)
+    {
+      EXP(x) = 0;
+      SIZ(x) = 0;
+      return;
+    }
+
+  /* Never generate more limbs than x can hold (prec plus the guard limb).  */
+  if (xn > prec + 1)
+    xn = prec + 1;
+
+  /* General random mantissa.  */
+  mpn_random2 (PTR(x), xn);
+
+  /* Generate random exponent, uniform in [-|exp|, |exp|].  */
+  _gmp_rand (&elimb, RANDS, GMP_NUMB_BITS);
+  exp = ABS (exp);
+  exp = elimb % (2 * exp + 1) - exp;
+
+  EXP(x) = exp;
+  /* The sign of the requested size xs selects the sign of the result.  */
+  SIZ(x) = xs < 0 ? -xn : xn;
+}
diff --git a/mpf/reldiff.c b/mpf/reldiff.c
new file mode 100644
index 0000000..3fe6590
--- /dev/null
+++ b/mpf/reldiff.c
@@ -0,0 +1,64 @@
+/* mpf_reldiff -- Generate the relative difference of two floats.
+
+Copyright 1996, 2001, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* The precision we use for d = x-y is based on what mpf_div will want from
+   the dividend.  It calls mpn_div_q to produce a quotient of rprec+1 limbs.
+   So rprec+1 == dsize - xsize + 1, hence dprec = rprec+xsize.  */
+
+void
+mpf_reldiff (mpf_ptr rdiff, mpf_srcptr x, mpf_srcptr y)
+{
+  if (UNLIKELY (SIZ(x) == 0))
+    {
+      /* x = 0: define the result as 1 when y != 0, and 0 when y = 0 too,
+	 rather than dividing by zero.  */
+      mpf_set_ui (rdiff, (unsigned long int) (mpf_sgn (y) != 0));
+    }
+  else
+    {
+      mp_size_t dprec;
+      mpf_t d;
+      TMP_DECL;
+
+      TMP_MARK;
+      /* Precision for d = x-y, chosen per the comment above so mpf_div gets
+	 the dividend size it wants for PREC(rdiff)+1 quotient limbs.  */
+      dprec = PREC(rdiff) + ABSIZ(x);
+      ASSERT (PREC(rdiff)+1 == dprec - ABSIZ(x) + 1);
+
+      /* d is a temporary mpf with limbs on the TMP stack; its size and
+	 exponent fields are filled in by mpf_sub below.  */
+      PREC(d) = dprec;
+      PTR(d) = TMP_ALLOC_LIMBS (dprec + 1);
+
+      mpf_sub (d, x, y);
+      /* Take |x-y| by forcing the size field non-negative.  */
+      SIZ(d) = ABSIZ(d);
+      mpf_div (rdiff, d, x);
+
+      TMP_FREE;
+    }
+}
diff --git a/mpf/set.c b/mpf/set.c
new file mode 100644
index 0000000..382fe86
--- /dev/null
+++ b/mpf/set.c
@@ -0,0 +1,55 @@
+/* mpf_set -- Assign a float from another float.
+
+Copyright 1993-1995, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Assign r from u, truncating to r's precision if u carries more limbs.
+   Works for r == u as well.  */
+void
+mpf_set (mpf_ptr r, mpf_srcptr u)
+{
+  mp_srcptr up;
+  mp_size_t usize, abs_usize;
+  mp_size_t prec;
+
+  /* Keep one limb beyond the nominal precision, so a plain assignment does
+     not lose precision.  */
+  prec = PREC (r) + 1;
+  usize = SIZ (u);
+  abs_usize = ABS (usize);
+  up = PTR (u);
+
+  /* More limbs than we will store: keep only the most significant ones.  */
+  if (abs_usize > prec)
+    {
+      up += abs_usize - prec;
+      abs_usize = prec;
+    }
+
+  EXP (r) = EXP (u);
+  SIZ (r) = usize >= 0 ? abs_usize : -abs_usize;
+  /* An incrementing copy is safe here even when r == u (dst <= src).  */
+  MPN_COPY_INCR (PTR (r), up, abs_usize);
+}
diff --git a/mpf/set_d.c b/mpf/set_d.c
new file mode 100644
index 0000000..0442f2f
--- /dev/null
+++ b/mpf/set_d.c
@@ -0,0 +1,59 @@
+/* mpf_set_d -- Assign a float from a double.
+
+Copyright 1993-1996, 2001, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp-impl.h"
+
+/* Assign r from the double d.  NaN and Inf are rejected via the GMP
+   invalid-operation mechanism.  */
+void
+mpf_set_d (mpf_ptr r, double d)
+{
+  int negative;
+
+  DOUBLE_NAN_INF_ACTION (d,
+                         __gmp_invalid_operation (),
+                         __gmp_invalid_operation ());
+
+  if (UNLIKELY (d == 0))
+    {
+      /* Zero is encoded with both size and exponent fields 0.  */
+      SIZ(r) = 0;
+      EXP(r) = 0;
+      return;
+    }
+
+  /* Peel off the sign, then convert |d| into limbs.  */
+  if (d < 0)
+    {
+      negative = 1;
+      d = -d;
+    }
+  else
+    negative = 0;
+
+  SIZ(r) = negative ? -LIMBS_PER_DOUBLE : LIMBS_PER_DOUBLE;
+  EXP(r) = __gmp_extract_double (PTR(r), d);
+}
diff --git a/mpf/set_dfl_prec.c b/mpf/set_dfl_prec.c
new file mode 100644
index 0000000..9be71c0
--- /dev/null
+++ b/mpf/set_dfl_prec.c
@@ -0,0 +1,39 @@
+/* mpf_set_default_prec --
+
+Copyright 1993-1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Default precision for newly initialized mpf variables, stored as a limb
+   count via the __GMPF_BITS_TO_PREC mapping; initialized for 53 bits, the
+   precision of an IEEE double.  */
+mp_size_t __gmp_default_fp_limb_precision = __GMPF_BITS_TO_PREC (53);
+
+/* Set the precision, given in bits, used by subsequently initialized mpf
+   variables.  */
+void
+mpf_set_default_prec (mp_bitcnt_t prec_in_bits) __GMP_NOTHROW
+{
+  __gmp_default_fp_limb_precision = __GMPF_BITS_TO_PREC (prec_in_bits);
+}
diff --git a/mpf/set_prc.c b/mpf/set_prc.c
new file mode 100644
index 0000000..40c3f0e
--- /dev/null
+++ b/mpf/set_prc.c
@@ -0,0 +1,68 @@
+/* mpf_set_prec(x) -- Change the precision of x.
+
+Copyright 1993-1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* A full new_prec+1 limbs are always retained, even though just new_prec
+   would satisfy the requested precision.  If size==new_prec+1 then
+   certainly new_prec+1 should be kept since no copying is needed in that
+   case.  If just new_prec was kept for size>new_prec+1 it'd be a bit
+   inconsistent.  */
+
+void
+mpf_set_prec (mpf_ptr x, mp_bitcnt_t new_prec_in_bits)
+{
+  mp_size_t  old_prec, new_prec, new_prec_plus1;
+  mp_size_t  size, sign;
+  mp_ptr     xp;
+
+  new_prec = __GMPF_BITS_TO_PREC (new_prec_in_bits);
+  old_prec = PREC(x);
+
+  /* do nothing if already the right precision */
+  if (new_prec == old_prec)
+    return;
+
+  PREC(x) = new_prec;
+  new_prec_plus1 = new_prec + 1;
+
+  /* retain most significant limbs */
+  sign = SIZ(x);
+  size = ABS (sign);
+  xp = PTR(x);
+  if (size > new_prec_plus1)
+    {
+      /* Shrinking: slide the top new_prec+1 limbs down to the start of the
+	 block before it is reallocated smaller.  MPN_COPY_INCR handles this
+	 overlapping (dst < src) copy.  */
+      SIZ(x) = (sign >= 0 ? new_prec_plus1 : -new_prec_plus1);
+      MPN_COPY_INCR (xp, xp + size - new_prec_plus1, new_prec_plus1);
+    }
+
+  /* Resize the limb block from the old to the new allocation, always
+     new_prec+1 limbs per the comment above.  */
+  PTR(x) = __GMP_REALLOCATE_FUNC_LIMBS (xp, old_prec+1, new_prec_plus1);
+}
diff --git a/mpf/set_prc_raw.c b/mpf/set_prc_raw.c
new file mode 100644
index 0000000..e5c52cc
--- /dev/null
+++ b/mpf/set_prc_raw.c
@@ -0,0 +1,39 @@
+/* mpf_set_prec_raw(x,bits) -- Change the precision of x without changing
+   allocation.  For proper operation, the original precision need to be reset
+   sooner or later.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_set_prec_raw (mpf_ptr x, mp_bitcnt_t prec_in_bits) __GMP_NOTHROW
+{
+  /* Overwrite the precision field only; the limb allocation is untouched,
+     so per the file comment above the original precision must be restored
+     before the variable's storage is released.  */
+  x->_mp_prec = __GMPF_BITS_TO_PREC (prec_in_bits);
+}
diff --git a/mpf/set_q.c b/mpf/set_q.c
new file mode 100644
index 0000000..b721e3a
--- /dev/null
+++ b/mpf/set_q.c
@@ -0,0 +1,118 @@
+/* mpf_set_q (mpf_t rop, mpq_t op) -- Convert the rational op to the float rop.
+
+Copyright 1996, 1999, 2001, 2002, 2004, 2005, 2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* As usual the aim is to produce PREC(r) limbs, with the high non-zero.  The
+   basic mpn_div_q produces a quotient of nsize-dsize+1 limbs, with either the
+   high or second highest limb non-zero.  We arrange for nsize-dsize+1 to equal
+   prec+1, hence giving either prec or prec+1 result limbs at PTR(r).
+
+   nsize-dsize+1 == prec+1 is achieved by adjusting num(q), either dropping low
+   limbs if it's too big, or padding with low zeros if it's too small.  The
+   full given den(q) is always used.
+
+   We cannot truncate den(q), because even when it's much bigger than prec the
+   last limbs can still influence the final quotient.  Often they don't, but we
+   leave optimization of that to mpn_div_q.
+
+   Enhancements:
+
+   The high quotient limb is non-zero when high{np,dsize} > {dp,dsize}.  We
+   could make that comparison and use qsize==prec instead of qsize==prec+1,
+   to save one limb in the division.  */
+
+void
+mpf_set_q (mpf_ptr r, mpq_srcptr q)
+{
+  mp_srcptr np, dp;
+  mp_size_t prec, nsize, dsize, qsize, prospective_qsize, tsize, zeros;
+  mp_size_t sign_quotient, high_zero;
+  mp_ptr qp, tp;
+  mp_exp_t exp;
+  TMP_DECL;
+
+  ASSERT (SIZ(&q->_mp_den) > 0);  /* canonical q */
+
+  nsize = SIZ (&q->_mp_num);
+  dsize = SIZ (&q->_mp_den);
+
+  /* A zero numerator means the rational, hence the result, is zero.  */
+  if (UNLIKELY (nsize == 0))
+    {
+      SIZ (r) = 0;
+      EXP (r) = 0;
+      return;
+    }
+
+  TMP_MARK;
+
+  prec = PREC (r);
+  qp = PTR (r);
+
+  /* The numerator's size field carries the sign of the whole rational
+     (canonical form keeps the denominator positive).  */
+  sign_quotient = nsize;
+  nsize = ABS (nsize);
+  np = PTR (&q->_mp_num);
+  dp = PTR (&q->_mp_den);
+
+  prospective_qsize = nsize - dsize + 1;  /* q from using given n,d sizes */
+  exp = prospective_qsize;                /* ie. number of integer limbs */
+  qsize = prec + 1;                       /* desired q */
+
+  zeros = qsize - prospective_qsize;      /* n zeros to get desired qsize */
+  tsize = nsize + zeros;                  /* size of intermediate numerator */
+  tp = TMP_ALLOC_LIMBS (tsize + 1);       /* +1 for mpn_div_q's scratch */
+
+  if (zeros > 0)
+    {
+      /* pad n with zeros into temporary space */
+      MPN_ZERO (tp, zeros);
+      MPN_COPY (tp+zeros, np, nsize);
+      np = tp;                            /* mpn_div_q allows this overlap */
+    }
+  else
+    {
+      /* shorten n to get desired qsize: drop -zeros low limbs, which only
+	 affect the result below the computed precision */
+      np -= zeros;
+    }
+
+  ASSERT (tsize-dsize+1 == qsize);
+  mpn_div_q (qp, np, tsize, dp, dsize, tp);
+
+  /* strip possible zero high limb */
+  high_zero = (qp[qsize-1] == 0);
+  qsize -= high_zero;
+  exp -= high_zero;
+
+  EXP (r) = exp;
+  SIZ (r) = sign_quotient >= 0 ? qsize : -qsize;
+
+  TMP_FREE;
+}
diff --git a/mpf/set_si.c b/mpf/set_si.c
new file mode 100644
index 0000000..23f713d
--- /dev/null
+++ b/mpf/set_si.c
@@ -0,0 +1,52 @@
+/* mpf_set_si() -- Assign a float from a signed int.
+
+Copyright 1993-1995, 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Assign dest from the signed long val.  The value is an integer, so the
+   exponent is simply the stored limb count; the sign of val goes into the
+   sign of the size field.  */
+void
+mpf_set_si (mpf_ptr dest, long val)
+{
+  mp_size_t size;
+  mp_limb_t absval;
+
+  /* ABS_CAST computes |val| in unsigned arithmetic, avoiding undefined
+     behaviour when val is the most negative long.  */
+  absval = (mp_limb_t) ABS_CAST (unsigned long int, val);
+
+  /* Low limb; size stays 0 for val == 0.  */
+  PTR (dest)[0] = absval & GMP_NUMB_MASK;
+  size = absval != 0;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* With nails a long can need a second limb.  */
+  absval >>= GMP_NUMB_BITS;
+  PTR (dest)[1] = absval;
+  size += (absval != 0);
+#endif
+
+  EXP (dest) = size;
+  SIZ (dest) = val >= 0 ? size : -size;
+}
diff --git a/mpf/set_str.c b/mpf/set_str.c
new file mode 100644
index 0000000..c7bfe0b
--- /dev/null
+++ b/mpf/set_str.c
@@ -0,0 +1,412 @@
+/* mpf_set_str (dest, string, base) -- Convert the string STRING
+   in base BASE to a float in dest.  If BASE is zero, the leading characters
+   of STRING is used to figure out the base.
+
+Copyright 1993-1997, 2000-2003, 2005, 2007, 2008, 2011, 2013, 2019 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/*
+  This still needs work, as suggested by some FIXME comments.
+  1. Don't depend on superfluous mantissa digits.
+  2. Allocate temp space more cleverly.
+  3. Use mpn_div_q instead of mpn_lshift+mpn_divrem.
+*/
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for localeconv */
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#define digit_value_tab __gmp_digit_value_tab
+
+/* Compute base^exp and return the most significant prec limbs in rp[].
+   Put the count of omitted low limbs in *ign.
+   Return the actual size (which might be less than prec).  */
+static mp_size_t
+mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
+		    mp_limb_t base, mp_exp_t exp,
+		    mp_size_t prec, mp_ptr tp)
+{
+  mp_size_t ign;		/* counts number of ignored low limbs in r */
+  mp_size_t off;		/* keeps track of offset where value starts */
+  mp_ptr passed_rp = rp;
+  mp_size_t rn;
+  int cnt;
+  int i;
+
+  /* Binary exponentiation scanning exp's bits from the one below the most
+     significant 1 bit downwards; rp already holds base^1 for the top bit.  */
+  rp[0] = base;
+  rn = 1;
+  off = 0;
+  ign = 0;
+  count_leading_zeros (cnt, exp);
+  for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
+    {
+      mpn_sqr (tp, rp + off, rn);
+      rn = 2 * rn;
+      rn -= tp[rn - 1] == 0;
+      /* Squaring doubles the count of already-dropped low limbs.  */
+      ign <<= 1;
+
+      off = 0;
+      if (rn > prec)
+	{
+	  /* Keep only the prec most significant limbs, recording how many
+	     low limbs were dropped.  */
+	  ign += rn - prec;
+	  off = rn - prec;
+	  rn = prec;
+	}
+      MP_PTR_SWAP (rp, tp);
+
+      if (((exp >> i) & 1) != 0)
+	{
+	  mp_limb_t cy;
+	  cy = mpn_mul_1 (rp, rp + off, rn, base);
+	  rp[rn] = cy;
+	  rn += cy != 0;
+	  off = 0;
+	}
+    }
+
+  if (rn > prec)
+    {
+      ign += rn - prec;
+      rp += rn - prec;
+      rn = prec;
+    }
+
+  /* rp and tp may have been swapped an odd number of times; copy the result
+     back into the caller's buffer.  */
+  MPN_COPY_INCR (passed_rp, rp + off, rn);
+  *ignp = ign;
+  return rn;
+}
+
+int
+mpf_set_str (mpf_ptr x, const char *str, int base)
+{
+  size_t str_size;
+  char *s, *begs;
+  size_t i, j;
+  int c;
+  int negative;
+  char *dotpos;
+  const char *expptr;
+  int exp_base;
+  const char  *point = GMP_DECIMAL_POINT;
+  size_t      pointlen = strlen (point);
+  const unsigned char *digit_value;
+  int incr;
+  size_t n_zeros_skipped;
+
+  TMP_DECL;
+
+  c = (unsigned char) *str;
+
+  /* Skip whitespace.  */
+  while (isspace (c))
+    c = (unsigned char) *++str;
+
+  negative = 0;
+  if (c == '-')
+    {
+      negative = 1;
+      c = (unsigned char) *++str;
+    }
+
+  /* Default base to decimal.  */
+  if (base == 0)
+    base = 10;
+
+  exp_base = base;
+
+  /* A negative base requests a decimal exponent part.  */
+  if (base < 0)
+    {
+      exp_base = 10;
+      base = -base;
+    }
+
+  digit_value = digit_value_tab;
+  if (base > 36)
+    {
+      /* For bases > 36, use the collating sequence
+	 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
+      digit_value += 208;
+      if (base > 62)
+	return -1;		/* too large base */
+    }
+
+  /* Require at least one digit, possibly after an initial decimal point.  */
+  if (digit_value[c] >= base)
+    {
+      /* not a digit, must be a decimal point */
+      for (i = 0; i < pointlen; i++)
+	if (str[i] != point[i])
+	  return -1;
+      if (digit_value[(unsigned char) str[pointlen]] >= base)
+	return -1;
+    }
+
+  /* Locate exponent part of the input.  Look from the right of the string,
+     since the exponent is usually a lot shorter than the mantissa.  */
+  expptr = NULL;
+  str_size = strlen (str);
+  for (i = str_size - 1; i > 0; i--)
+    {
+      c = (unsigned char) str[i];
+      if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))
+	{
+	  expptr = str + i + 1;
+	  str_size = i;
+	  break;
+	}
+    }
+
+  TMP_MARK;
+  s = begs = (char *) TMP_ALLOC (str_size + 1);
+
+  incr = 0;
+  n_zeros_skipped = 0;
+  dotpos = NULL;
+
+  /* Loop through mantissa, converting it from ASCII to raw byte values.
+     incr stays 0 until the first non-zero digit so leading zeros are not
+     stored; zeros between the radix point and the first non-zero digit are
+     counted in n_zeros_skipped for the exponent adjustment below.  */
+  for (i = 0; i < str_size; i++)
+    {
+      c = (unsigned char) *str;
+      if (!isspace (c))
+	{
+	  int dig;
+
+	  /* The "if (1) ... else not_point:" arrangement lets the decimal
+	     point check above jump into the digit-handling branch.  */
+	  for (j = 0; j < pointlen; j++)
+	    if (str[j] != point[j])
+	      goto not_point;
+	  if (1)
+	    {
+	      if (dotpos != 0)
+		{
+		  /* already saw a decimal point, another is invalid */
+		  TMP_FREE;
+		  return -1;
+		}
+	      dotpos = s;
+	      str += pointlen - 1;
+	      i += pointlen - 1;
+	    }
+	  else
+	    {
+	    not_point:
+	      dig = digit_value[c];
+	      if (dig >= base)
+		{
+		  TMP_FREE;
+		  return -1;
+		}
+	      *s = dig;
+	      incr |= dig != 0;
+	      s += incr;	/* Increment after first non-0 digit seen. */
+	      if (dotpos != NULL)
+		/* Count skipped zeros between radix point and first non-0
+		   digit. */
+		n_zeros_skipped += 1 - incr;
+	    }
+	}
+      c = (unsigned char) *++str;
+    }
+
+  str_size = s - begs;
+
+  {
+    long exp_in_base;
+    mp_size_t ra, ma, rn, mn;
+    int cnt;
+    mp_ptr mp, tp, rp;
+    mp_exp_t exp_in_limbs;
+    mp_size_t prec = PREC(x) + 1;
+    int divflag;
+    mp_size_t madj, radj;
+
+#if 0
+    size_t n_chars_needed;
+
+    /* This needs careful testing.  Leave disabled for now.  */
+    /* Just consider the relevant leading digits of the mantissa.  */
+    LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base);
+    if (str_size > n_chars_needed)
+      str_size = n_chars_needed;
+#endif
+
+    /* An empty converted mantissa (e.g. "0" or "0.00") means zero.  */
+    if (str_size == 0)
+      {
+	SIZ(x) = 0;
+	EXP(x) = 0;
+	TMP_FREE;
+	return 0;
+      }
+
+    LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base);
+    mp = TMP_ALLOC_LIMBS (ma);
+    mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
+
+    madj = 0;
+    /* Ignore excess limbs in MP,MSIZE.  */
+    if (mn > prec)
+      {
+	madj = mn - prec;
+	mp += mn - prec;
+	mn = prec;
+      }
+
+    if (expptr != 0)
+      {
+	/* Scan and convert the exponent, in base exp_base.  The sign is
+	   handled branch-free with the minus/plusminus masks.  */
+	long dig, minus, plusminus;
+	c = (unsigned char) *expptr;
+	minus = -(long) (c == '-');
+	plusminus = minus | -(long) (c == '+');
+	expptr -= plusminus;			/* conditional increment */
+	c = (unsigned char) *expptr++;
+	dig = digit_value[c];
+	if (dig >= exp_base)
+	  {
+	    TMP_FREE;
+	    return -1;
+	  }
+	exp_in_base = dig;
+	c = (unsigned char) *expptr++;
+	dig = digit_value[c];
+	while (dig < exp_base)
+	  {
+	    exp_in_base = exp_in_base * exp_base;
+	    exp_in_base += dig;
+	    c = (unsigned char) *expptr++;
+	    dig = digit_value[c];
+	  }
+	exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */
+      }
+    else
+      exp_in_base = 0;
+    /* Digits after the radix point (plus the zeros skipped just after it)
+       reduce the effective exponent.  */
+    if (dotpos != 0)
+      exp_in_base -= s - dotpos + n_zeros_skipped;
+    divflag = exp_in_base < 0;
+    exp_in_base = ABS (exp_in_base);
+
+    if (exp_in_base == 0)
+      {
+	MPN_COPY (PTR(x), mp, mn);
+	SIZ(x) = negative ? -mn : mn;
+	EXP(x) = mn + madj;
+	TMP_FREE;
+	return 0;
+      }
+
+    ra = 2 * (prec + 1);
+    TMP_ALLOC_LIMBS_2 (rp, ra, tp, ra);
+    rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);
+
+    if (divflag)
+      {
+#if 0
+	/* FIXME: Should use mpn_div_q here.  */
+	...
+	mpn_div_q (tp, mp, mn, rp, rn, scratch);
+	...
+#else
+	mp_ptr qp;
+	mp_limb_t qlimb;
+	if (mn < rn)
+	  {
+	    /* Pad out MP,MSIZE for current divrem semantics.  */
+	    mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);
+	    MPN_ZERO (tmp, rn - mn);
+	    MPN_COPY (tmp + rn - mn, mp, mn);
+	    mp = tmp;
+	    madj -= rn - mn;
+	    mn = rn;
+	  }
+	/* Normalize the divisor (high bit set), shifting the dividend by
+	   the same amount, as mpn_divrem requires.  */
+	if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)
+	  {
+	    mp_limb_t cy;
+	    count_leading_zeros (cnt, rp[rn - 1]);
+	    cnt -= GMP_NAIL_BITS;
+	    mpn_lshift (rp, rp, rn, cnt);
+	    cy = mpn_lshift (mp, mp, mn, cnt);
+	    if (cy)
+	      mp[mn++] = cy;
+	  }
+
+	qp = TMP_ALLOC_LIMBS (prec + 1);
+	qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn);
+	tp = qp;
+	exp_in_limbs = qlimb + (mn - rn) + (madj - radj);
+	rn = prec;
+	if (qlimb != 0)
+	  {
+	    tp[prec] = qlimb;
+	    /* Skip the least significant limb not to overrun the destination
+	       variable.  */
+	    tp++;
+	  }
+#endif
+      }
+    else
+      {
+	/* Non-negative exponent: multiply the mantissa by base^exp_in_base
+	   (larger operand first, as mpn_mul requires).  */
+	tp = TMP_ALLOC_LIMBS (rn + mn);
+	if (rn > mn)
+	  mpn_mul (tp, rp, rn, mp, mn);
+	else
+	  mpn_mul (tp, mp, mn, rp, rn);
+	rn += mn;
+	rn -= tp[rn - 1] == 0;
+	exp_in_limbs = rn + madj + radj;
+
+	if (rn > prec)
+	  {
+	    /* Truncate to prec limbs; only low limbs are dropped, so the
+	       limb exponent is unchanged.  */
+	    tp += rn - prec;
+	    rn = prec;
+	    exp_in_limbs += 0;
+	  }
+      }
+
+    MPN_COPY (PTR(x), tp, rn);
+    SIZ(x) = negative ? -rn : rn;
+    EXP(x) = exp_in_limbs;
+    TMP_FREE;
+    return 0;
+  }
+}
diff --git a/mpf/set_ui.c b/mpf/set_ui.c
new file mode 100644
index 0000000..bd4ba26
--- /dev/null
+++ b/mpf/set_ui.c
@@ -0,0 +1,48 @@
+/* mpf_set_ui() -- Assign a float from an unsigned int.
+
+Copyright 1993-1995, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Assign f from the unsigned long val.  The value is a non-negative
+   integer, so the exponent equals the stored limb count.  */
+void
+mpf_set_ui (mpf_ptr f, unsigned long val)
+{
+  mp_size_t size;
+
+  /* Store the low limb; a zero value has size 0.  */
+  PTR (f)[0] = val & GMP_NUMB_MASK;
+  size = val != 0;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* With nails an unsigned long can need a second limb.  */
+  val >>= GMP_NUMB_BITS;
+  PTR (f)[1] = val;
+  size += (val != 0);
+#endif
+
+  EXP (f) = size;
+  SIZ (f) = size;
+}
diff --git a/mpf/set_z.c b/mpf/set_z.c
new file mode 100644
index 0000000..f762633
--- /dev/null
+++ b/mpf/set_z.c
@@ -0,0 +1,56 @@
+/* mpf_set_z -- Assign a float from an integer.
+
+Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Assign the float r from the integer u, truncating the mantissa to r's
+   precision when u has more limbs.  */
+void
+mpf_set_z (mpf_ptr r, mpz_srcptr u)
+{
+  mp_srcptr up;
+  mp_size_t usize, abs_usize;
+  mp_size_t prec;
+
+  /* One guard limb beyond the nominal precision, as elsewhere in mpf.  */
+  prec = PREC (r) + 1;
+  usize = SIZ (u);
+  abs_usize = ABS (usize);
+  up = PTR (u);
+
+  /* An integer's limb exponent is its full limb count, even when the
+     stored mantissa below is truncated.  */
+  EXP (r) = abs_usize;
+
+  /* Keep only the most significant prec limbs.  */
+  if (abs_usize > prec)
+    {
+      up += abs_usize - prec;
+      abs_usize = prec;
+    }
+
+  SIZ (r) = usize >= 0 ? abs_usize : -abs_usize;
+  MPN_COPY (PTR (r), up, abs_usize);
+}
diff --git a/mpf/size.c b/mpf/size.c
new file mode 100644
index 0000000..f7a9dbd
--- /dev/null
+++ b/mpf/size.c
@@ -0,0 +1,38 @@
+/* mpf_size(x) -- return the number of limbs currently used by the
+   value of the float X.
+
+Copyright 1993-1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Return the number of limbs currently used by f's value.  */
+size_t
+mpf_size (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_size_t size = SIZ (f);
+  /* A negative size field encodes a negative value; the count is |size|.  */
+  return size >= 0 ? (size_t) size : (size_t) -size;
+}
diff --git a/mpf/sqrt.c b/mpf/sqrt.c
new file mode 100644
index 0000000..ffb7c10
--- /dev/null
+++ b/mpf/sqrt.c
@@ -0,0 +1,112 @@
+/* mpf_sqrt -- Compute the square root of a float.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2004, 2005, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+
+
+/* As usual, the aim is to produce PREC(r) limbs of result, with the high
+   limb non-zero.  This is accomplished by applying mpn_sqrtrem to either
+   2*prec or 2*prec-1 limbs, both such sizes resulting in prec limbs.
+
+   The choice between 2*prec or 2*prec-1 limbs is based on the input
+   exponent.  With b=2^GMP_NUMB_BITS the limb base then we can think of
+   effectively taking out a factor b^(2k), for suitable k, to get to an
+   integer input of the desired size ready for mpn_sqrtrem.  It must be an
+   even power taken out, ie. an even number of limbs, so the square root
+   gives factor b^k and the radix point is still on a limb boundary.  So if
+   EXP(r) is even we'll get an even number of input limbs 2*prec, or if
+   EXP(r) is odd we get an odd number 2*prec-1.
+
+   Further limbs below the 2*prec or 2*prec-1 used don't affect the result
+   and are simply truncated.  This can be seen by considering an integer x,
+   with s=floor(sqrt(x)).  s is the unique integer satisfying s^2 <= x <
+   (s+1)^2.  Notice that adding a fraction part to x (ie. some further bits)
+   doesn't change the inequality, s remains the unique solution.  Working
+   suitable factors of 2 into this argument lets it apply to an intended
+   precision at any position for any x, not just the integer binary point.
+
+   If the input is smaller than 2*prec or 2*prec-1, then we just pad with
+   zeros, that of course being our usual interpretation of short inputs.
+   The effect is to extend the root beyond the size of the input (for
+   instance into fractional limbs if u is an integer).  */
+
+void
+mpf_sqrt (mpf_ptr r, mpf_srcptr u)
+{
+  mp_size_t usize;
+  mp_ptr up, tp;
+  mp_size_t prec, tsize;
+  mp_exp_t uexp, expodd;
+  TMP_DECL;
+
+  usize = u->_mp_size;
+  if (UNLIKELY (usize <= 0))
+    {
+      /* Negative operand: square root is undefined.  */
+      if (usize < 0)
+        SQRT_OF_NEGATIVE;
+      /* sqrt(0) = 0.  */
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+  TMP_MARK;
+
+  uexp = u->_mp_exp;
+  prec = r->_mp_prec;
+  up = u->_mp_d;
+
+  /* Per the file comment above: feed mpn_sqrtrem 2*prec limbs when
+     EXP(u) is even, 2*prec-1 when odd, so the root comes out as exactly
+     prec limbs and the radix point stays on a limb boundary.  */
+  expodd = (uexp & 1);
+  tsize = 2 * prec - expodd;
+  r->_mp_size = prec;
+  r->_mp_exp = (uexp + expodd) / 2;    /* ceil(uexp/2) */
+
+  /* root size is ceil(tsize/2), this will be our desired "prec" limbs */
+  ASSERT ((tsize + 1) / 2 == prec);
+
+  tp = TMP_ALLOC_LIMBS (tsize);
+
+  if (usize > tsize)
+    {
+      /* Excess low limbs of u don't affect the truncated root (see the
+         file comment); drop them.  */
+      up += usize - tsize;
+      usize = tsize;
+      MPN_COPY (tp, up, tsize);
+    }
+  else
+    {
+      /* Short input: pad with low zero limbs up to tsize.  */
+      MPN_ZERO (tp, tsize - usize);
+      MPN_COPY (tp + (tsize - usize), up, usize);
+    }
+
+  /* NULL remainder pointer: only the truncated root is wanted.  */
+  mpn_sqrtrem (r->_mp_d, NULL, tp, tsize);
+
+  TMP_FREE;
+}
diff --git a/mpf/sqrt_ui.c b/mpf/sqrt_ui.c
new file mode 100644
index 0000000..9f91f99
--- /dev/null
+++ b/mpf/sqrt_ui.c
@@ -0,0 +1,108 @@
+/* mpf_sqrt_ui -- Compute the square root of an unsigned integer.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2004, 2005, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+
+
+/* As usual the aim is to produce PREC(r) limbs of result with the high limb
+   non-zero.  That high limb will end up floor(sqrt(u)), and limbs below are
+   produced by padding the input with zeros, two for each desired result
+   limb, being 2*(prec-1) for a total 2*prec-1 limbs passed to mpn_sqrtrem.
+   The way mpn_sqrtrem calculates floor(sqrt(x)) ensures the root is correct
+   to the intended accuracy, ie. truncated to prec limbs.
+
+   With nails, u might be two limbs, in which case a total 2*prec limbs is
+   passed to mpn_sqrtrem (still giving a prec limb result).  If uhigh is
+   zero we adjust back to 2*prec-1, since mpn_sqrtrem requires the high
+   non-zero.  2*prec limbs are always allocated, even when uhigh is zero, so
+   the store of uhigh can be done without a conditional.
+
+   u==0 is a special case so the rest of the code can assume the result is
+   non-zero (ie. will have a non-zero high limb on the result).
+
+   Not done:
+
+   No attempt is made to identify perfect squares.  It's considered this can
+   be left to an application if it might occur with any frequency.  As it
+   stands, mpn_sqrtrem does its normal amount of work on a perfect square
+   followed by zero limbs, though of course only an mpn_sqrtrem1 would be
+   actually needed.  We also end up leaving our mpf result with lots of low
+   trailing zeros, slowing down subsequent operations.
+
+   We're not aware of any optimizations that can be made using the fact the
+   input has lots of trailing zeros (apart from the perfect square
+   case).  */
+
+
+/* 1 if we (might) need two limbs for u */
+#define U2   (GMP_NUMB_BITS < BITS_PER_ULONG)
+
+void
+mpf_sqrt_ui (mpf_ptr r, unsigned long int u)
+{
+  mp_size_t rsize, zeros;
+  mp_ptr tp;
+  mp_size_t prec;
+  TMP_DECL;
+
+  if (UNLIKELY (u <= 1))
+    {
+      /* sqrt(0)=0 and sqrt(1)=1: size, exponent and the single limb
+         all equal u itself.  */
+      SIZ (r) = EXP (r) = u;
+      *PTR (r) = u;
+      return;
+    }
+
+  TMP_MARK;
+
+  prec = PREC (r);
+  /* Pad u with 2*(prec-1) low zero limbs so the truncated root has
+     exactly prec limbs (see the file comment above).  */
+  zeros = 2 * prec - 2;
+  rsize = zeros + 1 + U2;
+
+  tp = TMP_ALLOC_LIMBS (rsize);
+
+  MPN_ZERO (tp, zeros);
+  tp[zeros] = u & GMP_NUMB_MASK;
+
+#if U2
+  {
+    /* With nails u may need a second limb; if its high part is zero,
+       shrink rsize back so mpn_sqrtrem sees a non-zero high limb.  The
+       store itself is done unconditionally (room was allocated).  */
+    mp_limb_t uhigh = u >> GMP_NUMB_BITS;
+    tp[zeros + 1] = uhigh;
+    rsize -= (uhigh == 0);
+  }
+#endif
+
+  mpn_sqrtrem (PTR (r), NULL, tp, rsize);
+
+  /* u >= 2, so the root's high limb is non-zero: the result is exactly
+     prec limbs with the radix point after the top limb.  */
+  SIZ (r) = prec;
+  EXP (r) = 1;
+  TMP_FREE;
+}
diff --git a/mpf/sub.c b/mpf/sub.c
new file mode 100644
index 0000000..56f26f6
--- /dev/null
+++ b/mpf/sub.c
@@ -0,0 +1,395 @@
+/* mpf_sub -- Subtract two floats.
+
+Copyright 1993-1996, 1999-2002, 2004, 2005, 2011, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set r = u - v, to the precision of r.  */
+void
+mpf_sub (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_srcptr up, vp;
+  mp_ptr rp, tp;
+  mp_size_t usize, vsize, rsize;
+  mp_size_t prec;
+  mp_exp_t exp;
+  mp_size_t ediff;
+  int negate;
+  TMP_DECL;
+
+  usize = SIZ (u);
+  vsize = SIZ (v);
+
+  /* Handle special cases that don't work in generic code below.  */
+  if (usize == 0)
+    {
+      mpf_neg (r, v);
+      return;
+    }
+  if (vsize == 0)
+    {
+      if (r != u)
+        mpf_set (r, u);
+      return;
+    }
+
+  /* If signs of U and V are different, perform addition.  */
+  if ((usize ^ vsize) < 0)
+    {
+      /* Stack-local alias of v with the sign flipped; no limb data is
+         copied.  */
+      __mpf_struct v_negated;
+      v_negated._mp_size = -vsize;
+      v_negated._mp_exp = EXP (v);
+      v_negated._mp_d = PTR (v);
+      mpf_add (r, u, &v_negated);
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Signs are now known to be the same.  */
+  negate = usize < 0;
+
+  /* Make U be the operand with the largest exponent.  */
+  if (EXP (u) < EXP (v))
+    {
+      mpf_srcptr t;
+      t = u; u = v; v = t;
+      negate ^= 1;
+      usize = SIZ (u);
+      vsize = SIZ (v);
+    }
+
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+  up = PTR (u);
+  vp = PTR (v);
+  rp = PTR (r);
+  /* Work with one guard limb beyond the nominal precision.  */
+  prec = PREC (r) + 1;
+  exp = EXP (u);
+  ediff = exp - EXP (v);
+
+  /* If ediff is 0 or 1, we might have a situation where the operands are
+     extremely close.  We need to scan the operands from the most significant
+     end ignore the initial parts that are equal.  */
+  if (ediff <= 1)
+    {
+      if (ediff == 0)
+	{
+	  /* Skip leading limbs in U and V that are equal.  */
+	      /* This loop normally exits immediately.  Optimize for that.  */
+	      while (up[usize - 1] == vp[vsize - 1])
+		{
+		  usize--;
+		  vsize--;
+		  exp--;
+
+		  if (usize == 0)
+		    {
+                      /* u cancels high limbs of v, result is rest of v */
+		      negate ^= 1;
+                    cancellation:
+                      /* strip high zeros before truncating to prec */
+                      while (vsize != 0 && vp[vsize - 1] == 0)
+                        {
+                          vsize--;
+                          exp--;
+                        }
+		      if (vsize > prec)
+			{
+			  vp += vsize - prec;
+			  vsize = prec;
+			}
+                      MPN_COPY_INCR (rp, vp, vsize);
+                      rsize = vsize;
+                      goto done;
+		    }
+		  if (vsize == 0)
+		    {
+                      vp = up;
+                      vsize = usize;
+                      goto cancellation;
+		    }
+		}
+
+	  if (up[usize - 1] < vp[vsize - 1])
+	    {
+	      /* For simplicity, swap U and V.  Note that since the loop above
+		 wouldn't have exited unless up[usize - 1] and vp[vsize - 1]
+		 were non-equal, this if-statement catches all cases where U
+		 is smaller than V.  */
+	      MPN_SRCPTR_SWAP (up,usize, vp,vsize);
+	      negate ^= 1;
+	      /* negating ediff not necessary since it is 0.  */
+	    }
+
+	  /* Check for
+	     x+1 00000000 ...
+	      x  ffffffff ... */
+	  if (up[usize - 1] != vp[vsize - 1] + 1)
+	    goto general_case;
+	  usize--;
+	  vsize--;
+	  exp--;
+	}
+      else /* ediff == 1 */
+	{
+	  /* Check for
+	     1 00000000 ...
+	     0 ffffffff ... */
+
+	  if (up[usize - 1] != 1 || vp[vsize - 1] != GMP_NUMB_MAX
+	      || (usize >= 2 && up[usize - 2] != 0))
+	    goto general_case;
+
+	  usize--;
+	  exp--;
+	}
+
+      /* Skip sequences of 00000000/ffffffff */
+      while (vsize != 0 && usize != 0 && up[usize - 1] == 0
+	     && vp[vsize - 1] == GMP_NUMB_MAX)
+	{
+	  usize--;
+	  vsize--;
+	  exp--;
+	}
+
+      if (usize == 0)
+	{
+	  while (vsize != 0 && vp[vsize - 1] == GMP_NUMB_MAX)
+	    {
+	      vsize--;
+	      exp--;
+	    }
+	}
+      else if (usize > prec - 1)
+	{
+	  up += usize - (prec - 1);
+	  usize = prec - 1;
+	}
+      if (vsize > prec - 1)
+	{
+	  vp += vsize - (prec - 1);
+	  vsize = prec - 1;
+	}
+
+      /* Subtract the remaining (near-cancelling) parts into temporary
+         space, then normalize.  */
+      tp = TMP_ALLOC_LIMBS (prec);
+      {
+	mp_limb_t cy_limb;
+	if (vsize == 0)
+	  {
+	    MPN_COPY (tp, up, usize);
+	    tp[usize] = 1;
+	    rsize = usize + 1;
+	    exp++;
+	    goto normalized;
+	  }
+	if (usize == 0)
+	  {
+	    cy_limb = mpn_neg (tp, vp, vsize);
+	    rsize = vsize;
+	  }
+	else if (usize >= vsize)
+	  {
+	    /* uuuu     */
+	    /* vv       */
+	    mp_size_t size;
+	    size = usize - vsize;
+	    MPN_COPY (tp, up, size);
+	    cy_limb = mpn_sub_n (tp + size, up + size, vp, vsize);
+	    rsize = usize;
+	  }
+	else /* (usize < vsize) */
+	  {
+	    /* uuuu     */
+	    /* vvvvvvv  */
+	    mp_size_t size;
+	    size = vsize - usize;
+	    cy_limb = mpn_neg (tp, vp, size);
+	    cy_limb = mpn_sub_nc (tp + size, up, vp + size, usize, cy_limb);
+	    rsize = vsize;
+	  }
+	if (cy_limb == 0)
+	  {
+	    tp[rsize] = 1;
+	    rsize++;
+	    exp++;
+	    goto normalized;
+	  }
+	goto normalize;
+      }
+    }
+
+/* Generic path: exponents differ by at least 2, or the leading limbs
+   did not nearly cancel.  */
+general_case:
+  /* If U extends beyond PREC, ignore the part that does.  */
+  if (usize > prec)
+    {
+      up += usize - prec;
+      usize = prec;
+    }
+
+  /* If V extends beyond PREC, ignore the part that does.
+     Note that this may make vsize negative.  */
+  if (vsize + ediff > prec)
+    {
+      vp += vsize + ediff - prec;
+      vsize = prec - ediff;
+    }
+
+  if (ediff >= prec)
+    {
+      /* V completely cancelled.  */
+      if (rp != up)
+	MPN_COPY (rp, up, usize);
+      rsize = usize;
+    }
+  else
+    {
+      /* Allocate temp space for the result.  Allocate
+	 just vsize + ediff later???  */
+      tp = TMP_ALLOC_LIMBS (prec);
+
+      /* Locate the least significant non-zero limb in (the needed
+	 parts of) U and V, to simplify the code below.  */
+      for (;;)
+	{
+	  if (vsize == 0)
+	    {
+	      MPN_COPY (rp, up, usize);
+	      rsize = usize;
+	      goto done;
+	    }
+	  if (vp[0] != 0)
+	    break;
+	  vp++, vsize--;
+	}
+      for (;;)
+	{
+	  if (usize == 0)
+	    {
+	      MPN_COPY (rp, vp, vsize);
+	      rsize = vsize;
+	      negate ^= 1;
+	      goto done;
+	    }
+	  if (up[0] != 0)
+	    break;
+	  up++, usize--;
+	}
+
+      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
+      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */
+
+      if (usize > ediff)
+	{
+	  /* U and V partially overlaps.  */
+	  if (ediff == 0)
+	    {
+	      /* Have to compare the leading limbs of u and v
+		 to determine whether to compute u - v or v - u.  */
+	      if (usize >= vsize)
+		{
+		  /* uuuu     */
+		  /* vv       */
+		  mp_size_t size;
+		  size = usize - vsize;
+		  MPN_COPY (tp, up, size);
+		  mpn_sub_n (tp + size, up + size, vp, vsize);
+		  rsize = usize;
+		}
+	      else /* (usize < vsize) */
+		{
+		  /* uuuu     */
+		  /* vvvvvvv  */
+		  mp_size_t size;
+		  size = vsize - usize;
+		  ASSERT_CARRY (mpn_neg (tp, vp, size));
+		  mpn_sub_nc (tp + size, up, vp + size, usize, CNST_LIMB (1));
+		  rsize = vsize;
+		}
+	    }
+	  else
+	    {
+	      if (vsize + ediff <= usize)
+		{
+		  /* uuuu     */
+		  /*   v      */
+		  mp_size_t size;
+		  size = usize - ediff - vsize;
+		  MPN_COPY (tp, up, size);
+		  mpn_sub (tp + size, up + size, usize - size, vp, vsize);
+		  rsize = usize;
+		}
+	      else
+		{
+		  /* uuuu     */
+		  /*   vvvvv  */
+		  mp_size_t size;
+		  rsize = vsize + ediff;
+		  size = rsize - usize;
+		  ASSERT_CARRY (mpn_neg (tp, vp, size));
+		  mpn_sub (tp + size, up, usize, vp + size, usize - ediff);
+		  /* Should we use sub_nc then sub_1? */
+		  MPN_DECR_U (tp + size, usize, CNST_LIMB (1));
+		}
+	    }
+	}
+      else
+	{
+	  /* uuuu     */
+	  /*      vv  */
+	  mp_size_t size, i;
+	  size = vsize + ediff - usize;
+	  ASSERT_CARRY (mpn_neg (tp, vp, vsize));
+	  for (i = vsize; i < size; i++)
+	    tp[i] = GMP_NUMB_MAX;
+	  mpn_sub_1 (tp + size, up, usize, (mp_limb_t) 1);
+	  rsize = size + usize;
+	}
+
+    normalize:
+      /* Full normalize.  Optimize later.  */
+      while (rsize != 0 && tp[rsize - 1] == 0)
+	{
+	  rsize--;
+	  exp--;
+	}
+    normalized:
+      MPN_COPY (rp, tp, rsize);
+    }
+
+  /* Store sign/size and exponent; rsize == 0 means exact cancellation.  */
+ done:
+  TMP_FREE;
+  if (rsize == 0) {
+    SIZ (r) = 0;
+    EXP (r) = 0;
+  } else {
+    SIZ (r) = negate ? -rsize : rsize;
+    EXP (r) = exp;
+  }
+}
diff --git a/mpf/sub_ui.c b/mpf/sub_ui.c
new file mode 100644
index 0000000..a23d2a8
--- /dev/null
+++ b/mpf/sub_ui.c
@@ -0,0 +1,50 @@
+/* mpf_sub_ui -- Subtract an unsigned integer from a float.
+
+Copyright 1993, 1994, 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_sub_ui (mpf_ptr sum, mpf_srcptr u, unsigned long int v)
+{
+  __mpf_struct v_float;
+  mp_limb_t v_limb;
+
+  /* Subtracting zero reduces to a plain copy.  */
+  if (v == 0)
+    {
+      mpf_set (sum, u);
+      return;
+    }
+
+  /* Wrap v in a stack-allocated one-limb float (exponent 1, so the
+     limb sits just left of the radix point) and reuse mpf_sub.  */
+  v_limb = v;
+  v_float._mp_d = &v_limb;
+  v_float._mp_exp = 1;
+  v_float._mp_size = 1;
+  mpf_sub (sum, u, &v_float);
+}
diff --git a/mpf/swap.c b/mpf/swap.c
new file mode 100644
index 0000000..80b2e9b
--- /dev/null
+++ b/mpf/swap.c
@@ -0,0 +1,56 @@
+/* mpf_swap (U, V) -- Swap U and V.
+
+Copyright 1997, 1998, 2000, 2001, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_swap (mpf_ptr u, mpf_ptr v) __GMP_NOTHROW
+{
+  /* Exchange every header field of the two variables: limb pointer,
+     allocated precision, size (with sign) and exponent.  No limb data
+     is copied; only the structure headers change hands.  */
+  mp_ptr    swap_ptr;
+  mp_size_t swap_prec;
+  mp_size_t swap_siz;
+  mp_exp_t  swap_exp;
+
+  swap_ptr = PTR (u);
+  PTR (u) = PTR (v);
+  PTR (v) = swap_ptr;
+
+  swap_exp = EXP (u);
+  EXP (u) = EXP (v);
+  EXP (v) = swap_exp;
+
+  swap_siz = SIZ (u);
+  SIZ (u) = SIZ (v);
+  SIZ (v) = swap_siz;
+
+  swap_prec = PREC (u);
+  PREC (u) = PREC (v);
+  PREC (v) = swap_prec;
+}
diff --git a/mpf/trunc.c b/mpf/trunc.c
new file mode 100644
index 0000000..e9af4a7
--- /dev/null
+++ b/mpf/trunc.c
@@ -0,0 +1,74 @@
+/* mpf_trunc -- truncate an mpf to an integer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Notice the use of prec+1 ensures mpf_trunc is equivalent to mpf_set if u
+   is already an integer.  */
+
+void
+mpf_trunc (mpf_ptr r, mpf_srcptr u)
+{
+  mp_ptr     rp;
+  mp_srcptr  up;
+  mp_size_t  size, asize, prec;
+  mp_exp_t   exp;
+
+  exp = EXP(u);
+  size = SIZ(u);
+  if (size == 0 || exp <= 0)
+    {
+      /* u is only a fraction */
+      SIZ(r) = 0;
+      EXP(r) = 0;
+      return;
+    }
+
+  up = PTR(u);
+  EXP(r) = exp;
+  asize = ABS (size);
+  /* Point just past the most significant limb; the limbs to copy are
+     counted down from here.  */
+  up += asize;
+
+  /* skip fraction part of u */
+  asize = MIN (asize, exp);
+
+  /* don't lose precision in the copy */
+  prec = PREC(r) + 1;
+
+  /* skip excess over target precision */
+  asize = MIN (asize, prec);
+
+  up -= asize;
+  rp = PTR(r);
+  SIZ(r) = (size >= 0 ? asize : -asize);
+  /* When r and u are the same variable, rp <= up here, so the
+     low-to-high copy of MPN_COPY_INCR is overlap-safe.  */
+  if (rp != up)
+    MPN_COPY_INCR (rp, up, asize);
+}
diff --git a/mpf/ui_div.c b/mpf/ui_div.c
new file mode 100644
index 0000000..d228bd4
--- /dev/null
+++ b/mpf/ui_div.c
@@ -0,0 +1,127 @@
+/* mpf_ui_div -- Divide an unsigned integer by a float.
+
+Copyright 1993-1996, 2000-2002, 2004, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+void
+mpf_ui_div (mpf_ptr r, unsigned long int u, mpf_srcptr v)
+{
+  mp_srcptr vp;
+  mp_ptr rp, tp, remp, new_vp;
+  mp_size_t vsize;
+  mp_size_t rsize, prospective_rsize, zeros, tsize, high_zero;
+  mp_size_t sign_quotient;
+  mp_size_t prec;
+  mp_exp_t rexp;
+  TMP_DECL;
+
+  vsize = v->_mp_size;
+  /* u >= 0, so the quotient takes the sign of v.  */
+  sign_quotient = vsize;
+
+  if (UNLIKELY (vsize == 0))
+    DIVIDE_BY_ZERO;
+
+  if (UNLIKELY (u == 0))
+    {
+      /* 0 / v = 0 */
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+  vsize = ABS (vsize);
+  prec = r->_mp_prec;
+
+  TMP_MARK;
+  /* u viewed as a one-limb mpf has exponent 1; the quotient exponent is
+     that minus EXP(v), plus 1 for the limb shift from the zero padding,
+     corrected below if the high quotient limb comes out zero.  */
+  rexp = 1 - v->_mp_exp + 1;
+
+  rp = r->_mp_d;
+  vp = v->_mp_d;
+
+  prospective_rsize = 1 - vsize + 1;    /* quot from using given u,v sizes */
+  rsize = prec + 1;                     /* desired quot size */
+
+  zeros = rsize - prospective_rsize;    /* padding u to give rsize */
+  tsize = 1 + zeros;                    /* u with zeros */
+
+  if (WANT_TMP_DEBUG)
+    {
+      /* separate alloc blocks, for malloc debugging */
+      remp = TMP_ALLOC_LIMBS (vsize);
+      tp = TMP_ALLOC_LIMBS (tsize);
+      new_vp = NULL;
+      if (rp == vp)
+        new_vp = TMP_ALLOC_LIMBS (vsize);
+    }
+  else
+    {
+      /* one alloc with calculated size, for efficiency */
+      mp_size_t size = vsize + tsize + (rp == vp ? vsize : 0);
+      remp = TMP_ALLOC_LIMBS (size);
+      tp = remp + vsize;
+      new_vp = tp + tsize;
+    }
+
+  /* ensure divisor doesn't overlap quotient */
+  if (rp == vp)
+    {
+      MPN_COPY (new_vp, vp, vsize);
+      vp = new_vp;
+    }
+
+  /* Build the padded dividend: zeros below, u's limb(s) on top.  */
+  MPN_ZERO (tp, tsize-1);
+
+  tp[tsize - 1] = u & GMP_NUMB_MASK;
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  if (u > GMP_NUMB_MAX)
+    {
+      /* tsize-vsize+1 == rsize, so tsize >= rsize.  rsize == prec+1 >= 2,
+         so tsize >= 2, hence there's room for 2-limb u with nails */
+      ASSERT (tsize >= 2);
+      tp[tsize - 1] = u >> GMP_NUMB_BITS;
+      tp[tsize - 2] = u & GMP_NUMB_MASK;
+      rexp++;
+    }
+#endif
+
+  ASSERT (tsize-vsize+1 == rsize);
+  mpn_tdiv_qr (rp, remp, (mp_size_t) 0, tp, tsize, vp, vsize);
+
+  /* strip possible zero high limb */
+  high_zero = (rp[rsize-1] == 0);
+  rsize -= high_zero;
+  rexp -= high_zero;
+
+  r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;
+  r->_mp_exp = rexp;
+  TMP_FREE;
+}
diff --git a/mpf/ui_sub.c b/mpf/ui_sub.c
new file mode 100644
index 0000000..58da56b
--- /dev/null
+++ b/mpf/ui_sub.c
@@ -0,0 +1,281 @@
+/* mpf_ui_sub -- Subtract a float from an unsigned long int.
+
+Copyright 1993-1996, 2001, 2002, 2005, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set r = u - v.  */
+void
+mpf_ui_sub (mpf_ptr r, unsigned long int u, mpf_srcptr v)
+{
+#if 1
+  /* Active implementation: wrap u in a stack-allocated one-limb mpf
+     and let mpf_sub do the work.  */
+  __mpf_struct uu;
+  mp_limb_t ul;
+
+  if (u == 0)
+    {
+      mpf_neg (r, v);
+      return;
+    }
+
+  ul = u;
+  uu._mp_size = 1;
+  uu._mp_d = &ul;
+  uu._mp_exp = 1;
+  mpf_sub (r, &uu, v);
+
+#else
+  /* Disabled: an earlier specialized implementation, kept for
+     reference.  */
+  mp_srcptr up, vp;
+  mp_ptr rp, tp;
+  mp_size_t usize, vsize, rsize;
+  mp_size_t prec;
+  mp_exp_t uexp;
+  mp_size_t ediff;
+  int negate;
+  mp_limb_t ulimb;
+  TMP_DECL;
+
+  vsize = v->_mp_size;
+
+  /* Handle special cases that don't work in generic code below.  */
+  if (u == 0)
+    {
+      mpf_neg (r, v);
+      return;
+    }
+  if (vsize == 0)
+    {
+      mpf_set_ui (r, u);
+      return;
+    }
+
+  /* If signs of U and V are different, perform addition.  */
+  if (vsize < 0)
+    {
+      __mpf_struct v_negated;
+      v_negated._mp_size = -vsize;
+      v_negated._mp_exp = v->_mp_exp;
+      v_negated._mp_d = v->_mp_d;
+      mpf_add_ui (r, &v_negated, u);
+      return;
+    }
+
+  /* Signs are now known to be the same.  */
+  ASSERT (vsize > 0);
+  ulimb = u;
+  /* Make U be the operand with the largest exponent.  */
+  negate = 1 < v->_mp_exp;
+  prec = r->_mp_prec + negate;
+  rp = r->_mp_d;
+  if (negate)
+    {
+      usize = vsize;
+      vsize = 1;
+      up = v->_mp_d;
+      vp = &ulimb;
+      uexp = v->_mp_exp;
+      ediff = uexp - 1;
+
+      /* If U extends beyond PREC, ignore the part that does.  */
+      if (usize > prec)
+	{
+	  up += usize - prec;
+	  usize = prec;
+	}
+      ASSERT (ediff > 0);
+    }
+  else
+    {
+      vp = v->_mp_d;
+      ediff = 1 - v->_mp_exp;
+  /* Ignore leading limbs in U and V that are equal.  Doing
+     this helps increase the precision of the result.  */
+      if (ediff == 0 && ulimb == vp[vsize - 1])
+	{
+	  usize = 0;
+	  vsize--;
+	  uexp = 0;
+	  /* Note that V might now have leading zero limbs.
+	     In that case we have to adjust uexp.  */
+	  for (;;)
+	    {
+	      if (vsize == 0) {
+		rsize = 0;
+		uexp = 0;
+		goto done;
+	      }
+	      if ( vp[vsize - 1] != 0)
+		break;
+	      vsize--, uexp--;
+	    }
+	}
+      else
+	{
+	  usize = 1;
+	  uexp = 1;
+	  up = &ulimb;
+	}
+      ASSERT (usize <= prec);
+    }
+
+  if (ediff >= prec)
+    {
+      /* V completely cancelled.  */
+      if (rp != up)
+	MPN_COPY (rp, up, usize);
+      rsize = usize;
+    }
+  else
+    {
+  /* If V extends beyond PREC, ignore the part that does.
+     Note that this can make vsize neither zero nor negative.  */
+  if (vsize + ediff > prec)
+    {
+      vp += vsize + ediff - prec;
+      vsize = prec - ediff;
+    }
+
+      /* Locate the least significant non-zero limb in (the needed
+	 parts of) U and V, to simplify the code below.  */
+      ASSERT (vsize > 0);
+      for (;;)
+	{
+	  if (vp[0] != 0)
+	    break;
+	  vp++, vsize--;
+	  if (vsize == 0)
+	    {
+	      MPN_COPY (rp, up, usize);
+	      rsize = usize;
+	      goto done;
+	    }
+	}
+      for (;;)
+	{
+	  if (usize == 0)
+	    {
+	      MPN_COPY (rp, vp, vsize);
+	      rsize = vsize;
+	      negate ^= 1;
+	      goto done;
+	    }
+	  if (up[0] != 0)
+	    break;
+	  up++, usize--;
+	}
+
+      ASSERT (usize > 0 && vsize > 0);
+      TMP_MARK;
+
+      tp = TMP_ALLOC_LIMBS (prec);
+
+      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
+      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */
+
+      if (usize > ediff)
+	{
+	  /* U and V partially overlaps.  */
+	  if (ediff == 0)
+	    {
+	      ASSERT (usize == 1 && vsize >= 1 && ulimb == *up); /* usize is 1>ediff, vsize >= 1 */
+	      if (1 < vsize)
+		{
+		  /* u        */
+		  /* vvvvvvv  */
+		  rsize = vsize;
+		  vsize -= 1;
+		  /* mpn_cmp (up, vp + vsize - usize, usize) > 0 */
+		  if (ulimb > vp[vsize])
+		    {
+		      tp[vsize] = ulimb - vp[vsize] - 1;
+		      ASSERT_CARRY (mpn_neg (tp, vp, vsize));
+		    }
+		  else
+		    {
+		      /* vvvvvvv  */  /* Swap U and V. */
+		      /* u        */
+		      MPN_COPY (tp, vp, vsize);
+		      tp[vsize] = vp[vsize] - ulimb;
+		      negate = 1;
+		    }
+		}
+	      else /* vsize == usize == 1 */
+		{
+		  /* u     */
+		  /* v     */
+		  rsize = 1;
+		  negate = ulimb < vp[0];
+		  tp[0] = negate ? vp[0] - ulimb: ulimb - vp[0];
+		}
+	    }
+	  else
+	    {
+	      ASSERT (vsize + ediff <= usize);
+	      ASSERT (vsize == 1 && usize >= 2 && ulimb == *vp);
+		{
+		  /* uuuu     */
+		  /*   v      */
+		  mp_size_t size;
+		  size = usize - ediff - 1;
+		  MPN_COPY (tp, up, size);
+		  ASSERT_NOCARRY (mpn_sub_1 (tp + size, up + size, usize - size, ulimb));
+		  rsize = usize;
+		}
+		/* Other cases are not possible */
+		/* uuuu     */
+		/*   vvvvv  */
+	    }
+	}
+      else
+	{
+	  /* uuuu     */
+	  /*      vv  */
+	  mp_size_t size, i;
+	  ASSERT_CARRY (mpn_neg (tp, vp, vsize));
+	  rsize = vsize + ediff;
+	  size = rsize - usize;
+	  for (i = vsize; i < size; i++)
+	    tp[i] = GMP_NUMB_MAX;
+	  ASSERT_NOCARRY (mpn_sub_1 (tp + size, up, usize, CNST_LIMB (1)));
+	}
+
+      /* Full normalize.  Optimize later.  */
+      while (rsize != 0 && tp[rsize - 1] == 0)
+	{
+	  rsize--;
+	  uexp--;
+	}
+      MPN_COPY (rp, tp, rsize);
+      TMP_FREE;
+    }
+
+ done:
+  r->_mp_size = negate ? -rsize : rsize;
+  r->_mp_exp = uexp;
+#endif
+}
diff --git a/mpf/urandomb.c b/mpf/urandomb.c
new file mode 100644
index 0000000..7939901
--- /dev/null
+++ b/mpf/urandomb.c
@@ -0,0 +1,68 @@
+/* mpf_urandomb (rop, state, nbits) -- Generate a uniform pseudorandom
+   real number between 0 (inclusive) and 1 (exclusive) of size NBITS,
+   using STATE as the random state previously initialized by a call to
+   gmp_randinit().
+
+Copyright 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpf_urandomb (mpf_ptr rop, gmp_randstate_ptr rstate, mp_bitcnt_t nbits)
+{
+  mp_ptr rp;
+  mp_size_t nlimbs;
+  mp_exp_t exp;
+  mp_size_t prec;
+
+  rp = PTR (rop);			/* limb array of the destination */
+  nlimbs = BITS_TO_LIMBS (nbits);	/* limbs needed to hold nbits */
+  prec = PREC (rop);
+
+  if (nlimbs > prec + 1 || nlimbs == 0)	/* clamp to rop precision; nbits==0 lands here too */
+    {
+      nlimbs = prec + 1;
+      nbits = nlimbs * GMP_NUMB_BITS;
+    }
+
+  _gmp_rand (rp, rstate, nbits);	/* fill rp[0..nlimbs-1] with nbits random bits */
+
+  /* If nbits isn't a multiple of GMP_NUMB_BITS, shift up.  */
+  if (nbits % GMP_NUMB_BITS != 0)
+    mpn_lshift (rp, rp, nlimbs, GMP_NUMB_BITS - nbits % GMP_NUMB_BITS);
+
+  exp = 0;				/* exp <= 0 keeps the value in [0,1), per file header */
+  while (nlimbs != 0 && rp[nlimbs - 1] == 0)
+    {
+      nlimbs--;				/* strip high zero limbs...  */
+      exp--;				/* ...decreasing exp so the value is unchanged */
+    }
+  EXP (rop) = exp;
+  SIZ (rop) = nlimbs;
+}
diff --git a/mpn/arm/aors_n.asm b/mpn/arm/aors_n.asm
new file mode 100644
index 0000000..b055ed5
--- /dev/null
+++ b/mpn/arm/aors_n.asm
@@ -0,0 +1,112 @@
+dnl  ARM mpn_add_n and mpn_sub_n
+
+dnl  Contributed to the GNU project by Robert Harley.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 2.5	slightly fluctuating
+C Cortex-A15	 2.25
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_add_n', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`CLRCY',	`cmn	r0, #0')
+  define(`SETCY',	`cmp	$1, #1')
+  define(`RETVAL',	`adc	r0, n, #0')
+  define(`func',	mpn_add_n)
+  define(`func_nc',	mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`CLRCY',	`cmp	r0, r0')
+  define(`SETCY',	`rsbs	$1, $1, #0')
+  define(`RETVAL',	`sbc	r0, r0, r0
+			and	r0, r0, #1')
+  define(`func',	mpn_sub_n)
+  define(`func_nc',	mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+	ldr	r12, [sp, #0]		C carry-in argument (5th parameter, on the stack)
+	stmfd	sp!, { r8, r9, lr }
+	SETCY(	r12)			C seed the carry flag from the argument
+	b	L(ent)			C join the shared loop in func
+EPILOGUE()
+PROLOGUE(func)
+	stmfd	sp!, { r8, r9, lr }
+	CLRCY(	r12)			C start with carry clear (add) resp. no borrow (sub)
+L(ent):	tst	n, #1			C peel one limb if n is odd
+	beq	L(skip1)
+	ldr	r12, [up], #4
+	ldr	lr, [vp], #4
+	ADDSUBC	r12, r12, lr
+	str	r12, [rp], #4
+L(skip1):
+	tst	n, #2			C then a pair, leaving n = 0 (mod 4)
+	beq	L(skip2)
+	ldmia	up!, { r8, r9 }
+	ldmia	vp!, { r12, lr }
+	ADDSUBC	r8, r8, r12
+	ADDSUBC	r9, r9, lr
+	stmia	rp!, { r8, r9 }
+L(skip2):
+	bics	n, n, #3		C Z set iff no 4-limb groups remain
+	beq	L(rtn)
+	stmfd	sp!, { r4, r5, r6, r7 }
+
+L(top):	ldmia	up!, { r4, r5, r6, r7 }	C main loop: 4 limbs per iteration
+	ldmia	vp!, { r8, r9, r12, lr }
+	ADDSUBC	r4, r4, r8
+	sub	n, n, #4		C sub, not subs: must preserve the carry flag
+	ADDSUBC	r5, r5, r9
+	ADDSUBC	r6, r6, r12
+	ADDSUBC	r7, r7, lr
+	stmia	rp!, { r4, r5, r6, r7 }
+	teq	n, #0			C teq sets Z without touching C
+	bne	L(top)
+
+	ldmfd	sp!, { r4, r5, r6, r7 }
+
+L(rtn):	RETVAL				C materialize final carry/borrow in r0
+	ldmfd	sp!, { r8, r9, pc }
+EPILOGUE()
diff --git a/mpn/arm/aorslsh1_n.asm b/mpn/arm/aorslsh1_n.asm
new file mode 100644
index 0000000..8d3a733
--- /dev/null
+++ b/mpn/arm/aorslsh1_n.asm
@@ -0,0 +1,167 @@
+dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	      addlsh1_n       sublsh1_n
+C	     cycles/limb     cycles/limb
+C StrongARM	 ?		 ?
+C XScale	 ?		 ?
+C Cortex-A7	 ?		 ?
+C Cortex-A8	 ?		 ?
+C Cortex-A9	 3.12		 3.7
+C Cortex-A15	 ?		 ?
+
+C TODO
+C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
+C    The sublsh1_n code could surely be tweaked, its REVCY slows down things
+C    very much.  If two insns are really needed, it might help to separate them
+C    for better micro-parallelism.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_addlsh1_n', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`SETCY',	`cmp	$1, #1')
+  define(`RETVAL',	`adc	r0, $1, #2')
+  define(`SAVECY',	`sbc	$1, $2, #0')
+  define(`RESTCY',	`cmn	$1, #1')
+  define(`REVCY',	`')
+  define(`INICYR',	`mov	$1, #0')
+  define(`r10r11',	`r11')
+  define(`func',	mpn_addlsh1_n)
+  define(`func_nc',	mpn_addlsh1_nc)')
+ifdef(`OPERATION_sublsh1_n', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`SETCY',	`rsbs	$1, $1, #0')
+  define(`RETVAL',	`adc	r0, $1, #1')
+  define(`SAVECY',	`sbc	$1, $1, $1')
+  define(`RESTCY',	`cmn	$1, #1')
+  define(`REVCY',	`sbc	$1, $1, $1
+			cmn	$1, #1')
+  define(`INICYR',	`mvn	$1, #0')
+  define(`r10r11',	`r10')
+  define(`func',	mpn_sublsh1_n)
+  define(`func_nc',	mpn_sublsh1_nc)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{r4-r10r11, r14}
+
+ifdef(`OPERATION_addlsh1_n', `
+	mvn	r11, #0
+')
+	INICYR(	r14)			C init saved carry for the 2*v shift chain
+	subs	n, n, #3
+	blt	L(le2)			C carry clear on branch path
+
+	cmn	r0, #0			C clear carry
+	ldmia	vp!, {r8, r9, r10}
+	b	L(mid)
+
+L(top):	RESTCY(	r14)			C 3 limbs/iter; r12 and r14 alternate as carry-save regs
+	ADDSUBC	r4, r4, r8
+	ADDSUBC	r5, r5, r9
+	ADDSUBC	r6, r6, r10
+	ldmia	vp!, {r8, r9, r10}
+	stmia	rp!, {r4, r5, r6}
+	REVCY(r14)
+	adcs	r8, r8, r8		C double the v limbs, propagating the shift carry
+	adcs	r9, r9, r9
+	adcs	r10, r10, r10
+	ldmia	up!, {r4, r5, r6}
+	SAVECY(	r14, r11)		C save the shift carry
+	subs	n, n, #3
+	blt	L(exi)
+	RESTCY(	r12)
+	ADDSUBC	r4, r4, r8
+	ADDSUBC	r5, r5, r9
+	ADDSUBC	r6, r6, r10
+	ldmia	vp!, {r8, r9, r10}
+	stmia	rp!, {r4, r5, r6}
+	REVCY(r12)
+L(mid):	adcs	r8, r8, r8
+	adcs	r9, r9, r9
+	adcs	r10, r10, r10
+	ldmia	up!, {r4, r5, r6}
+	SAVECY(	r12, r11)
+	subs	n, n, #3
+	bge	L(top)
+
+	mov	r7, r12			C swap alternating...
+	mov	r12, r14		C ...carry-save...
+	mov	r14, r7			C ...registers
+
+L(exi):	RESTCY(	r12)
+	ADDSUBC	r4, r4, r8
+	ADDSUBC	r5, r5, r9
+	ADDSUBC	r6, r6, r10
+	stmia	rp!, {r4, r5, r6}
+
+	REVCY(r12)
+L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
+	beq	L(e1)
+
+L(e02):	tst	n, #2			C two remaining limbs
+	beq	L(rt0)
+	ldm	vp, {r8, r9}
+	adcs	r8, r8, r8
+	adcs	r9, r9, r9
+	ldm	up, {r4, r5}
+	SAVECY(	r12, r11)
+	RESTCY(	r14)
+	ADDSUBC	r4, r4, r8
+	ADDSUBC	r5, r5, r9
+	stm	rp, {r4, r5}
+	b	L(rt1)
+
+L(e1):	ldr	r8, [vp]		C one remaining limb
+	adcs	r8, r8, r8
+	ldr	r4, [up]
+	SAVECY(	r12, r11)
+	RESTCY(	r14)
+	ADDSUBC	r4, r4, r8
+	str	r4, [rp]
+
+L(rt1):	mov	r14, r12
+	REVCY(r12)
+L(rt0):	RETVAL(	r14)			C combine shift carry and add/sub carry into r0
+	pop	{r4-r10r11, r14}
+	return	r14
+EPILOGUE()
diff --git a/mpn/arm/aorsmul_1.asm b/mpn/arm/aorsmul_1.asm
new file mode 100644
index 0000000..15d4d42
--- /dev/null
+++ b/mpn/arm/aorsmul_1.asm
@@ -0,0 +1,135 @@
+dnl  ARM mpn_addmul_1 and mpn_submul_1.
+
+dnl  Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:     ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 5.25
+C Cortex-A15	 4
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`vl', `r3')
+define(`rl', `r12')
+define(`ul', `r6')
+define(`r',  `lr')
+
+ifdef(`OPERATION_addmul_1', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`CLRRCY',	`mov	$1, #0
+			adds	r0, r0, #0')
+  define(`RETVAL',	`adc	r0, r4, #0')
+  define(`func',	mpn_addmul_1)')
+ifdef(`OPERATION_submul_1', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`CLRRCY',	`subs	$1, r0, r0')
+  define(`RETVAL',	`sbc	r0, r0, r0
+			sub	r0, $1, r0')
+  define(`func',	mpn_submul_1)')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+PROLOGUE(func)
+	stmfd	sp!, { r4-r6, lr }
+	CLRRCY(	r4)			C zero carry limb r4 and init the C flag
+	tst	n, #1			C peel one limb if n is odd
+	beq	L(skip1)
+	ldr	ul, [up], #4
+	ldr	rl, [rp, #0]
+	umull	r5, r4, ul, vl		C r4:r5 = ul * vl
+	ADDSUB	r, rl, r5
+	str	r, [rp], #4
+L(skip1):
+	tst	n, #2			C then a pair, leaving n = 0 (mod 4)
+	beq	L(skip2)
+	ldr	ul, [up], #4
+	ldr	rl, [rp, #0]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl		C r5:r4 += ul * vl
+	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	str	r, [rp], #4
+	ADDSUBC	r, rl, r5
+	str	r, [rp], #4
+L(skip2):
+	bics	n, n, #3		C Z set iff nothing left for the 4-way loop
+	beq	L(rtn)
+
+	ldr	ul, [up], #4
+	ldr	rl, [rp, #0]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	b	L(in)
+
+L(top):	ldr	ul, [up], #4		C 4 limbs/iter; carry limb alternates r4/r5
+	ADDSUBC	r, rl, r5
+	ldr	rl, [rp, #4]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	str	r, [rp], #4
+L(in):	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	str	r, [rp], #4
+	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r5
+	ldr	rl, [rp, #4]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	str	r, [rp], #4
+	ldr	ul, [up], #4
+	ADDSUBC	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	sub	n, n, #4
+	tst	n, n			C set Z without touching C
+	str	r, [rp], #4
+	bne	L(top)
+
+	ADDSUBC	r, rl, r5
+	str	r, [rp]
+
+L(rtn):	RETVAL(	r4)			C return the final carry limb
+	ldmfd	sp!, { r4-r6, pc }
+EPILOGUE()
diff --git a/mpn/arm/arm-defs.m4 b/mpn/arm/arm-defs.m4
new file mode 100644
index 0000000..4b4fa0b
--- /dev/null
+++ b/mpn/arm/arm-defs.m4
@@ -0,0 +1,100 @@
+divert(-1)
+
+dnl  m4 macros for ARM assembler.
+
+dnl  Copyright 2001, 2012-2016, 2018-2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+dnl  Standard commenting is with @, the default m4 # is for constants and we
+dnl  don't want to disable macro expansions in or after them.
+
+changecom(@&*$)
+
+define(`ASM_START',
+m4_assert_numargs_range(0,1)
+`ifelse($1,`neon',`.fpu	neon',
+        $1,,`',
+        1,1,`m4_error(`$0 got invalid argument $1')')')
+
+dnl  APCS register names.
+
+deflit(a1,r0)
+deflit(a2,r1)
+deflit(a3,r2)
+deflit(a4,r3)
+dnl deflit(v1,r4)
+dnl deflit(v2,r5)
+dnl deflit(v3,r6)
+dnl deflit(v4,r7)
+dnl deflit(v5,r8)
+dnl deflit(v6,r9)
+deflit(sb,r9)
+dnl deflit(v7,r10)
+deflit(sl,r10)
+deflit(fp,r11)
+deflit(ip,r12)
+dnl deflit(sp,r13)
+deflit(lr,r14)
+deflit(pc,r15)
+
+
+define(`lea_list', `')
+define(`lea_num',0)
+
+dnl  LEA(reg,gmp_symbol)
+dnl
+dnl  Load the address of gmp_symbol into a register.  The gmp_symbol must be
+dnl  either local or protected/hidden, since we assume it has a fixed distance
+dnl  from the point of use.
+
+define(`LEA',`dnl
+ldr	$1, L(ptr`'lea_num)
+ifdef(`PIC',dnl
+`dnl
+L(bas`'lea_num):dnl
+	add	$1, $1, pc`'dnl
+	m4append(`lea_list',`
+L(ptr'lea_num`):	.word	GSYM_PREFIX`'$2-L(bas'lea_num`)-8')
+	define(`lea_num', eval(lea_num+1))dnl
+',`dnl
+	m4append(`lea_list',`
+L(ptr'lea_num`):	.word	GSYM_PREFIX`'$2')
+	define(`lea_num', eval(lea_num+1))dnl
+')dnl
+')
+
+define(`return',`ifdef(`NOTHUMB',`mov	pc, ',`bx')')
+
+
+define(`EPILOGUE_cpu',
+`lea_list
+	SIZE(`$1',.-`$1')'
+`define(`lea_list', `')')
+
+divert
diff --git a/mpn/arm/bdiv_dbm1c.asm b/mpn/arm/bdiv_dbm1c.asm
new file mode 100644
index 0000000..efc1cb9
--- /dev/null
+++ b/mpn/arm/bdiv_dbm1c.asm
@@ -0,0 +1,113 @@
+dnl  ARM mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.25
+C Cortex-A15	 2.5
+
+C TODO
+C  * Try using umlal or umaal.
+C  * Try using ldm/stm.
+
+define(`qp',	  `r0')
+define(`up',	  `r1')
+define(`n',	  `r2')
+define(`bd',	  `r3')
+define(`cy',	  `sp,#0')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+	push	{r4, r5, r6, r7, r8}
+	ldr	r4, [up], #4
+	ldr	r5, [sp, #20]		C carry argument (5th), above the 5 pushed regs
+	ands	r12, n, #3		C n mod 4 selects the loop entry point
+	beq	L(fi0)
+	cmp	r12, #2
+	bcc	L(fi1)
+	beq	L(fi2)
+
+L(fi3):	umull	r8, r12, r4, bd		C r12:r8 = up limb * bd
+	ldr	r4, [up], #4
+	b	L(lo3)
+
+L(fi0):	umull	r6, r7, r4, bd
+	ldr	r4, [up], #4
+	b	L(lo0)
+
+L(fi1):	subs	n, n, #1
+	umull	r8, r12, r4, bd
+	bls	L(wd1)			C n == 1: single-limb wind-down
+	ldr	r4, [up], #4
+	b	L(lo1)
+
+L(fi2):	umull	r6, r7, r4, bd
+	ldr	r4, [up], #4
+	b	L(lo2)
+
+L(top):	ldr	r4, [up], #4		C per limb: c -= lo(product), then c -= hi + borrow
+	subs	r5, r5, r6
+	str	r5, [qp], #4
+	sbc	r5, r5, r7
+L(lo1):	umull	r6, r7, r4, bd
+	ldr	r4, [up], #4
+	subs	r5, r5, r8
+	str	r5, [qp], #4
+	sbc	r5, r5, r12
+L(lo0):	umull	r8, r12, r4, bd
+	ldr	r4, [up], #4
+	subs	r5, r5, r6
+	str	r5, [qp], #4
+	sbc	r5, r5, r7
+L(lo3):	umull	r6, r7, r4, bd
+	ldr	r4, [up], #4
+	subs	r5, r5, r8
+	str	r5, [qp], #4
+	sbc	r5, r5, r12
+L(lo2):	subs	n, n, #4
+	umull	r8, r12, r4, bd
+	bhi	L(top)
+
+L(wd2):	subs	r5, r5, r6		C wind down the last one or two products
+	str	r5, [qp], #4
+	sbc	r5, r5, r7
+L(wd1):	subs	r5, r5, r8
+	str	r5, [qp]
+	sbc	r0, r5, r12		C return the updated carry
+	pop	{r4, r5, r6, r7, r8}
+	return	lr
+EPILOGUE()
diff --git a/mpn/arm/bdiv_q_1.asm b/mpn/arm/bdiv_q_1.asm
new file mode 100644
index 0000000..5a89cd8
--- /dev/null
+++ b/mpn/arm/bdiv_q_1.asm
@@ -0,0 +1,162 @@
+dnl  ARM v4 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C               cycles/limb
+C               norm   unorm
+C 1176		13	18
+C Cortex-A5	 8	12
+C Cortex-A7	10.5	18
+C Cortex-A8	14	15
+C Cortex-A9	10	12		not measured since latest edits
+C Cortex-A15	 9	 9
+C Cortex-A53	14	20
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	-
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`d',   `r3')
+define(`di_arg',  `sp[0]')		C	just mpn_pi1_bdiv_q_1
+define(`cnt_arg', `sp[4]')		C	just mpn_pi1_bdiv_q_1
+
+define(`cy',  `r7')
+define(`cnt', `r6')
+define(`tnc', `r8')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_q_1)
+	tst	d, #1			C odd d needs no pre-shift
+	push	{r6-r11}
+	mov	cnt, #0
+	bne	L(inv)
+
+C count trailing zeros
+	movs	r10, d, lsl #16
+	moveq	d, d, lsr #16
+	moveq	cnt, #16
+	tst	d, #0xff
+	moveq	d, d, lsr #8
+	addeq	cnt, cnt, #8
+	LEA(	r10, ctz_tab)		C table gives ctz of the low byte
+	and	r11, d, #0xff
+	ldrb	r10, [r10, r11]
+	mov	d, d, lsr r10		C d is now odd
+	add	cnt, cnt, r10
+
+C binvert limb
+L(inv):	LEA(	r10, binvert_limb_table)
+	and	r12, d, #254
+	ldrb	r10, [r10, r12, lsr #1]	C 8-bit inverse of d from table
+	mul	r12, r10, r10
+	mul	r12, d, r12
+	rsb	r12, r12, r10, lsl #1	C Newton step x = 2x - d*x*x: 8 -> 16 bits
+	mul	r10, r12, r12
+	mul	r10, d, r10
+	rsb	r10, r10, r12, lsl #1	C r10 = inverse
+	b	L(pi1)			C join common code with inverse in r10, shift in cnt
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+	push	{r6-r11}
+
+	ldr	cnt, [sp, #28]		C shift count argument (6th)
+	ldr	r10, [sp, #24]		C di, precomputed inverse of d (5th)
+
+L(pi1):	ldr	r11, [up], #4		C up[0]
+	cmp	cnt, #0			C cnt == 0: normalized (odd d) path
+	mov	cy, #0
+	bne	L(unorm)
+
+L(norm):
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(edn)
+
+	ALIGN(16)
+L(tpn):	sbcs	cy, r11, cy
+	ldr	r11, [up], #4
+	sub	n, n, #1
+	mul	r9, r10, cy		C quotient limb = cy * di mod 2^32
+	tst	n, n			C set Z without touching C
+	umull	r12, cy, d, r9		C cy = hi(d * q)
+	str	r9, [rp], #4
+	bne	L(tpn)
+
+L(edn):	sbc	cy, r11, cy
+	mul	r9, r10, cy
+	str	r9, [rp]
+	pop	{r6-r11}
+	return	r14
+
+L(unorm):
+	rsb	tnc, cnt, #32		C tnc = 32 - cnt, for reassembling shifted limbs
+	mov	r11, r11, lsr cnt
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(edu)
+
+	ALIGN(16)
+L(tpu):	ldr	r12, [up], #4
+	orr	r9, r11, r12, lsl tnc	C combine low part of this limb with high of previous
+	mov	r11, r12, lsr cnt
+	sbcs	cy, r9, cy		C critical path ->cy->cy->
+	sub	n, n, #1
+	mul	r9, r10, cy		C critical path ->cy->r9->
+	tst	n, n
+	umull	r12, cy, d, r9		C critical path ->r9->cy->
+	str	r9, [rp], #4
+	bne	L(tpu)
+
+L(edu):	sbc	cy, r11, cy
+	mul	r9, r10, cy
+	str	r9, [rp]
+	pop	{r6-r11}
+	return	r14
+EPILOGUE()
+
+	RODATA
+ctz_tab:
+	.byte	8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
diff --git a/mpn/arm/cnd_aors_n.asm b/mpn/arm/cnd_aors_n.asm
new file mode 100644
index 0000000..603d1fa
--- /dev/null
+++ b/mpn/arm/cnd_aors_n.asm
@@ -0,0 +1,134 @@
+dnl  ARM mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3
+C Cortex-A15	 2.5
+
+define(`cnd',	`r0')
+define(`rp',	`r1')
+define(`up',	`r2')
+define(`vp',	`r3')
+
+define(`n',	`r12')
+
+
+ifdef(`OPERATION_cnd_add_n', `
+	define(`ADDSUB',      adds)
+	define(`ADDSUBC',      adcs)
+	define(`INITCY',      `cmn	r0, #0')
+	define(`RETVAL',      `adc	r0, n, #0')
+	define(func,	      mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+	define(`ADDSUB',      subs)
+	define(`ADDSUBC',      sbcs)
+	define(`INITCY',      `cmp	r0, #0')
+	define(`RETVAL',      `adc	r0, n, #0
+			      rsb	r0, r0, #1')
+	define(func,	      mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{r4-r11}
+	ldr	n, [sp, #32]		C n is the 5th argument, above the 8 pushed regs
+
+	cmp	cnd, #1
+	sbc	cnd, cnd, cnd		C conditionally set to 0xffffffff
+
+	INITCY				C really only needed for n = 0 (mod 4)
+
+	ands	r4, n, #3		C n mod 4 selects the entry point
+	beq	L(top)
+	cmp	r4, #2
+	bcc	L(b1)
+	beq	L(b2)
+
+L(b3):	ldm	vp!, {r4,r5,r6}		C three leading limbs
+	ldm	up!, {r8,r9,r10}
+	bic	r4, r4, cnd		C mask: v limbs become 0 when condition is false
+	bic	r5, r5, cnd
+	bic	r6, r6, cnd
+	ADDSUB	r8, r8, r4
+	ADDSUBC	r9, r9, r5
+	ADDSUBC	r10, r10, r6
+	stm	rp!, {r8,r9,r10}
+	sub	n, n, #3
+	teq	n, #0			C teq preserves the carry flag
+	bne	L(top)
+	b	L(end)
+
+L(b2):	ldm	vp!, {r4,r5}		C two leading limbs
+	ldm	up!, {r8,r9}
+	bic	r4, r4, cnd
+	bic	r5, r5, cnd
+	ADDSUB	r8, r8, r4
+	ADDSUBC	r9, r9, r5
+	stm	rp!, {r8,r9}
+	sub	n, n, #2
+	teq	n, #0
+	bne	L(top)
+	b	L(end)
+
+L(b1):	ldr	r4, [vp], #4		C one leading limb
+	ldr	r8, [up], #4
+	bic	r4, r4, cnd
+	ADDSUB	r8, r8, r4
+	str	r8, [rp], #4
+	sub	n, n, #1
+	teq	n, #0
+	beq	L(end)
+
+L(top):	ldm	vp!, {r4,r5,r6,r7}	C main loop: 4 limbs per iteration
+	ldm	up!, {r8,r9,r10,r11}
+	bic	r4, r4, cnd
+	bic	r5, r5, cnd
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
+	ADDSUBC	r8, r8, r4
+	ADDSUBC	r9, r9, r5
+	ADDSUBC	r10, r10, r6
+	ADDSUBC	r11, r11, r7
+	sub	n, n, #4
+	stm	rp!, {r8,r9,r10,r11}
+	teq	n, #0
+	bne	L(top)
+
+L(end):	RETVAL				C materialize carry/borrow in r0
+	pop	{r4-r11}
+	return	r14
+EPILOGUE()
diff --git a/mpn/arm/com.asm b/mpn/arm/com.asm
new file mode 100644
index 0000000..f89033f
--- /dev/null
+++ b/mpn/arm/com.asm
@@ -0,0 +1,75 @@
+dnl  ARM mpn_com.
+
+dnl  Copyright 2003, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 2.0
+C Cortex-A15	 1.75
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_com)
+	tst	n, #1			C rp[i] = ~up[i]; peel one limb if n odd
+	beq	L(skip1)
+	ldr	r3, [up], #4
+	mvn	r3, r3
+	str	r3, [rp], #4
+L(skip1):
+	tst	n, #2			C then a pair, leaving n = 0 (mod 4)
+	beq	L(skip2)
+	ldmia	up!, { r3, r12 }		C load 2 limbs
+	mvn	r3, r3
+	mvn	r12, r12
+	stmia	rp!, { r3, r12 }		C store 2 limbs
+L(skip2):
+	bics	n, n, #3		C Z set iff no 4-limb groups remain
+	beq	L(rtn)
+	stmfd	sp!, { r7, r8, r9 }		C save regs on stack
+
+L(top):	ldmia	up!, { r3, r8, r9, r12 }	C load 4 limbs
+	subs	n, n, #4
+	mvn	r3, r3
+	mvn	r8, r8
+	mvn	r9, r9
+	mvn	r12, r12
+	stmia	rp!, { r3, r8, r9, r12 }	C store 4 limbs
+	bne	L(top)
+
+	ldmfd	sp!, { r7, r8, r9 }		C restore regs from stack
+L(rtn):	return	lr
+EPILOGUE()
diff --git a/mpn/arm/copyd.asm b/mpn/arm/copyd.asm
new file mode 100644
index 0000000..c23c864
--- /dev/null
+++ b/mpn/arm/copyd.asm
@@ -0,0 +1,84 @@
+dnl  ARM mpn_copyd.
+
+dnl  Contributed to the GNU project by Robert Harley and Torbjörn Granlund.
+
+dnl  Copyright 2003, 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.25-1.5
+C Cortex-A15	 1.25
+
+C TODO
+C  * Consider wider unrolling.  Analogous 8-way code runs 10% faster on both A9
+C    and A15.  But it probably slows things down for 8 <= n < a few dozen.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+	mov	r12, n, lsl #2		C r12 = 4*n bytes
+	sub	r12, r12, #4		C byte offset of the last limb
+	add	rp, rp, r12		C copy runs downwards from the top limb
+	add	up, up, r12
+
+	tst	n, #1			C peel one limb if n odd
+	beq	L(skip1)
+	ldr	r3, [up], #-4
+	str	r3, [rp], #-4
+L(skip1):
+	tst	n, #2			C then a pair, leaving n = 0 (mod 4)
+	beq	L(skip2)
+	ldmda	up!, { r3,r12 }		C descending-address load/store
+	stmda	rp!, { r3,r12 }
+L(skip2):
+	bics	n, n, #3		C Z set iff no 4-limb groups remain
+	beq	L(rtn)
+
+	push	{ r4-r5 }
+	subs	n, n, #4
+	ldmda	up!, { r3,r4,r5,r12 }
+	beq	L(end)
+
+L(top):	subs	n, n, #4		C software-pipelined: store previous, load next
+	stmda	rp!, { r3,r4,r5,r12 }
+	ldmda	up!, { r3,r4,r5,r12 }
+	bne	L(top)
+
+L(end):	stmda	rp, { r3,r4,r5,r12 }	C store the final 4 limbs
+	pop	{ r4-r5 }
+L(rtn):	return	lr
+EPILOGUE()
diff --git a/mpn/arm/copyi.asm b/mpn/arm/copyi.asm
new file mode 100644
index 0000000..9970594
--- /dev/null
+++ b/mpn/arm/copyi.asm
@@ -0,0 +1,79 @@
+dnl  ARM mpn_copyi.
+
+dnl  Contributed to the GNU project by Robert Harley and Torbjörn Granlund.
+
+dnl  Copyright 2003, 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.25-1.5
+C Cortex-A15	 1.25
+
+C TODO
+C  * Consider wider unrolling.  Analogous 8-way code runs 10% faster on both A9
+C    and A15.  But it probably slows things down for 8 <= n < a few dozen.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+	tst	n, #1			C ascending copy; peel one limb if n odd
+	beq	L(skip1)
+	ldr	r3, [up], #4
+	str	r3, [rp], #4
+L(skip1):
+	tst	n, #2			C then a pair, leaving n = 0 (mod 4)
+	beq	L(skip2)
+	ldmia	up!, { r3,r12 }
+	stmia	rp!, { r3,r12 }
+L(skip2):
+	bics	n, n, #3		C Z set iff no 4-limb groups remain
+	beq	L(rtn)
+
+	push	{ r4-r5 }
+	subs	n, n, #4
+	ldmia	up!, { r3,r4,r5,r12 }
+	beq	L(end)
+
+L(top):	subs	n, n, #4		C software-pipelined: store previous, load next
+	stmia	rp!, { r3,r4,r5,r12 }
+	ldmia	up!, { r3,r4,r5,r12 }
+	bne	L(top)
+
+L(end):	stm	rp, { r3,r4,r5,r12 }	C store the final 4 limbs
+	pop	{ r4-r5 }
+L(rtn):	return	lr
+EPILOGUE()
diff --git a/mpn/arm/dive_1.asm b/mpn/arm/dive_1.asm
new file mode 100644
index 0000000..5eefb1a
--- /dev/null
+++ b/mpn/arm/dive_1.asm
@@ -0,0 +1,151 @@
+dnl  ARM v4 mpn_divexact_1.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C               cycles/limb       cycles/limb
+C               norm    unorm    modexact_1c_odd
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	10	12
+C Cortex-A15	 9	 9
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	-
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`d',  `r3')
+
+define(`cy', `r7')
+define(`cnt', `r6')
+define(`tnc', `r8')
+
+ASM_START()
+PROLOGUE(mpn_divexact_1)
+	tst	d, #1			C is d already odd?
+	push	{r4-r9}
+	mov	cnt, #0
+	bne	L(inv)			C odd d needs no normalisation
+
+C count trailing zeros
+	movs	r4, d, lsl #16		C low 16 bits all zero?
+	moveq	d, d, lsr #16
+	moveq	cnt, #16
+	tst	d, #0xff		C low 8 bits all zero?
+	moveq	d, d, lsr #8
+	addeq	cnt, cnt, #8
+	LEA(	r4, ctz_tab)
+	and	r5, d, #0xff
+	ldrb	r4, [r4, r5]		C table gives ctz of the low byte
+	mov	d, d, lsr r4		C d is now odd
+	add	cnt, cnt, r4		C cnt = trailing zero count of d
+
+C binvert limb
+L(inv):	LEA(	r4, binvert_limb_table)
+	and	r12, d, #254
+	ldrb	r4, [r4, r12, lsr #1]	C 8-bit seed inverse of odd d
+	mul	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, lsl #1	C Newton step, 16 correct bits
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, lsl #1	C r4 = inverse
+
+	tst	cnt, cnt		C were there trailing zeros in d?
+	ldr	r5, [up], #4		C up[0]
+	mov	cy, #0
+	bne	L(unnorm)
+
+L(norm):
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(edn)
+
+	ALIGN(16)
+L(tpn):	sbcs	cy, r5, cy		C subtract borrow from next limb
+	ldr	r5, [up], #4
+	sub	n, n, #1
+	mul	r9, r4, cy		C quotient limb, cy times inverse
+	tst	n, n
+	umull	r12, cy, d, r9		C new borrow from high product half
+	str	r9, [rp], #4
+	bne	L(tpn)
+
+L(edn):	sbc	cy, r5, cy
+	mul	r9, r4, cy		C final quotient limb
+	str	r9, [rp]
+	pop	{r4-r9}
+	return	r14
+
+L(unnorm):
+	rsb	tnc, cnt, #32		C tnc = 32 - cnt
+	mov	r5, r5, lsr cnt		C pre-shift the first limb
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(edu)
+
+	ALIGN(16)
+L(tpu):	ldr	r12, [up], #4
+	orr	r9, r5, r12, lsl tnc	C merge adjacent shifted limbs
+	mov	r5, r12, lsr cnt
+	sbcs	cy, r9, cy		C critical path ->cy->cy->
+	sub	n, n, #1
+	mul	r9, r4, cy		C critical path ->cy->r9->
+	tst	n, n
+	umull	r12, cy, d, r9		C critical path ->r9->cy->
+	str	r9, [rp], #4
+	bne	L(tpu)
+
+L(edu):	sbc	cy, r5, cy
+	mul	r9, r4, cy		C final quotient limb
+	str	r9, [rp]
+	pop	{r4-r9}
+	return	r14
+EPILOGUE()
+
+	RODATA
+ctz_tab:
+	.byte	8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
diff --git a/mpn/arm/gmp-mparam.h b/mpn/arm/gmp-mparam.h
new file mode 100644
index 0000000..87eec3a
--- /dev/null
+++ b/mpn/arm/gmp-mparam.h
@@ -0,0 +1,127 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1193MHz ARM (gcc55.fsffrance.org) */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         56
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     71
+#define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIVREM_2_THRESHOLD                   0  /* preinv always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           41
+
+#define MUL_TOOM22_THRESHOLD                36
+#define MUL_TOOM33_THRESHOLD               125
+#define MUL_TOOM44_THRESHOLD               193
+#define MUL_TOOM6H_THRESHOLD               303
+#define MUL_TOOM8H_THRESHOLD               418
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     125
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     176
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     129
+
+#define SQR_BASECASE_THRESHOLD              12
+#define SQR_TOOM2_THRESHOLD                 78
+#define SQR_TOOM3_THRESHOLD                137
+#define SQR_TOOM4_THRESHOLD                212
+#define SQR_TOOM6_THRESHOLD                306
+#define SQR_TOOM8_THRESHOLD                422
+
+#define MULMOD_BNM1_THRESHOLD               20
+#define SQRMOD_BNM1_THRESHOLD               26
+
+#define MUL_FFT_MODF_THRESHOLD             436  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    436, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
+    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {    256, 9}, {    512,10}, {   1024,11}, {   2048,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 28
+#define MUL_FFT_THRESHOLD                 5760
+
+#define SQR_FFT_MODF_THRESHOLD             404  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    404, 5}, {     13, 4}, {     27, 5}, {     27, 6}, \
+    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
+    {     35, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {    512,10}, \
+    {   1024,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 26
+#define SQR_FFT_THRESHOLD                 3776
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                 137
+#define MULLO_MUL_N_THRESHOLD            11479
+
+#define DC_DIV_QR_THRESHOLD                150
+#define DC_DIVAPPR_Q_THRESHOLD             494
+#define DC_BDIV_QR_THRESHOLD               148
+#define DC_BDIV_Q_THRESHOLD                345
+
+#define INV_MULMOD_BNM1_THRESHOLD           70
+#define INV_NEWTON_THRESHOLD               474
+#define INV_APPR_THRESHOLD                 478
+
+#define BINV_NEWTON_THRESHOLD              542
+#define REDC_1_TO_REDC_N_THRESHOLD         117
+
+#define MU_DIV_QR_THRESHOLD               2089
+#define MU_DIVAPPR_Q_THRESHOLD            2172
+#define MUPI_DIV_QR_THRESHOLD              225
+#define MU_BDIV_QR_THRESHOLD              1528
+#define MU_BDIV_Q_THRESHOLD               2089
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     197
+#define GCD_DC_THRESHOLD                   902
+#define GCDEXT_DC_THRESHOLD                650
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                20
+#define GET_STR_PRECOMPUTE_THRESHOLD        39
+#define SET_STR_DC_THRESHOLD              1045
+#define SET_STR_PRECOMPUTE_THRESHOLD      2147
diff --git a/mpn/arm/invert_limb.asm b/mpn/arm/invert_limb.asm
new file mode 100644
index 0000000..bb55a60
--- /dev/null
+++ b/mpn/arm/invert_limb.asm
@@ -0,0 +1,93 @@
+dnl  ARM mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Copyright 2001, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)
+	LEA(	r2, approx_tab-512)	C table indexed by the top 9 bits of d
+	mov	r3, r0, lsr #23		C d is a normalised limb, bit 31 set
+	mov	r3, r3, asl #1		C scale index to halfwords
+	ldrh	r3, [r3, r2]		C initial reciprocal approximation
+	mov	r1, r3, asl #17
+	mul	r12, r3, r3
+	umull	r3, r2, r12, r0
+	sub	r1, r1, r2, asl #1	C first refinement of the approximation
+	umull	r3, r2, r1, r1
+	umull	r12, r3, r0, r3
+	umull	r2, r12, r0, r2		C 64-bit product pieces of d times v^2
+	adds	r2, r2, r3
+	adc	r12, r12, #0
+	rsb	r1, r12, r1		C second refinement
+	mvn	r2, r2, lsr #30
+	add	r2, r2, r1, asl #2
+	umull	r12, r3, r0, r2		C check product for the final fixup
+	adds	r1, r12, r0
+	adc	r3, r3, r0
+	rsb	r0, r3, r2		C r0 = inverted limb result
+	return	lr
+EPILOGUE()
+
+	RODATA
+	ALIGN(2)
+approx_tab:
+	.short    0xffc0,0xfec0,0xfdc0,0xfcc0,0xfbc0,0xfac0,0xfa00,0xf900
+	.short    0xf800,0xf700,0xf640,0xf540,0xf440,0xf380,0xf280,0xf180
+	.short    0xf0c0,0xefc0,0xef00,0xee00,0xed40,0xec40,0xeb80,0xeac0
+	.short    0xe9c0,0xe900,0xe840,0xe740,0xe680,0xe5c0,0xe500,0xe400
+	.short    0xe340,0xe280,0xe1c0,0xe100,0xe040,0xdf80,0xdec0,0xde00
+	.short    0xdd40,0xdc80,0xdbc0,0xdb00,0xda40,0xd980,0xd8c0,0xd800
+	.short    0xd740,0xd680,0xd600,0xd540,0xd480,0xd3c0,0xd340,0xd280
+	.short    0xd1c0,0xd140,0xd080,0xcfc0,0xcf40,0xce80,0xcdc0,0xcd40
+	.short    0xcc80,0xcc00,0xcb40,0xcac0,0xca00,0xc980,0xc8c0,0xc840
+	.short    0xc780,0xc700,0xc640,0xc5c0,0xc540,0xc480,0xc400,0xc380
+	.short    0xc2c0,0xc240,0xc1c0,0xc100,0xc080,0xc000,0xbf80,0xbec0
+	.short    0xbe40,0xbdc0,0xbd40,0xbc80,0xbc00,0xbb80,0xbb00,0xba80
+	.short    0xba00,0xb980,0xb900,0xb840,0xb7c0,0xb740,0xb6c0,0xb640
+	.short    0xb5c0,0xb540,0xb4c0,0xb440,0xb3c0,0xb340,0xb2c0,0xb240
+	.short    0xb1c0,0xb140,0xb0c0,0xb080,0xb000,0xaf80,0xaf00,0xae80
+	.short    0xae00,0xad80,0xad40,0xacc0,0xac40,0xabc0,0xab40,0xaac0
+	.short    0xaa80,0xaa00,0xa980,0xa900,0xa8c0,0xa840,0xa7c0,0xa740
+	.short    0xa700,0xa680,0xa600,0xa5c0,0xa540,0xa4c0,0xa480,0xa400
+	.short    0xa380,0xa340,0xa2c0,0xa240,0xa200,0xa180,0xa140,0xa0c0
+	.short    0xa080,0xa000,0x9f80,0x9f40,0x9ec0,0x9e80,0x9e00,0x9dc0
+	.short    0x9d40,0x9d00,0x9c80,0x9c40,0x9bc0,0x9b80,0x9b00,0x9ac0
+	.short    0x9a40,0x9a00,0x9980,0x9940,0x98c0,0x9880,0x9840,0x97c0
+	.short    0x9780,0x9700,0x96c0,0x9680,0x9600,0x95c0,0x9580,0x9500
+	.short    0x94c0,0x9440,0x9400,0x93c0,0x9340,0x9300,0x92c0,0x9240
+	.short    0x9200,0x91c0,0x9180,0x9100,0x90c0,0x9080,0x9000,0x8fc0
+	.short    0x8f80,0x8f40,0x8ec0,0x8e80,0x8e40,0x8e00,0x8d80,0x8d40
+	.short    0x8d00,0x8cc0,0x8c80,0x8c00,0x8bc0,0x8b80,0x8b40,0x8b00
+	.short    0x8a80,0x8a40,0x8a00,0x89c0,0x8980,0x8940,0x88c0,0x8880
+	.short    0x8840,0x8800,0x87c0,0x8780,0x8740,0x8700,0x8680,0x8640
+	.short    0x8600,0x85c0,0x8580,0x8540,0x8500,0x84c0,0x8480,0x8440
+	.short    0x8400,0x8380,0x8340,0x8300,0x82c0,0x8280,0x8240,0x8200
+	.short    0x81c0,0x8180,0x8140,0x8100,0x80c0,0x8080,0x8040,0x8000
+ASM_END()
diff --git a/mpn/arm/logops_n.asm b/mpn/arm/logops_n.asm
new file mode 100644
index 0000000..c0ddaf4
--- /dev/null
+++ b/mpn/arm/logops_n.asm
@@ -0,0 +1,139 @@
+dnl  ARM mpn_and_n, mpn_andn_n, mpn_nand_n, etc.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C            cycles/limb             cycles/limb
+C          and andn ior xor         nand iorn nior xnor
+C StrongARM	 ?			 ?
+C XScale	 ?			 ?
+C Cortex-A7	 ?			 ?
+C Cortex-A8	 ?			 ?
+C Cortex-A9	2.5-2.72		2.75-3
+C Cortex-A15	2.25			2.75
+
+C TODO
+C  * It seems that 2.25 c/l and 2.75 c/l are possible for A9.
+C  * Debug popping issue, see comment below.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+define(`POSTOP')
+
+ifdef(`OPERATION_and_n',`
+  define(`func',    `mpn_and_n')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',    `mpn_andn_n')
+  define(`LOGOP',   `bic	$1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',    `mpn_nand_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',    `mpn_ior_n')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',    `mpn_iorn_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `bic	$1, $3, $2')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',    `mpn_nior_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',    `mpn_xor_n')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',    `mpn_xnor_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{ r8, r9, r10 }
+	tst	n, #1			C handle an odd leading limb first
+	beq	L(skip1)
+	ldr	r10, [vp], #4
+	ldr	r12, [up], #4
+	LOGOP(	r12, r12, r10)
+	POSTOP(	r12)			C complement, only for the n-variants
+	str	r12, [rp], #4
+L(skip1):
+	tst	n, #2			C then a possible limb pair
+	beq	L(skip2)
+	ldmia	vp!, { r10, r12 }
+	ldmia	up!, { r8, r9 }
+	LOGOP(	r8, r8, r10)
+	LOGOP(	r9, r9, r12)
+	POSTOP(	r8)
+	POSTOP(	r9)
+	stmia	rp!, { r8, r9 }
+L(skip2):
+	bics	n, n, #3		C round n down to a multiple of 4
+	beq	L(rtn)
+	push	{ r4, r5, r6, r7 }
+
+	ldmia	vp!, { r8, r9, r10, r12 }
+	b	L(mid)			C enter loop with vp limbs preloaded
+
+L(top):	ldmia	vp!, { r8, r9, r10, r12 }
+	POSTOP(	r4)
+	POSTOP(	r5)
+	POSTOP(	r6)
+	POSTOP(	r7)
+	stmia	rp!, { r4, r5, r6, r7 }	C store the previous iteration result
+L(mid):	sub	n, n, #4
+	ldmia	up!, { r4, r5, r6, r7 }
+	teq	n, #0			C set Z early, before the LOGOPs
+	LOGOP(	r4, r4, r8)
+	LOGOP(	r5, r5, r9)
+	LOGOP(	r6, r6, r10)
+	LOGOP(	r7, r7, r12)
+	bne	L(top)
+
+	POSTOP(	r4)
+	POSTOP(	r5)
+	POSTOP(	r6)
+	POSTOP(	r7)
+	stmia	rp!, { r4, r5, r6, r7 }	C store the final 4 limbs
+
+	pop	{ r4, r5, r6, r7 }	C popping r8-r10 here strangely fails
+
+L(rtn):	pop	{ r8, r9, r10 }
+	return	r14
+EPILOGUE()
diff --git a/mpn/arm/lshift.asm b/mpn/arm/lshift.asm
new file mode 100644
index 0000000..dd7548b
--- /dev/null
+++ b/mpn/arm/lshift.asm
@@ -0,0 +1,88 @@
+dnl  ARM mpn_lshift.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.5
+C Cortex-A15	 ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	add	up, up, n, lsl #2	C point past the most significant limb
+	push	{r4, r6, r7, r8}
+	ldr	r4, [up, #-4]!		C high limb, also source of return value
+	add	rp, rp, n, lsl #2	C work from high addresses downwards
+	rsb	tnc, cnt, #32		C tnc = 32 - cnt
+
+	mov	r7, r4, lsl cnt		C r7 holds the partial output limb
+	tst	n, #1
+	beq	L(evn)			C n even
+
+L(odd):	subs	n, n, #2
+	bcc	L(1)			C n = 1
+	ldr	r8, [up, #-4]!
+	b	L(mid)
+
+L(evn):	ldr	r6, [up, #-4]!
+	subs	n, n, #2
+	beq	L(end)
+
+L(top):	ldr	r8, [up, #-4]!		C 2-way unrolled loop, high to low
+	orr	r7, r7, r6, lsr tnc	C merge low bits of the next limb
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(mid):	ldr	r6, [up, #-4]!
+	orr	r7, r7, r8, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r8, lsl cnt
+	subs	n, n, #2
+	bgt	L(top)
+
+L(end):	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(1):	str	r7, [rp, #-4]		C lowest output limb
+	mov	r0, r4, lsr tnc		C return bits shifted out at the top
+	pop	{r4, r6, r7, r8}
+	return	r14
+EPILOGUE()
diff --git a/mpn/arm/lshiftc.asm b/mpn/arm/lshiftc.asm
new file mode 100644
index 0000000..18defd8
--- /dev/null
+++ b/mpn/arm/lshiftc.asm
@@ -0,0 +1,95 @@
+dnl  ARM mpn_lshiftc.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.0
+C Cortex-A15	 ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+	add	up, up, n, lsl #2	C point past the most significant limb
+	push	{r4, r6, r7, r8}
+	ldr	r4, [up, #-4]!		C high limb, also source of return value
+	add	rp, rp, n, lsl #2	C work from high addresses downwards
+	rsb	tnc, cnt, #32		C tnc = 32 - cnt
+	mvn	r6, r4			C limbs are complemented as they flow
+
+	mov	r7, r6, lsl cnt		C r7 holds the partial output limb
+	tst	n, #1
+	beq	L(evn)			C n even
+
+L(odd):	subs	n, n, #2
+	bcc	L(1)			C n = 1
+	ldr	r8, [up, #-4]!
+	mvn	r8, r8
+	b	L(mid)
+
+L(evn):	ldr	r6, [up, #-4]!
+	mvn	r6, r6
+	subs	n, n, #2
+	beq	L(end)
+
+L(top):	ldr	r8, [up, #-4]!		C 2-way unrolled loop, high to low
+	orr	r7, r7, r6, lsr tnc	C merge low bits of the next limb
+	str	r7, [rp, #-4]!
+	mvn	r8, r8
+	mov	r7, r6, lsl cnt
+L(mid):	ldr	r6, [up, #-4]!
+	orr	r7, r7, r8, lsr tnc
+	str	r7, [rp, #-4]!
+	mvn	r6, r6
+	mov	r7, r8, lsl cnt
+	subs	n, n, #2
+	bgt	L(top)
+
+L(end):	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(1):	mvn	r6, #0			C fill the vacated low bits with ones
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]
+	mov	r0, r4, lsr tnc		C return out-shifted bits, not inverted
+	pop	{r4, r6, r7, r8}
+	return	r14
+EPILOGUE()
diff --git a/mpn/arm/mod_34lsub1.asm b/mpn/arm/mod_34lsub1.asm
new file mode 100644
index 0000000..456dab5
--- /dev/null
+++ b/mpn/arm/mod_34lsub1.asm
@@ -0,0 +1,124 @@
+dnl  ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A5	 2.67
+C Cortex-A7	 2.35
+C Cortex-A8	 2.0
+C Cortex-A9	 1.33
+C Cortex-A15	 1.33
+C Cortex-A17	 3.34
+C Cortex-A53	 2.0
+
+define(`ap',	r0)
+define(`n',	r1)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C  * Write cleverer summation code.
+C  * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+	push	{ r4, r5, r6, r7 }
+
+	subs	n, n, #3
+	mov	r7, #0			C r7 accumulates carries out of the sums
+	blt	L(le2)			C n <= 2
+
+	ldmia	ap!, { r2, r3, r12 }	C three accumulators, one per limb phase
+	subs	n, n, #3
+	blt	L(sum)			C n <= 5
+	cmn	r0, #0			C clear carry
+	sub	n, n, #3
+	b	L(mid)
+
+L(top):	adcs	r2, r2, r4		C add limbs, folding carries along
+	adcs	r3, r3, r5
+	adcs	r12, r12, r6
+L(mid):	ldmia	ap!, { r4, r5, r6 }
+	tst	n, n			C test n without touching the carry
+	sub	n, n, #3
+	bpl	L(top)
+
+	add	n, n, #3		C 0 <= n <= 2 limbs remain
+
+	adcs	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, r6
+	movcs	r7, #1			C r7 <= 1
+
+L(sum):	cmn	n, #2
+	movlo	r4, #0			C no leftover limbs
+	ldrhs	r4, [ap], #4		C one or two leftover limbs
+	movls	r5, #0
+	ldrhi	r5, [ap], #4
+
+	adds	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, #0
+	adc	r7, r7, #0		C r7 <= 2
+
+L(sum2):
+	bic	r0, r2, #0xff000000	C fold accumulators into 24-bit pieces,
+	add	r0, r0, r2, lsr #24	C each weighted by its limb position
+	add	r0, r0, r7
+
+	mov	r7, r3, lsl #8		C second phase, weight 2^8
+	bic	r1, r7, #0xff000000
+	add	r0, r0, r1
+	add	r0, r0, r3, lsr #16
+
+	mov	r7, r12, lsl #16	C third phase, weight 2^16
+	bic	r1, r7, #0xff000000
+	add	r0, r0, r1
+	add	r0, r0, r12, lsr #8
+
+	pop	{ r4, r5, r6, r7 }
+	return	lr
+
+L(le2):	cmn	n, #1
+	bne	L(1)
+	ldmia	ap!, { r2, r3 }		C exactly 2 limbs
+	mov	r12, #0
+	b	L(sum2)
+L(1):	ldr	r2, [ap]		C exactly 1 limb
+	bic	r0, r2, #0xff000000	C fold it into 24 bits directly
+	add	r0, r0, r2, lsr #24
+	pop	{ r4, r5, r6, r7 }
+	return	lr
+EPILOGUE()
diff --git a/mpn/arm/mode1o.asm b/mpn/arm/mode1o.asm
new file mode 100644
index 0000000..4de4cf6
--- /dev/null
+++ b/mpn/arm/mode1o.asm
@@ -0,0 +1,92 @@
+dnl  ARM mpn_modexact_1c_odd
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	10
+C Cortex-A15	 9
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	-
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`up', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cy', `r3')
+
+	.protected	binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+	stmfd	sp!, {r4, r5}
+
+	LEA(	r4, binvert_limb_table)
+
+	ldr	r5, [up], #4		C up[0]
+
+	and	r12, d, #254
+	ldrb	r4, [r4, r12, lsr #1]	C 8-bit seed inverse of odd d
+	mul	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, asl #1	C Newton step, 16 correct bits
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, asl #1	C r4 = inverse
+
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(end)
+
+L(top):	sbcs	cy, r5, cy		C subtract borrow from next limb
+	ldr	r5, [up], #4
+	sub	n, n, #1
+	mul	r12, r4, cy		C q = cy times inverse mod 2^32
+	tst	n, n			C test n without touching the carry
+	umull	r12, cy, d, r12		C carry = high half of q times d
+	bne	L(top)
+
+L(end):	sbcs	cy, r5, cy
+	mul	r12, r4, cy
+	umull	r12, r0, d, r12		C high half is the return value
+	addcc	r0, r0, #1		C adjust when the last sbcs borrowed
+
+	ldmfd	sp!, {r4, r5}
+	return	r14
+EPILOGUE()
diff --git a/mpn/arm/mul_1.asm b/mpn/arm/mul_1.asm
new file mode 100644
index 0000000..288368f
--- /dev/null
+++ b/mpn/arm/mul_1.asm
@@ -0,0 +1,94 @@
+dnl  ARM mpn_mul_1 -- Multiply a limb vector with a limb and store the result
+dnl  in a second limb vector.
+dnl  Contributed by Robert Harley.
+
+dnl  Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	6-8
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.75
+C Cortex-A15	 ?
+
+C We should rewrite this along the lines of addmul_1.asm.  That should save a
+C cycle on StrongARM, and several cycles on XScale.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+define(`vl',`r3')
+
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+	stmfd	sp!, { r8, r9, lr }
+	ands	r12, n, #1		C for even n this zeroes the carry, r12
+	beq	L(skip1)
+	ldr	lr, [up], #4		C one limb so the remainder is even
+	umull	r9, r12, lr, vl		C r12 = carry out
+	str	r9, [rp], #4
+L(skip1):
+	tst	n, #2
+	beq	L(skip2)
+	mov	r8, r12			C carry in
+	ldmia	up!, { r12, lr }
+	mov	r9, #0
+	umlal	r8, r9, r12, vl		C accumulate onto the carry
+	mov	r12, #0
+	umlal	r9, r12, lr, vl		C r12 = carry out
+	stmia	rp!, { r8, r9 }
+L(skip2):
+	bics	n, n, #3		C round n down to a multiple of 4
+	beq	L(rtn)
+	stmfd	sp!, { r6, r7 }
+
+L(top):	mov	r6, r12			C carry in to the low product
+	ldmia	up!, { r8, r9, r12, lr }
+	ldr	r7, [rp, #12]			C cache allocate
+	mov	r7, #0
+	umlal	r6, r7, r8, vl		C chain carries through 4 products
+	mov	r8, #0
+	umlal	r7, r8, r9, vl
+	mov	r9, #0
+	umlal	r8, r9, r12, vl
+	mov	r12, #0
+	umlal	r9, r12, lr, vl		C r12 = carry out
+	subs	n, n, #4
+	stmia	rp!, { r6, r7, r8, r9 }
+	bne	L(top)
+
+	ldmfd	sp!, { r6, r7 }
+
+L(rtn):	mov	r0, r12			C return the final carry limb
+	ldmfd	sp!, { r8, r9, pc }
+EPILOGUE()
diff --git a/mpn/arm/neon/hamdist.asm b/mpn/arm/neon/hamdist.asm
new file mode 100644
index 0000000..0f7acb4
--- /dev/null
+++ b/mpn/arm/neon/hamdist.asm
@@ -0,0 +1,194 @@
+dnl  ARM Neon mpn_hamdist -- mpn bit hamming distance.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.89
+C Cortex-A15	 0.95
+
+C TODO
+C  * Explore using vldr and vldm.  Does it help on A9?  (These loads do
+C    64-bits-at-a-time, which will mess up in big-endian mode.  Except not for
+C    popcount. Except perhaps also for popcount for the edge loads.)
+C  * Arrange to align the pointer, if that helps performance.  Use the same
+C    read-and-mask trick we use on PCs, for simplicity and performance.  (Sorry
+C    valgrind!)
+C  * Explore if explicit align directives, e.g., "[ptr:128]" help.
+C  * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
+
+C INPUT PARAMETERS
+define(`ap', r0)
+define(`bp', r1)
+define(`n',  r2)
+
+C We sum into 16 16-bit counters in q8,q9, but at the end we sum them and end
+C up with 8 16-bit counters.  Therefore, we can sum to 8(2^16-1) bits, or
+C (8*2^16-1)/32 = 0x3fff limbs.  We use a chunksize close to that, but which
+C can be represented as a 8-bit ARM constant.
+C
+define(`chunksize',0x3f80)
+
+ASM_START()
+PROLOGUE(mpn_hamdist)
+
+	cmp	n, #chunksize		C large operands go via the chunking code
+	bhi	L(gt16k)
+
+L(lt16k):
+	vmov.i64   q8, #0		C clear summation register
+	vmov.i64   q9, #0		C clear summation register
+
+	tst	   n, #1
+	beq	   L(xxx0)
+	vmov.i64   d0, #0
+	vmov.i64   d20, #0
+	sub	   n, n, #1
+	vld1.32   {d0[0]}, [ap]!	C load 1 limb
+	vld1.32   {d20[0]}, [bp]!	C load 1 limb
+	veor	   d0, d0, d20		C xor gives the differing bits
+	vcnt.8	   d24, d0		C per-byte population count
+	vpadal.u8  d16, d24		C d16/q8 = 0; could just splat
+
+L(xxx0):tst	   n, #2
+	beq	   L(xx00)
+	sub	   n, n, #2
+	vld1.32    {d0}, [ap]!		C load 2 limbs
+	vld1.32    {d20}, [bp]!	C load 2 limbs
+	veor	   d0, d0, d20
+	vcnt.8	   d24, d0
+	vpadal.u8  d16, d24
+
+L(xx00):tst	   n, #4
+	beq	   L(x000)
+	sub	   n, n, #4
+	vld1.32    {q0}, [ap]!		C load 4 limbs
+	vld1.32    {q10}, [bp]!		C load 4 limbs
+	veor	   q0, q0, q10
+	vcnt.8	   q12, q0
+	vpadal.u8  q8, q12
+
+L(x000):tst	   n, #8
+	beq	   L(0000)
+
+	subs	   n, n, #8
+	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
+	bls	   L(sum)
+
+L(gt8):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
+	veor	   q0, q0, q10
+	veor	   q1, q1, q11
+	sub	   n, n, #8
+	vcnt.8	   q12, q0
+	vcnt.8	   q13, q1
+	b	   L(mid)
+
+L(0000):subs	   n, n, #16
+	blo	   L(e0)
+
+	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
+	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
+	veor	   q2, q2, q14
+	veor	   q3, q3, q15
+	vcnt.8	   q12, q2
+	vcnt.8	   q13, q3
+	subs	   n, n, #16
+	blo	   L(end)
+
+L(top):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
+	veor	   q0, q0, q10
+	veor	   q1, q1, q11
+	vpadal.u8  q8, q12		C accumulate previous counts
+	vcnt.8	   q12, q0
+	vpadal.u8  q9, q13
+	vcnt.8	   q13, q1
+L(mid):	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
+	veor	   q2, q2, q14
+	veor	   q3, q3, q15
+	subs	   n, n, #16
+	vpadal.u8  q8, q12
+	vcnt.8	   q12, q2
+	vpadal.u8  q9, q13
+	vcnt.8	   q13, q3
+	bhs	   L(top)
+
+L(end):	vpadal.u8  q8, q12
+	vpadal.u8  q9, q13
+L(sum):	veor	   q0, q0, q10		C process the pending 8 limbs
+	veor	   q1, q1, q11
+	vcnt.8	   q12, q0
+	vcnt.8	   q13, q1
+	vpadal.u8  q8, q12
+	vpadal.u8  q9, q13
+	vadd.i16   q8, q8, q9
+					C we have 8 16-bit counts
+L(e0):	vpaddl.u16 q8, q8		C we have 4 32-bit counts
+	vpaddl.u32 q8, q8		C we have 2 64-bit counts
+	vmov.32    r0, d16[0]
+	vmov.32    r1, d17[0]
+	add	   r0, r0, r1		C final scalar sum
+	bx	lr
+
+C Code for large count.  Splits operand and calls above code.
+define(`ap2', r5)
+define(`bp2', r6)
+L(gt16k):
+	push	{r4,r5,r6,r14}
+	mov	ap2, ap
+	mov	bp2, bp
+	mov	r3, n			C full count
+	mov	r4, #0			C total sum
+
+1:	mov	n, #chunksize		C count for this invocation
+	bl	L(lt16k)		C could jump deep inside code
+	add	ap2, ap2, #chunksize*4	C point at next chunk
+	add	bp2, bp2, #chunksize*4	C point at next chunk
+	add	r4, r4, r0
+	mov	ap, ap2			C put chunk pointer in place for call
+	mov	bp, bp2			C put chunk pointer in place for call
+	sub	r3, r3, #chunksize
+	cmp	r3, #chunksize
+	bhi	1b
+
+	mov	n, r3			C count for final invocation
+	bl	L(lt16k)
+	add	r0, r4, r0
+	pop	{r4,r5,r6,pc}
+EPILOGUE()
diff --git a/mpn/arm/neon/lorrshift.asm b/mpn/arm/neon/lorrshift.asm
new file mode 100644
index 0000000..d1950da
--- /dev/null
+++ b/mpn/arm/neon/lorrshift.asm
@@ -0,0 +1,279 @@
+dnl  ARM Neon mpn_lshift and mpn_rshift.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb     cycles/limb     cycles/limb      good
+C              aligned	      unaligned	      best seen	     for cpu?
+C StrongARM	 -		 -
+C XScale	 -		 -
+C Cortex-A7	 ?		 ?
+C Cortex-A8	 ?		 ?
+C Cortex-A9	 3		 3				Y
+C Cortex-A15	 1.5		 1.5				Y
+
+
+C We read 64 bits at a time at 32-bit aligned addresses, and except for the
+C first and last store, we write using 64-bit aligned addresses.  All shifting
+C is done on 64-bit words in 'extension' registers.
+C
+C It should be possible to read also using 64-bit alignment, by manipulating
+C the shift count for unaligned operands.  Not done, since it does not seem to
+C matter for A9 or A15.
+C
+C This will not work in big-endian mode.
+
+C TODO
+C  * Try using 128-bit operations.  Note that Neon lacks pure 128-bit shifts,
+C    which might make it tricky.
+C  * Clean up and simplify.
+C  * Consider sharing most of the code for lshift and rshift, since the feed-in
+C    code, the loop, and most of the wind-down code are identical.
+C  * Replace the basecase code with code using 'extension' registers.
+C  * Optimise.  It is not clear that this loop insn permutation is optimal for
+C    either A9 or A15.
+
+C INPUT PARAMETERS
+define(`rp',  `r0')
+define(`ap',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+
+ifdef(`OPERATION_lshift',`
+	define(`IFLSH', `$1')
+	define(`IFRSH', `')
+	define(`X',`0')
+	define(`Y',`1')
+	define(`func',`mpn_lshift')
+')
+ifdef(`OPERATION_rshift',`
+	define(`IFLSH', `')
+	define(`IFRSH', `$1')
+	define(`X',`1')
+	define(`Y',`0')
+	define(`func',`mpn_rshift')
+')
+
+MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
+
+ASM_START(neon)
+	TEXT
+	ALIGN(64)
+PROLOGUE(func)
+IFLSH(`	mov	r12, n, lsl #2	')
+IFLSH(`	add	rp, rp, r12	')
+IFLSH(`	add	ap, ap, r12	')
+
+	cmp	n, #4			C SIMD code n limit
+	ble	L(base)
+
+ifdef(`OPERATION_lshift',`
+	vdup.32	d6, r3			C left shift count is positive
+	sub	r3, r3, #64		C right shift count is negative
+	vdup.32	d7, r3
+	mov	r12, #-8')		C lshift pointer update offset
+ifdef(`OPERATION_rshift',`
+	rsb	r3, r3, #0		C right shift count is negative
+	vdup.32	d6, r3
+	add	r3, r3, #64		C left shift count is positive
+	vdup.32	d7, r3
+	mov	r12, #8')		C rshift pointer update offset
+
+IFLSH(`	sub	ap, ap, #8	')
+	vld1.32	{d19}, [ap], r12	C load initial 2 limbs
+	vshl.u64 d18, d19, d7		C retval
+
+	tst	rp, #4			C is rp 64-bit aligned already?
+	beq	L(rp_aligned)		C yes, skip
+IFLSH(`	add	ap, ap, #4	')	C move back ap pointer
+IFRSH(`	sub	ap, ap, #4	')	C move back ap pointer
+	vshl.u64 d4, d19, d6
+	sub	n, n, #1		C first limb handled
+IFLSH(`	sub	 rp, rp, #4	')
+	vst1.32	 {d4[Y]}, [rp]IFRSH(!)	C store first limb, rp gets aligned
+	vld1.32	 {d19}, [ap], r12	C load ap[1] and ap[2]
+
+L(rp_aligned):
+IFLSH(`	sub	rp, rp, #8	')
+	subs	n, n, #6
+	blt	L(two_or_three_more)
+	tst	n, #2
+	beq	L(2)
+
+L(1):	vld1.32	 {d17}, [ap], r12
+	vshl.u64 d5, d19, d6
+	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	sub	n, n, #2
+	b	 L(mid)
+
+L(2):	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d4, d19, d6
+	vld1.32	 {d17}, [ap], r12
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	subs	n, n, #4
+	blt	L(end)
+
+L(top):	vld1.32	 {d16}, [ap], r12
+	vorr	 d2, d4, d1		C combine parts from adjacent limbs
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vst1.32	 {d2}, [rp:64], r12	C store 2 limbs
+L(mid):	vld1.32	 {d17}, [ap], r12
+	vorr	 d3, d5, d0
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vst1.32	 {d3}, [rp:64], r12	C store 2 limbs
+	subs	n, n, #4
+	bge	L(top)
+
+L(end):	tst	 n, #1
+	beq	 L(evn)
+
+	vorr	 d2, d4, d1
+	vst1.32	 {d2}, [rp:64], r12
+	b	 L(cj1)
+
+L(evn):	vorr	 d2, d4, d1
+	vshl.u64 d0, d17, d7
+	vshl.u64 d16, d17, d6
+	vst1.32	 {d2}, [rp:64], r12
+	vorr	 d2, d5, d0
+	b	 L(cj2)
+
+C Load last 2 - 3 limbs, store last 4 - 5 limbs
+L(two_or_three_more):
+	tst	n, #1
+	beq	L(l2)
+
+L(l3):	vshl.u64 d5, d19, d6
+	vld1.32	 {d17}, [ap], r12
+L(cj1):	veor	 d16, d16, d16		C zero the pad half for the final limb
+IFLSH(`	add	 ap, ap, #4	')
+	vld1.32	 {d16[Y]}, [ap], r12
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vorr	 d3, d5, d0
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vst1.32	 {d3}, [rp:64], r12
+	vorr	 d2, d4, d1
+	vst1.32	 {d2}, [rp:64], r12
+IFLSH(`	add	 rp, rp, #4	')
+	vst1.32	 {d5[Y]}, [rp]
+	vmov.32	 r0, d18[X]		C return the shifted-out bits
+	bx	lr
+
+L(l2):	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d4, d19, d6
+	vshl.u64 d1, d16, d7
+	vshl.u64 d16, d16, d6
+	vorr	 d2, d4, d1
+L(cj2):	vst1.32	 {d2}, [rp:64], r12
+	vst1.32	 {d16}, [rp]
+	vmov.32	 r0, d18[X]		C return the shifted-out bits
+	bx	lr
+
+
+define(`tnc', `r12')
+L(base):
+	push	{r4, r6, r7, r8}
+ifdef(`OPERATION_lshift',`
+	ldr	r4, [ap, #-4]!
+	rsb	tnc, cnt, #32		C tnc = 32 - cnt
+
+	mov	r7, r4, lsl cnt
+	tst	n, #1
+	beq	L(ev)			C n even
+
+L(od):	subs	n, n, #2
+	bcc	L(ed1)			C n = 1
+	ldr	r8, [ap, #-4]!
+	b	L(md)			C n = 3
+
+L(ev):	ldr	r6, [ap, #-4]!
+	subs	n, n, #2
+	beq	L(ed)			C n = 2
+					C n = 4
+L(tp):	ldr	r8, [ap, #-4]!
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(md):	ldr	r6, [ap, #-4]!
+	orr	r7, r7, r8, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r8, lsl cnt
+
+L(ed):	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(ed1):	str	r7, [rp, #-4]
+	mov	r0, r4, lsr tnc
+')
+ifdef(`OPERATION_rshift',`
+	ldr	r4, [ap]
+	rsb	tnc, cnt, #32		C tnc = 32 - cnt
+
+	mov	r7, r4, lsr cnt
+	tst	n, #1
+	beq	L(ev)			C n even
+
+L(od):	subs	n, n, #2
+	bcc	L(ed1)			C n = 1
+	ldr	r8, [ap, #4]!
+	b	L(md)			C n = 3
+
+L(ev):	ldr	r6, [ap, #4]!
+	subs	n, n, #2
+	beq	L(ed)			C n = 2
+					C n = 4
+
+L(tp):	ldr	r8, [ap, #4]!
+	orr	r7, r7, r6, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r6, lsr cnt
+L(md):	ldr	r6, [ap, #4]!
+	orr	r7, r7, r8, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r8, lsr cnt
+
+L(ed):	orr	r7, r7, r6, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r6, lsr cnt
+L(ed1):	str	r7, [rp], #4
+	mov	r0, r4, lsl tnc
+')
+	pop	{r4, r6, r7, r8}
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/neon/lshiftc.asm b/mpn/arm/neon/lshiftc.asm
new file mode 100644
index 0000000..9e7ca18
--- /dev/null
+++ b/mpn/arm/neon/lshiftc.asm
@@ -0,0 +1,242 @@
+dnl  ARM Neon mpn_lshiftc.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb     cycles/limb     cycles/limb      good
+C              aligned	      unaligned	      best seen	     for cpu?
+C StrongARM	 -		 -
+C XScale	 -		 -
+C Cortex-A7	 ?		 ?
+C Cortex-A8	 ?		 ?
+C Cortex-A9	 3.5		 3.5				Y
+C Cortex-A15	 1.75		 1.75				Y
+
+
+C We read 64 bits at a time at 32-bit aligned addresses, and except for the
+C first and last store, we write using 64-bit aligned addresses.  All shifting
+C is done on 64-bit words in 'extension' registers.
+C
+C It should be possible to read also using 64-bit alignment, by manipulating
+C the shift count for unaligned operands.  Not done, since it does not seem to
+C matter for A9 or A15.
+C
+C This will not work in big-endian mode.
+
+C TODO
+C  * Try using 128-bit operations.  Note that Neon lacks pure 128-bit shifts,
+C    which might make it tricky.
+C  * Clean up and simplify.
+C  * Consider sharing most of the code for lshift and rshift, since the feed-in
+C    code, the loop, and most of the wind-down code are identical.
+C  * Replace the basecase code with code using 'extension' registers.
+C  * Optimise.  It is not clear that this loop insn permutation is optimal for
+C    either A9 or A15.
+
+C INPUT PARAMETERS
+define(`rp',  `r0')
+define(`ap',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+
+ASM_START(neon)
+	TEXT
+	ALIGN(64)
+PROLOGUE(mpn_lshiftc)
+	mov	r12, n, lsl #2
+	add	rp, rp, r12
+	add	ap, ap, r12
+
+	cmp	n, #4			C SIMD code n limit
+	ble	L(base)
+
+	vdup.32	d6, r3			C left shift count is positive
+	sub	r3, r3, #64		C right shift count is negative
+	vdup.32	d7, r3
+	mov	r12, #-8		C lshift pointer update offset
+
+	sub	ap, ap, #8
+	vld1.32	{d19}, [ap], r12	C load initial 2 limbs
+	vshl.u64 d18, d19, d7		C retval
+
+	tst	rp, #4			C is rp 64-bit aligned already?
+	beq	L(rp_aligned)		C yes, skip
+	vmvn	 d19, d19		C complement limbs, the c in lshiftc
+	add	ap, ap, #4		C move back ap pointer
+	vshl.u64 d4, d19, d6
+	sub	n, n, #1		C first limb handled
+	sub	 rp, rp, #4
+	vst1.32	 {d4[1]}, [rp]		C store first limb, rp gets aligned
+	vld1.32	 {d19}, [ap], r12	C load ap[1] and ap[2]
+
+L(rp_aligned):
+	sub	rp, rp, #8
+	subs	n, n, #6
+	vmvn	 d19, d19		C complement limbs, the c in lshiftc
+	blt	L(two_or_three_more)
+	tst	n, #2
+	beq	L(2)
+
+L(1):	vld1.32	 {d17}, [ap], r12
+	vshl.u64 d5, d19, d6
+	vmvn	 d17, d17
+	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	sub	n, n, #2
+	b	 L(mid)
+
+L(2):	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d4, d19, d6
+	vmvn	 d16, d16
+	vld1.32	 {d17}, [ap], r12
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	subs	n, n, #4
+	blt	L(end)
+
+L(top):	vmvn	 d17, d17
+	vld1.32	 {d16}, [ap], r12
+	vorr	 d2, d4, d1		C combine parts from adjacent limbs
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vst1.32	 {d2}, [rp:64], r12
+L(mid):	vmvn	 d16, d16
+	vld1.32	 {d17}, [ap], r12
+	vorr	 d3, d5, d0
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vst1.32	 {d3}, [rp:64], r12
+	subs	n, n, #4
+	bge	L(top)
+
+L(end):	tst	 n, #1
+	beq	 L(evn)
+
+	vorr	 d2, d4, d1
+	vst1.32	 {d2}, [rp:64], r12
+	b	 L(cj1)
+
+L(evn):	vmvn	 d17, d17
+	vorr	 d2, d4, d1
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vst1.32	 {d2}, [rp:64], r12
+	vmov.u8	 d17, #255		C all-ones, complements the zero fill bits
+	vorr	 d2, d5, d0
+	vshl.u64 d0, d17, d7
+	vorr	 d3, d4, d0
+	b	 L(cj2)
+
+C Load last 2 - 3 limbs, store last 4 - 5 limbs
+L(two_or_three_more):
+	tst	n, #1
+	beq	L(l2)
+
+L(l3):	vshl.u64 d5, d19, d6
+	vld1.32	 {d17}, [ap], r12
+L(cj1):	vmov.u8	 d16, #0
+	add	 ap, ap, #4
+	vmvn	 d17, d17
+	vld1.32	 {d16[1]}, [ap], r12
+	vshl.u64 d0, d17, d7
+	vshl.u64 d4, d17, d6
+	vmvn	 d16, d16
+	vorr	 d3, d5, d0
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vst1.32	 {d3}, [rp:64], r12
+	vorr	 d2, d4, d1
+	vst1.32	 {d2}, [rp:64], r12
+	add	 rp, rp, #4
+	vst1.32	 {d5[1]}, [rp]
+	vmov.32	 r0, d18[0]		C return the shifted-out bits
+	bx	lr
+
+L(l2):	vld1.32	 {d16}, [ap], r12
+	vshl.u64 d4, d19, d6
+	vmvn	 d16, d16
+	vshl.u64 d1, d16, d7
+	vshl.u64 d5, d16, d6
+	vmov.u8	 d17, #255		C all-ones, complements the zero fill bits
+	vorr	 d2, d4, d1
+	vshl.u64 d0, d17, d7
+	vorr	 d3, d5, d0
+L(cj2):	vst1.32	 {d2}, [rp:64], r12
+	vst1.32	 {d3}, [rp]
+	vmov.32	 r0, d18[0]		C return the shifted-out bits
+	bx	lr
+
+
+define(`tnc', `r12')
+L(base):
+	push	{r4, r6, r7, r8}
+	ldr	r4, [ap, #-4]!
+	rsb	tnc, cnt, #32		C tnc = 32 - cnt
+	mvn	r6, r4			C complement limbs, the c in lshiftc
+
+	mov	r7, r6, lsl cnt
+	tst	n, #1
+	beq	L(ev)			C n even
+
+L(od):	subs	n, n, #2
+	bcc	L(ed1)			C n = 1
+	ldr	r8, [ap, #-4]!
+	mvn	r8, r8
+	b	L(md)			C n = 3
+
+L(ev):	ldr	r6, [ap, #-4]!
+	mvn	r6, r6
+	subs	n, n, #2
+	beq	L(ed)			C n = 2
+					C n = 4
+L(tp):	ldr	r8, [ap, #-4]!
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mvn	r8, r8
+	mov	r7, r6, lsl cnt
+L(md):	ldr	r6, [ap, #-4]!
+	orr	r7, r7, r8, lsr tnc
+	str	r7, [rp, #-4]!
+	mvn	r6, r6
+	mov	r7, r8, lsl cnt
+
+L(ed):	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]!
+	mov	r7, r6, lsl cnt
+L(ed1):	mvn	r6, #0			C virtual all-ones limb below ap[0]
+	orr	r7, r7, r6, lsr tnc
+	str	r7, [rp, #-4]
+	mov	r0, r4, lsr tnc
+	pop	{r4, r6, r7, r8}
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/neon/popcount.asm b/mpn/arm/neon/popcount.asm
new file mode 100644
index 0000000..2cabbda
--- /dev/null
+++ b/mpn/arm/neon/popcount.asm
@@ -0,0 +1,166 @@
+dnl  ARM Neon mpn_popcount -- mpn bit population count.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.125
+C Cortex-A15	 0.56
+
+C TODO
+C  * Explore using vldr and vldm.  Does it help on A9?  (These loads do
+C    64-bits-at-a-time, which will mess up in big-endian mode.  Except not for
+C    popcount. Except perhaps also for popcount for the edge loads.)
+C  * Arrange to align the pointer, if that helps performance.  Use the same
+C    read-and-mask trick we use on PCs, for simplicity and performance.  (Sorry
+C    valgrind!)
+C  * Explore if explicit align directives, e.g., "[ptr:128]" help.
+C  * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
+
+C INPUT PARAMETERS
+define(`ap', r0)
+define(`n',  r1)
+
+C We sum into 16 16-bit counters in q8,q9, but at the end we sum them and end
+C up with 8 16-bit counters.  Therefore, we can sum to 8(2^16-1) bits, or
+C (8*2^16-1)/32 = 0x3fff limbs.  We use a chunksize close to that, but which
+C can be represented as a 8-bit ARM constant.
+C
+define(`chunksize',0x3f80)
+
+ASM_START()
+PROLOGUE(mpn_popcount)
+
+	cmp	n, #chunksize		C large operands go via the chunking code
+	bhi	L(gt16k)
+
+L(lt16k):
+	vmov.i64   q8, #0		C clear summation register
+	vmov.i64   q9, #0		C clear summation register
+
+	tst	   n, #1
+	beq	   L(xxx0)
+	vmov.i64   d0, #0
+	sub	   n, n, #1
+	vld1.32   {d0[0]}, [ap]!	C load 1 limb
+	vcnt.8	   d24, d0		C per-byte population count
+	vpadal.u8  d16, d24		C d16/q8 = 0; could just splat
+
+L(xxx0):tst	   n, #2
+	beq	   L(xx00)
+	sub	   n, n, #2
+	vld1.32    {d0}, [ap]!		C load 2 limbs
+	vcnt.8	   d24, d0
+	vpadal.u8  d16, d24
+
+L(xx00):tst	   n, #4
+	beq	   L(x000)
+	sub	   n, n, #4
+	vld1.32    {q0}, [ap]!		C load 4 limbs
+	vcnt.8	   q12, q0
+	vpadal.u8  q8, q12
+
+L(x000):tst	   n, #8
+	beq	   L(0000)
+
+	subs	   n, n, #8
+	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	bls	   L(sum)
+
+L(gt8):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	sub	   n, n, #8
+	vcnt.8	   q12, q0
+	vcnt.8	   q13, q1
+	b	   L(mid)
+
+L(0000):subs	   n, n, #16
+	blo	   L(e0)
+
+	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	vcnt.8	   q12, q2
+	vcnt.8	   q13, q3
+	subs	   n, n, #16
+	blo	   L(end)
+
+L(top):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
+	vpadal.u8  q8, q12		C accumulate previous counts
+	vcnt.8	   q12, q0
+	vpadal.u8  q9, q13
+	vcnt.8	   q13, q1
+L(mid):	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
+	subs	   n, n, #16
+	vpadal.u8  q8, q12
+	vcnt.8	   q12, q2
+	vpadal.u8  q9, q13
+	vcnt.8	   q13, q3
+	bhs	   L(top)
+
+L(end):	vpadal.u8  q8, q12
+	vpadal.u8  q9, q13
+L(sum):	vcnt.8	   q12, q0		C process the pending 8 limbs
+	vcnt.8	   q13, q1
+	vpadal.u8  q8, q12
+	vpadal.u8  q9, q13
+	vadd.i16   q8, q8, q9
+					C we have 8 16-bit counts
+L(e0):	vpaddl.u16 q8, q8		C we have 4 32-bit counts
+	vpaddl.u32 q8, q8		C we have 2 64-bit counts
+	vmov.32    r0, d16[0]
+	vmov.32    r1, d17[0]
+	add	   r0, r0, r1		C final scalar sum
+	bx	lr
+
+C Code for large count.  Splits operand and calls above code.
+define(`ap2', r2)			C caller-saves reg not used above
+L(gt16k):
+	push	{r4,r14}
+	mov	ap2, ap
+	mov	r3, n			C full count
+	mov	r4, #0			C total sum
+
+1:	mov	n, #chunksize		C count for this invocation
+	bl	L(lt16k)		C could jump deep inside code
+	add	ap2, ap2, #chunksize*4	C point at next chunk
+	add	r4, r4, r0
+	mov	ap, ap2			C put chunk pointer in place for call
+	sub	r3, r3, #chunksize
+	cmp	r3, #chunksize
+	bhi	1b
+
+	mov	n, r3			C count for final invocation
+	bl	L(lt16k)
+	add	r0, r4, r0
+	pop	{r4,pc}
+EPILOGUE()
diff --git a/mpn/arm/neon/sec_tabselect.asm b/mpn/arm/neon/sec_tabselect.asm
new file mode 100644
index 0000000..7d3fecd
--- /dev/null
+++ b/mpn/arm/neon/sec_tabselect.asm
@@ -0,0 +1,140 @@
+dnl  ARM Neon mpn_sec_tabselect.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2011-2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.15
+C Cortex-A15	 0.65
+
+define(`rp',     `r0')
+define(`tp',     `r1')
+define(`n',      `r2')
+define(`nents',  `r3')
+C define(`which',  on stack)
+
+define(`i',      `r4')
+define(`j',      `r5')
+
+define(`maskq',  `q10')
+define(`maskd',  `d20')
+
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+	push	{r4-r5}
+
+	add	  r4, sp, #8
+	vld1.32	  {d30[], d31[]}, [r4]	C 4 `which' copies
+	vmov.i32  q14, #1		C 4 copies of 1
+
+	subs	j, n, #8		C process 8 limbs of each entry per pass
+	bmi	L(outer_end)
+
+L(outer_top):
+	mov	  i, nents
+	mov	  r12, tp		C preserve tp
+	veor	  q13, q13, q13		C 4 counter copies
+	veor	  q2, q2, q2
+	veor	  q3, q3, q3
+	ALIGN(16)
+L(top):	vceq.i32  maskq, q13, q15	C compare idx copies to `which' copies
+	vld1.32	  {q0,q1}, [tp]
+	vadd.i32  q13, q13, q14
+	vbit	  q2, q0, maskq		C copy entry data iff mask set
+	vbit	  q3, q1, maskq
+	add	  tp, tp, n, lsl #2	C advance to next table entry
+	subs	  i, i, #1
+	bne	  L(top)
+	vst1.32	  {q2,q3}, [rp]!
+	add	  tp, r12, #32		C restore tp, point to next slice
+	subs	  j, j, #8
+	bpl	  L(outer_top)
+L(outer_end):
+
+	tst	  n, #4
+	beq	  L(b0xx)
+L(b1xx):mov	  i, nents
+	mov	  r12, tp
+	veor	  q13, q13, q13
+	veor	  q2, q2, q2
+	ALIGN(16)
+L(tp4):	vceq.i32  maskq, q13, q15
+	vld1.32	  {q0}, [tp]
+	vadd.i32  q13, q13, q14
+	vbit	  q2, q0, maskq
+	add	  tp, tp, n, lsl #2
+	subs	  i, i, #1
+	bne	  L(tp4)
+	vst1.32	  {q2}, [rp]!
+	add	  tp, r12, #16
+
+L(b0xx):tst	  n, #2
+	beq	  L(b00x)
+L(b01x):mov	  i, nents
+	mov	  r12, tp
+	veor	  d26, d26, d26
+	veor	  d4, d4, d4
+	ALIGN(16)
+L(tp2):	vceq.i32  maskd, d26, d30
+	vld1.32	  {d0}, [tp]
+	vadd.i32  d26, d26, d28
+	vbit	  d4, d0, maskd
+	add	  tp, tp, n, lsl #2
+	subs	  i, i, #1
+	bne	  L(tp2)
+	vst1.32	  {d4}, [rp]!
+	add	  tp, r12, #8
+
+L(b00x):tst	  n, #1
+	beq	  L(b000)
+L(b001):mov	  i, nents
+	mov	  r12, tp
+	veor	  d26, d26, d26
+	veor	  d4, d4, d4
+	ALIGN(16)
+L(tp1):	vceq.i32  maskd, d26, d30
+	vld1.32	  {d0[0]}, [tp]
+	vadd.i32  d26, d26, d28
+	vbit	  d4, d0, maskd
+	add	  tp, tp, n, lsl #2
+	subs	  i, i, #1
+	bne	  L(tp1)
+	vst1.32	  {d4[0]}, [rp]
+
+L(b000):pop	{r4-r5}
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/rsh1aors_n.asm b/mpn/arm/rsh1aors_n.asm
new file mode 100644
index 0000000..c99f34c
--- /dev/null
+++ b/mpn/arm/rsh1aors_n.asm
@@ -0,0 +1,124 @@
+dnl  ARM mpn_rsh1add_n and mpn_rsh1sub_n.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	3.64-3.7
+C Cortex-A15	 2.5
+
+C TODO
+C  * Not optimised.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_rsh1add_n', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`RSTCY',	`cmn	$1, $1')
+  define(`func',	mpn_rsh1add_n)
+  define(`func_nc',	mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`RSTCY',
+	`mvn	$2, #0x80000000
+	cmp	$2, $1')
+  define(`func',	mpn_rsh1sub_n)
+  define(`func_nc',	mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{r4-r11}
+	ldr	r4, [up], #4
+	ldr	r8, [vp], #4
+	ADDSUB	r4, r4, r8
+	movs	r12, r7, rrx		C save C flag in r12 bit 31; r7 value is junk, only bit 31 of r12 is used later
+	and	r11, r4, #1	C return value
+	subs	n, n, #4
+	blo	L(end)
+
+L(top):	ldmia	up!, {r5,r6,r7}
+	ldmia	vp!, {r8,r9,r10}
+	cmn	r12, r12		C restore carry from r12 bit 31
+	ADDSUBC	r5, r5, r8
+	ADDSUBC	r6, r6, r9
+	ADDSUBC	r7, r7, r10
+	movs	r12, r7, rrx		C save new carry; chained rrx shifts each limb right 1
+	movs	r6, r6, rrx
+	movs	r5, r5, rrx
+	movs	r4, r4, rrx
+	subs	n, n, #3
+	stmia	rp!, {r4,r5,r6}
+	mov	r4, r7			C unshifted top limb becomes next low limb
+	bhs	L(top)
+
+L(end):	cmn	n, #2
+	bls	L(e2)
+	ldm	up, {r5,r6}		C 2 remaining limbs
+	ldm	vp, {r8,r9}
+	cmn	r12, r12		C restore carry from r12 bit 31
+	ADDSUBC	r5, r5, r8
+	ADDSUBC	r6, r6, r9
+	movs	r12, r6, rrx
+	movs	r5, r5, rrx
+	movs	r4, r4, rrx
+	stmia	rp!, {r4,r5}
+	mov	r4, r6
+	b	L(e1)
+
+L(e2):	bne	L(e1)
+	ldr	r5, [up, #0]		C 1 remaining limb
+	ldr	r8, [vp, #0]
+	cmn	r12, r12		C restore carry from r12 bit 31
+	ADDSUBC	r5, r5, r8
+	movs	r12, r5, rrx
+	movs	r4, r4, rrx
+	str	r4, [rp], #4
+	mov	r4, r5
+
+L(e1):	RSTCY(	r12, r1)		C set C flag from the saved carry
+	mov	r4, r4, rrx		C shift carry into top bit of last limb
+	str	r4, [rp, #0]
+	mov	r0, r11
+	pop	{r4-r11}
+	return	r14
+EPILOGUE()
diff --git a/mpn/arm/rshift.asm b/mpn/arm/rshift.asm
new file mode 100644
index 0000000..4fc7ae1
--- /dev/null
+++ b/mpn/arm/rshift.asm
@@ -0,0 +1,86 @@
+dnl  ARM mpn_rshift.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.5
+C Cortex-A15	 ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	push	{r4, r6, r7, r8}
+	ldr	r4, [up]
+	rsb	tnc, cnt, #32		C tnc = 32 - cnt
+
+	mov	r7, r4, lsr cnt
+	tst	n, #1
+	beq	L(evn)			C n even
+
+L(odd):	subs	n, n, #2
+	bcc	L(1)			C n = 1
+	ldr	r8, [up, #4]!
+	b	L(mid)
+
+L(evn):	ldr	r6, [up, #4]!
+	subs	n, n, #2
+	beq	L(end)
+
+L(top):	ldr	r8, [up, #4]!
+	orr	r7, r7, r6, lsl tnc	C combine parts from adjacent limbs
+	str	r7, [rp], #4
+	mov	r7, r6, lsr cnt
+L(mid):	ldr	r6, [up, #4]!
+	orr	r7, r7, r8, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r8, lsr cnt
+	subs	n, n, #2
+	bgt	L(top)
+
+L(end):	orr	r7, r7, r6, lsl tnc
+	str	r7, [rp], #4
+	mov	r7, r6, lsr cnt
+L(1):	str	r7, [rp]
+	mov	r0, r4, lsl tnc		C return the shifted-out bits
+	pop	{r4, r6, r7, r8}
+	return	r14
+EPILOGUE()
diff --git a/mpn/arm/sec_tabselect.asm b/mpn/arm/sec_tabselect.asm
new file mode 100644
index 0000000..348ef24
--- /dev/null
+++ b/mpn/arm/sec_tabselect.asm
@@ -0,0 +1,131 @@
+dnl  ARM mpn_sec_tabselect
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 2.33
+C Cortex-A15	 2.2
+
+C TODO
+C  * Consider using special code for small nents, either swapping the inner and
+C    outer loops, or providing a few variants with completely unrolled inner loops.
+
+C mpn_sec_tabselect(rp, tp, n, nents, which)
+C Copy entry `which' (n limbs) from the table tp of nents entries to rp.
+C Every table entry is read and masked, so the timing and the memory access
+C pattern are independent of `which' (side-channel silent select).
+define(`rp',    `r0')
+define(`tp',    `r1')
+define(`n',     `r2')
+define(`nents', `r3')
+C      which  on stack
+
+define(`i',     `r11')
+define(`j',     `r12')
+define(`c',     `r14')
+define(`mask',  `r7')
+
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+	push	{r4-r11, r14}
+
+C Outer loop: gather 3 result limbs per pass; j counts remaining limbs.
+	subs	j, n, #3
+	bmi	L(outer_end)
+L(outer_top):
+	ldr	c, [sp, #36]		C c = which (stack arg above 9 saved regs)
+	mov	i, nents
+	push	{tp}
+
+	mov	r8, #0
+	mov	r9, #0
+	mov	r10, #0
+
+C Scan all nents entries.  The subs/sbc pair makes mask all-ones exactly on
+C the iteration where c steps from 0 to -1, i.e. for entry `which', and zero
+C for every other entry (mask = mask - mask - borrow, so its old value is
+C irrelevant).
+L(top):	subs	c, c, #1
+	ldm	tp, {r4,r5,r6}
+	sbc	mask, mask, mask
+	subs	i, i, #1
+	add	tp, tp, n, lsl #2	C step tp to the same limbs of next entry
+	and	r4, r4, mask
+	and	r5, r5, mask
+	and	r6, r6, mask
+	orr	r8, r8, r4
+	orr	r9, r9, r5
+	orr	r10, r10, r6
+	bge	L(top)
+
+	stmia	rp!, {r8,r9,r10}
+	pop	{tp}
+	add	tp, tp, #12		C advance to the next limb triple
+	subs	j, j, #3
+	bpl	L(outer_top)
+L(outer_end):
+
+C After the outer loop j is -1, -2 or -3, meaning 2, 1 or 0 limbs remain.
+	cmp	j, #-1
+	bne	L(n2)
+
+C Tail: 2 remaining limbs, same masking scheme as the main loop.
+	ldr	c, [sp, #36]
+	mov	i, nents
+	mov	r8, #0
+	mov	r9, #0
+L(tp2):	subs	c, c, #1
+	sbc	mask, mask, mask
+	ldm	tp, {r4,r5}
+	subs	i, i, #1
+	add	tp, tp, n, lsl #2
+	and	r4, r4, mask
+	and	r5, r5, mask
+	orr	r8, r8, r4
+	orr	r9, r9, r5
+	bge	L(tp2)
+	stmia	rp, {r8,r9}
+	pop	{r4-r11, r14}
+	return	lr
+
+L(n2):	cmp	j, #-2
+	bne	L(n1)
+
+C Tail: 1 remaining limb.
+	ldr	c, [sp, #36]
+	mov	i, nents
+	mov	r8, #0
+L(tp1):	subs	c, c, #1
+	sbc	mask, mask, mask
+	ldr	r4, [tp]
+	subs	i, i, #1
+	add	tp, tp, n, lsl #2
+	and	r4, r4, mask
+	orr	r8, r8, r4
+	bge	L(tp1)
+	str	r8, [rp]
+L(n1):	pop	{r4-r11, r14}
+	return	lr
+EPILOGUE()
diff --git a/mpn/arm/udiv.asm b/mpn/arm/udiv.asm
new file mode 100644
index 0000000..bc24421
--- /dev/null
+++ b/mpn/arm/udiv.asm
@@ -0,0 +1,104 @@
+dnl  ARM mpn_udiv_qrnnd -- divide a two limb dividend and a one limb divisor.
+dnl  Return quotient and store remainder through a supplied pointer.
+
+dnl  Copyright 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C INPUT PARAMETERS
+C mpn_udiv_qrnnd(rem_ptr, n1, n0, d) -- divide the two-limb value n1:n0 by
+C the single limb d; return the quotient and store the remainder at rem_ptr.
+C Presumably requires n1 < d, per the usual udiv_qrnnd contract -- confirm
+C against longlong.h.
+C Plain shift-and-subtract division: 32 quotient bits are developed by
+C divstep, four per unrolled loop iteration, 8 iterations.
+define(`rem_ptr',`r0')
+define(`n1',`r1')
+define(`n0',`r2')
+define(`d',`r3')
+
+C divstep -- develop one quotient bit.  Dividend in $1$2, divisor in $3.
+C Quotient bit is shifted into $2.
+define(`divstep',
+       `adcs	$2, $2, $2
+	adc	$1, $1, $1
+	cmp	$1, $3
+	subcs	$1, $1, $3')
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+	mov	r12, #8			C loop counter for both loops below
+	cmp	d, #0x80000000		C check divisor msb and clear carry
+	bcs	L(_large_divisor)
+
+L(oop):	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	sub	r12, r12, #1
+	teq	r12, #0
+	bne	L(oop)
+
+	str	n1, [rem_ptr]		C store remainder
+	adc	r0, n0, n0		C quotient: add last carry from divstep
+	return	lr
+
+C When d has its top bit set, the cmp/subcs in divstep cannot handle the
+C shifted dividend.  Divide floor(n1:n0 / 2) by ceil(d / 2) instead, saving
+C the two discarded low bits, then fix up quotient and remainder below.
+L(_large_divisor):
+	stmfd	sp!, { r8, lr }
+
+	and	r8, n0, #1		C save lsb of dividend
+	mov	lr, n1, lsl #31
+	orrs	n0, lr, n0, lsr #1	C n0 = lo(n1n0 >> 1)
+	mov	n1, n1, lsr #1		C n1 = hi(n1n0 >> 1)
+
+	and	lr, d, #1		C save lsb of divisor
+	movs	d, d, lsr #1		C d = floor(orig_d / 2)
+	adc	d, d, #0		C d = ceil(orig_d / 2)
+
+L(oop2):
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	divstep(n1,n0,d)
+	sub	r12, r12, #1
+	teq	r12, #0
+	bne	L(oop2)
+
+	adc	n0, n0, n0		C shift and add last carry from divstep
+	add	n1, r8, n1, lsl #1	C shift in omitted dividend lsb
+	tst	lr, lr			C test saved divisor lsb
+	beq	L(_even_divisor)
+
+	rsb	d, lr, d, lsl #1	C restore orig d value
+	adds	n1, n1, n0		C fix remainder for omitted divisor lsb
+	addcs	n0, n0, #1		C adjust quotient if rem. fix carried
+	subcs	n1, n1, d		C adjust remainder accordingly
+	cmp	n1, d			C remainder >= divisor?
+	subcs	n1, n1, d		C adjust remainder
+	addcs	n0, n0, #1		C adjust quotient
+
+L(_even_divisor):
+	str	n1, [rem_ptr]		C store remainder
+	mov	r0, n0			C quotient
+	ldmfd	sp!, { r8, pc }
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/arm/v5/gcd_11.asm b/mpn/arm/v5/gcd_11.asm
new file mode 100644
index 0000000..fa3ea44
--- /dev/null
+++ b/mpn/arm/v5/gcd_11.asm
@@ -0,0 +1,70 @@
+dnl  ARM v5 mpn_gcd_11.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjörn
+dnl  Granlund.
+
+dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A5	 6.45	obsolete
+C Cortex-A7	 6.41	obsolete
+C Cortex-A8	 5.0	obsolete
+C Cortex-A9	 5.9	obsolete
+C Cortex-A15	 4.40	obsolete
+C Cortex-A17	 5.68	obsolete
+C Cortex-A53	 4.37	obsolete
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+C mpn_gcd_11(u0, v0) -- GCD of the two single limbs u0 and v0, returned in
+C r0.  Inputs are presumably odd, per the usual mpn_gcd_11 contract.
+C Binary gcd: each iteration replaces (u,v) by (min(u,v), |u-v| with its
+C trailing zero bits stripped).  (u-v) & (v-u) isolates the lowest set bit
+C of the difference; clz then yields its trailing-zero count.
+define(`u0',    `r0')
+define(`v0',    `r1')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_11)
+	subs	r3, u0, v0	C			0
+	beq	L(end)		C
+
+	ALIGN(16)
+L(top):	sub	r2, v0, u0	C			0,5
+	and	r12, r2, r3	C r12 = lowest set bit of u-v	1
+	clz	r12, r12	C			2
+	rsb	r12, r12, #31	C r12 = trailing zeros of |u-v|	3
+	rsbcc	r3, r3, #0	C v = abs(u-v), even	1
+	movcs	u0, v0		C u = min(u,v)		1
+	lsr	v0, r3, r12	C strip trailing zeros	4
+	subs	r3, u0, v0	C			5
+	bne	L(top)		C
+
+L(end):	bx	lr
+EPILOGUE()
diff --git a/mpn/arm/v5/gcd_22.asm b/mpn/arm/v5/gcd_22.asm
new file mode 100644
index 0000000..6888616
--- /dev/null
+++ b/mpn/arm/v5/gcd_22.asm
@@ -0,0 +1,117 @@
+dnl  ARM v5 mpn_gcd_22.
+
+dnl  Copyright 2019, 2022 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 -
+C ARM11		13
+C Cortex-A5	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 ?
+C Cortex-A12	 ?
+C Cortex-A15	 ?
+C Cortex-A17	 ?
+C Cortex-A53	 ?
+
+
+C mpn_gcd_22 -- GCD of two double-limb operands {u1,u0} and {v1,v0}.
+C gp is the result pointer (presumably the hidden struct-return pointer of
+C the ARM ABI); the two result limbs are stored at gp[0] (low) and gp[1]
+C (high).  v0 is passed on the stack.  Operands are presumably odd, per the
+C mpn_gcd_22 contract -- confirm against mpn/generic/gcd_22.c.
+C Double-limb binary gcd: each round sets t = |u - v|, v = min(u,v), then
+C u = t >> (trailing zeros of t).
+define(`gp',    `r0')
+
+define(`u1',    `r1')
+define(`u0',    `r2')
+define(`v1',    `r3')
+define(`v0',    `r4')
+
+define(`t0',    `r5')
+define(`t1',    `r6')
+define(`cnt',   `r7')
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+	push	{ r4-r7 }
+
+	ldr	v0, [sp,#16]		C v0 = first stack arg (above 4 saved regs)
+
+L(top):	subs	t0, u0, v0		C t1:t0 = u - v	0 7
+	beq	L(lowz)
+	sbcs	t1, u1, v1		C 1 8
+
+	sub	cnt, v0, u0
+	and	cnt, cnt, t0		C cnt = lowest set bit of u0-v0
+
+C If u < v (borrow, carry clear) negate t1:t0.  Negating as (-t0, ~t1) is
+C valid here because t0 != 0 (the t0 == 0 case went to L(lowz)).
+	negcc	t0, t0
+	mvncc	t1, t1
+L(bck):	movcc	v0, u0			C v = min(u,v)
+	movcc	v1, u1
+
+C Shift t1:t0 right by its trailing-zero count into u1:u0.
+	clz	r12, cnt		C 2
+	rsb	cnt, r12, #31		C cnt = trailing zeros of t	3
+	add	r12, r12, #1		C r12 = 32 - cnt
+
+	lsr	u0, t0, cnt		C 3
+	lsl	r12, t1, r12		C 4
+	lsr	u1, t1, cnt		C 3
+	orr	u0, u0, r12		C 5
+
+	orrs	r12, u1, v1
+	bne	L(top)
+
+C Both high limbs are zero: finish with the single-limb gcd.
+	str	r12, [gp,#4]		C high result limb <= 0
+
+	mov	r6, gp
+	mov	r0, u0			C pass 1st argument
+	mov	r1, v0			C pass 2nd argument
+	mov	r7, r14			C preserve link register
+	bl	mpn_gcd_11
+	str	r0, [r6,#0]
+	mov	r14, r7
+	pop	{ r4-r7 }
+	bx	r14
+
+L(lowz):C We come here when v0 - u0 = 0
+	C 1. If v1 - u1 = 0, then gcd is u = v.
+	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+	subs	t0, u1, v1
+	beq	L(end)
+	mov	t1, #0
+	sub	cnt, v1, u1
+	and	cnt, cnt, t0		C cnt = lowest set bit of u1-v1
+	negcc	t0, t0
+	b	L(bck)
+
+L(end):	str	v0, [gp,#0]
+	str	v1, [gp,#4]
+	pop	{ r4-r7 }
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/v5/mod_1_1.asm b/mpn/arm/v5/mod_1_1.asm
new file mode 100644
index 0000000..6802346
--- /dev/null
+++ b/mpn/arm/v5/mod_1_1.asm
@@ -0,0 +1,129 @@
+dnl  ARM mpn_mod_1_1p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 7
+C Cortex-A15	 6
+
+C mpn_mod_1_1p(ap, n, d, cps) -- return {ap,n} mod d, using the values
+C precomputed into cps[] by mpn_mod_1_1p_cps below.  The limbs are consumed
+C from the most significant end downward (ap is advanced to the top, then
+C walked back with pre-decremented loads), folding one limb per iteration
+C with the cps[3] multiplier; the tail at L(4)/L(7)/L(8) performs the final
+C reduction with the precomputed limb inverse cps[0] and shift count cps[1].
+C NOTE(review): cps layout {inverse, cnt, B1-related, B2-related} inferred
+C from the _cps code below -- confirm against mpn/generic/mod_1_1.c.
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1_1p)
+	push	{r4-r10}
+	add	r0, r0, r1, asl #2	C point ap at operand end
+	ldr	r5, [r0, #-4]!		C load top limb
+	ldr	r12, [r0, #-4]!		C load second limb
+	subs	r1, r1, #2
+	ble	L(4)			C n = 2: no loop needed
+	ldr	r8, [r3, #12]		C r8 = cps[3] (reduction multiplier)
+	mov	r4, r12
+	mov	r10, r5
+	umull	r7, r5, r10, r8
+	sub	r1, r1, #1
+	b	L(mid)
+
+L(top):	adds	r12, r6, r7
+	adcs	r10, r4, r5
+	sub	r1, r1, #1
+	mov	r6, #0
+	movcs	r6, r8			C conditionally add multiplier on carry
+	umull	r7, r5, r10, r8
+	adds	r4, r12, r6
+	subcs	r4, r4, r2
+L(mid):	ldr	r6, [r0, #-4]!		C fetch next lower limb
+	teq	r1, #0
+	bne	L(top)
+
+	adds	r12, r6, r7
+	adcs	r5, r4, r5
+	subcs	r5, r5, r2
+L(4):	ldr	r1, [r3, #4]		C r1 = cps[1] = cnt
+	cmp	r1, #0
+	beq	L(7)			C normalized d: skip shifting
+	ldr	r4, [r3, #8]		C r4 = cps[2] (used only when cnt != 0)
+	umull	r0, r6, r5, r4
+	adds	r12, r0, r12
+	addcs	r6, r6, #1
+	rsb	r0, r1, #32
+	mov	r0, r12, lsr r0		C shift the two-limb value left by cnt
+	orr	r5, r0, r6, asl r1
+	mov	r12, r12, asl r1
+	b	L(8)
+L(7):	cmp	r5, r2
+	subcs	r5, r5, r2
+L(8):	ldr	r0, [r3, #0]		C r0 = cps[0] = limb inverse of d
+C Final 2/1 division by the invariant d via its precomputed inverse.
+	umull	r4, r3, r5, r0
+	add	r5, r5, #1
+	adds	r0, r4, r12
+	adc	r5, r3, r5
+	mul	r5, r2, r5
+	sub	r12, r12, r5
+	cmp	r12, r0
+	addhi	r12, r12, r2		C conditional remainder adjustments
+	cmp	r2, r12
+	subls	r12, r12, r2
+	mov	r0, r12, lsr r1		C undo normalization shift
+	pop	{r4-r10}
+	bx	r14
+EPILOGUE()
+
+C mpn_mod_1_1p_cps(cps, b) -- precompute values for mpn_mod_1_1p:
+C   cps[0] = mpn_invert_limb(b << cnt)
+C   cps[1] = cnt = count of leading zeros of b
+C   cps[2] = value derived from the inverse (stored only when cnt != 0)
+C   cps[3] = (-(b << cnt)) * inverse
+C NOTE(review): exact semantics of cps[2]/cps[3] (B^1, B^2 mod b variants)
+C inferred -- confirm against mpn/generic/mod_1_1.c.
+PROLOGUE(mpn_mod_1_1p_cps)
+	stmfd	sp!, {r4, r5, r6, r14}
+	mov	r5, r0			C save cps pointer across the call
+	clz	r4, r1			C cnt = clz(b)
+	mov	r0, r1, asl r4		C normalize b
+	rsb	r6, r0, #0		C r6 = -(b << cnt)
+	bl	mpn_invert_limb
+	str	r0, [r5, #0]		C cps[0] = inverse
+	str	r4, [r5, #4]		C cps[1] = cnt
+	cmp	r4, #0
+	beq	L(2)			C already normalized, cps[2] unused
+	rsb	r1, r4, #32
+	mov	r3, #1
+	mov	r3, r3, asl r4
+	orr	r3, r3, r0, lsr r1	C r3 = 2^cnt + high bits of inverse
+	mul	r3, r6, r3
+	mov	r4, r3, lsr r4
+	str	r4, [r5, #8]		C cps[2]
+L(2):	mul	r0, r6, r0
+	str	r0, [r5, #12]		C cps[3]
+	ldmfd	sp!, {r4, r5, r6, pc}
+EPILOGUE()
diff --git a/mpn/arm/v5/mod_1_2.asm b/mpn/arm/v5/mod_1_2.asm
new file mode 100644
index 0000000..fb70024
--- /dev/null
+++ b/mpn/arm/v5/mod_1_2.asm
@@ -0,0 +1,156 @@
+dnl  ARM mpn_mod_1s_2p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 4.25
+C Cortex-A15	 3
+
+C mpn_mod_1s_2p(ap, n, d, cps) -- return {ap,n} mod d, using the table
+C precomputed by mpn_mod_1s_2p_cps below: cps = {inverse, cnt, B1, B2, B3},
+C where B1..B3 are B^k mod (d << cnt), pre-shifted (see the stmia in _cps).
+C Limbs are consumed high-to-low, two per iteration, folding with the B^k
+C multipliers via umlal; the tail at L(x) does the final reduction using the
+C precomputed limb inverse.
+C NOTE(review): B^k semantics inferred from the _cps code below -- confirm
+C against mpn/generic/mod_1_2.c.
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1s_2p)
+	push	{r4-r10}
+	tst	n, #1
+	add	r7, r3, #8
+	ldmia	r7, {r7, r8, r12}	C load B1, B2, B3
+	add	ap, ap, n, lsl #2	C put ap at operand end
+	beq	L(evn)
+
+C Odd n: fold the top three limbs into a two-limb residue, then fall into
+C the common loop.
+L(odd):	subs	n, n, #1
+	beq	L(1)			C n = 1: single-limb special case
+	ldmdb	ap!, {r4,r6,r9}
+	mov	r10, #0
+	umlal	r4, r10, r6, r7
+	umlal	r4, r10, r9, r8
+	b	L(com)
+
+L(evn):	ldmdb	ap!, {r4,r10}
+L(com):	subs	n, n, #2
+	ble	L(end)
+	ldmdb	ap!, {r5,r6}
+	b	L(mid)
+
+C Main loop: two limbs per half, alternating the residue register pairs.
+L(top):	mov	r9, #0
+	umlal	r5, r9, r6, r7		C B1
+	umlal	r5, r9, r4, r8		C B2
+	ldmdb	ap!, {r4,r6}
+	umlal	r5, r9, r10, r12	C B3
+	ble	L(xit)
+	mov	r10, #0
+	umlal	r4, r10, r6, r7		C B1
+	umlal	r4, r10, r5, r8		C B2
+	ldmdb	ap!, {r5,r6}
+	umlal	r4, r10, r9, r12	C B3
+L(mid):	subs	n, n, #4
+	bge	L(top)
+
+	mov	r9, #0
+	umlal	r5, r9, r6, r7		C B1
+	umlal	r5, r9, r4, r8		C B2
+	umlal	r5, r9, r10, r12	C B3
+	mov	r4, r5
+
+L(end):	movge	   r9, r10		C executed iff coming via xit
+	ldr	r6, [r3, #4]		C cps[1] = cnt
+	mov	r5, #0
+	umlal	r4, r5, r9, r7
+	mov	r7, r5, lsl r6
+C Final reduction: normalize the residue by cnt, then divide by the shifted
+C d using the precomputed inverse cps[0].
+L(x):	rsb	r1, r6, #32
+	orr	r8, r7, r4, lsr r1
+	mov	r9, r4, lsl r6
+	ldr	r5, [r3, #0]		C cps[0] = limb inverse
+	add	r0, r8, #1
+	umull	r12, r1, r8, r5
+	adds	r4, r12, r9
+	adc	r1, r1, r0
+	mul	r5, r2, r1
+	sub	r9, r9, r5
+	cmp	r9, r4
+	addhi	r9, r9, r2		C conditional remainder adjustments
+	cmp	r2, r9
+	subls	r9, r9, r2
+	mov	r0, r9, lsr r6		C undo normalization shift
+	pop	{r4-r10}
+	bx	r14
+
+L(xit):	mov	r10, #0
+	umlal	r4, r10, r6, r7		C B1
+	umlal	r4, r10, r5, r8		C B2
+	umlal	r4, r10, r9, r12	C B3
+	b	L(end)
+
+L(1):	ldr	r6, [r3, #4]		C cps[1] = cnt
+	ldr	r4, [ap, #-4]		C ap[0]
+	mov	r7, #0
+	b	L(x)
+EPILOGUE()
+
+C mpn_mod_1s_2p_cps(cps, b) -- fill the 5-entry table for mpn_mod_1s_2p:
+C   cps[0] = mpn_invert_limb(b << cnt),  cps[1] = cnt = clz(b),
+C   cps[2..4] = successive B^k mod (b << cnt) values, each shifted right
+C   by cnt before storing.
+C NOTE(review): derivation inferred from the code; confirm against the
+C generic mpn/generic/mod_1_2.c.
+PROLOGUE(mpn_mod_1s_2p_cps)
+	push	{r4-r8, r14}
+	clz	r4, r1
+	mov	r5, r1, lsl r4		C b <<= cnt
+	mov	r6, r0			C r6 = cps
+	mov	r0, r5
+	bl	mpn_invert_limb
+	rsb	r3, r4, #32
+	mov	r3, r0, lsr r3
+	mov	r2, #1
+	orr	r3, r3, r2, lsl r4	C r3 = 2^cnt | high bits of inverse
+	rsb	r1, r5, #0		C r1 = -b (normalized)
+	mul	r2, r1, r3		C first B^k value
+	umull	r3, r12, r2, r0
+	add	r12, r2, r12
+	mvn	r12, r12
+	mul	r1, r5, r12		C next B^k value via mul-by-inverse
+	cmp	r1, r3
+	addhi	r1, r1, r5		C conditional correction
+	umull	r12, r7, r1, r0
+	add	r7, r1, r7
+	mvn	r7, r7
+	mul	r3, r5, r7		C next B^k value
+	cmp	r3, r12
+	addhi	r3, r3, r5		C conditional correction
+	mov	r5, r2, lsr r4		C unshift the three values
+	mov	r7, r1, lsr r4
+	mov	r8, r3, lsr r4
+	stmia	r6, {r0,r4,r5,r7,r8}	C fill cps
+	pop	{r4-r8, pc}
+EPILOGUE()
diff --git a/mpn/arm/v6/addmul_1.asm b/mpn/arm/v6/addmul_1.asm
new file mode 100644
index 0000000..f60067e
--- /dev/null
+++ b/mpn/arm/v6/addmul_1.asm
@@ -0,0 +1,112 @@
+dnl  ARM mpn_addmul_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 6.4
+C Cortex-A7	 5.25
+C Cortex-A8	 7
+C Cortex-A9	 3.25
+C Cortex-A15	 4
+
+C TODO
+C  * Micro-optimise feed-in code.
+C  * Optimise for n=1,2 by delaying register saving.
+C  * Try using ldm/stm.
+
+C mpn_addmul_1(rp, up, n, v0) -- add {up,n} * v0 to {rp,n} and return the
+C carry-out limb.  The loop is four-way unrolled; umaal (rhi:rlo =
+C rn*rm + rlo + rhi) folds the product, the existing rp limb and the
+C running carry (r12) in a single instruction.  The feed-in paths
+C pre-advance rp so the loop's fixed negative offsets line up for each
+C residue of n mod 4.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+	stmfd	sp!, { r4, r5, r6, r7 }
+
+	ands	r6, n, #3		C dispatch on n mod 4
+	mov	r12, #0			C clear carry limb
+	beq	L(fi0)
+	cmp	r6, #2
+	bcc	L(fi1)
+	beq	L(fi2)
+
+L(fi3):	ldr	r4, [up], #4
+	ldr	r6, [rp, #0]
+	ldr	r5, [up], #4
+	b	L(lo3)
+
+L(fi0):	ldr	r5, [up], #4
+	ldr	r7, [rp], #4
+	ldr	r4, [up], #4
+	b	L(lo0)
+
+L(fi1):	ldr	r4, [up], #4
+	ldr	r6, [rp], #8
+	subs	n, n, #1
+	beq	L(1)			C n = 1: no loop at all
+	ldr	r5, [up], #4
+	b	L(lo1)
+
+L(fi2):	ldr	r5, [up], #4
+	ldr	r7, [rp], #12
+	ldr	r4, [up], #4
+	b	L(lo2)
+
+	ALIGN(16)
+L(top):	ldr	r6, [rp, #-8]
+	ldr	r5, [up], #4
+	str	r7, [rp, #-12]
+L(lo1):	umaal	r6, r12, r4, v0
+	ldr	r7, [rp, #-4]
+	ldr	r4, [up], #4
+	str	r6, [rp, #-8]
+L(lo0):	umaal	r7, r12, r5, v0
+	ldr	r6, [rp, #0]
+	ldr	r5, [up], #4
+	str	r7, [rp, #-4]
+L(lo3):	umaal	r6, r12, r4, v0
+	ldr	r7, [rp, #4]
+	ldr	r4, [up], #4
+	str	r6, [rp], #16
+L(lo2):	umaal	r7, r12, r5, v0
+	subs	n, n, #4
+	bhi	L(top)
+
+	ldr	r6, [rp, #-8]
+	str	r7, [rp, #-12]
+L(1):	umaal	r6, r12, r4, v0
+	str	r6, [rp, #-8]
+	mov	r0, r12			C return carry limb
+	ldmfd	sp!, { r4, r5, r6, r7 }
+	bx	lr
+EPILOGUE()
diff --git a/mpn/arm/v6/addmul_2.asm b/mpn/arm/v6/addmul_2.asm
new file mode 100644
index 0000000..5b5ffeb
--- /dev/null
+++ b/mpn/arm/v6/addmul_2.asm
@@ -0,0 +1,125 @@
+dnl  ARM mpn_addmul_2.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2015 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 4.68
+C Cortex-A5	 3.63
+C Cortex-A7	 3.65
+C Cortex-A8	 4.0
+C Cortex-A9	 2.25
+C Cortex-A15	 2.5
+C Cortex-A17	 2.13
+C Cortex-A53	 3.5
+
+C mpn_addmul_2(rp, up, n, vp) -- add {up,n} * {vp,2} to {rp, n+1} and return
+C the most significant carry limb (standard mpn_addmul_2 contract -- confirm
+C against gmp-impl.h).  Two umaal carry chains run in parallel: cya for the
+C v0 products, cyb for the v1 products.  The loop is four-way unrolled;
+C feed-in paths bias up/rp so the loop's fixed offsets line up for each
+C residue of n mod 4.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r6')
+define(`v1',`r7')
+define(`u0',`r3')
+define(`u1',`r9')
+
+define(`cya',`r8')
+define(`cyb',`r12')
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_2)
+	push	{ r4-r9 }
+
+	ldrd	v0, v1, [vp, #0]	C load both multiplier limbs
+	mov	cya, #0
+	mov	cyb, #0
+
+	tst	n, #1			C dispatch on n mod 4
+	beq	L(evn)
+
+L(odd):	ldr	u1, [up, #0]
+	ldr	r4, [rp, #0]
+	tst	n, #2
+	beq	L(fi1)
+L(fi3):	sub	up, up, #8
+	sub	rp, rp, #8
+	b	L(lo3)
+L(fi1):	sub	n, n, #1
+	b	L(top)
+
+L(evn):	ldr	u0, [up, #0]
+	ldr	r5, [rp, #0]
+	tst	n, #2
+	bne	L(fi2)
+L(fi0):	sub	up, up, #4
+	sub	rp, rp, #4
+	b	L(lo0)
+L(fi2):	sub	up, up, #12
+	sub	rp, rp, #12
+	b	L(lo2)
+
+C Each quarter of the loop: load the next rp limb, accumulate u*v0 into one
+C chain and u*v1 into the other, store one finished limb.
+	ALIGN(16)
+L(top):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+L(lo0):	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+L(lo3):	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+L(lo2):	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	n, n, #4
+	bhi	L(top)
+
+C Wind-down: finish the last limb of each chain and merge the two carries.
+L(end):	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	mov	r0, cyb			C return the top carry limb
+
+	pop	{ r4-r9 }
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/v6/addmul_3.asm b/mpn/arm/v6/addmul_3.asm
new file mode 100644
index 0000000..3d4ea26
--- /dev/null
+++ b/mpn/arm/v6/addmul_3.asm
@@ -0,0 +1,191 @@
+dnl  ARM mpn_addmul_3.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 4.33
+C Cortex-A5	 3.28
+C Cortex-A7	 3.25
+C Cortex-A8	 3.17
+C Cortex-A9	 2.125
+C Cortex-A15	 2
+C Cortex-A17	 2.11
+C Cortex-A53	 4.18
+
+C TODO
+C  * Use a fast path for n <= KARATSUBA_MUL_THRESHOLD using a jump table,
+C    avoiding the current multiply.
+C  * Start the first multiply or multiplies early.
+
+C mpn_addmul_3(rp, up, n, vp) -- add {up,n} * {vp,3} to {rp, n+2} and return
+C the most significant carry limb (standard mpn_addmul_3 contract -- confirm
+C against gmp-impl.h).  Three umaal carry chains (cy0/cy1/cy2, one per
+C multiplier limb) run through a six-way unrolled loop; the entry point is
+C chosen by n mod 6, computed without a division (see below), and the L(b*)
+C feed-in stubs bias rp/up so the loop's fixed offsets line up.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r4')  define(`v1',`r5')  define(`v2',`r6')
+define(`u0',`r3')  define(`u1',`r14')
+define(`w0',`r7')  define(`w1',`r8')  define(`w2',`r9')
+define(`cy0',`r10')  define(`cy1',`r11') define(`cy2',`r12')
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_3)
+	push	{ r4-r11, r14 }
+
+	ldr	w0, =0xaaaaaaab		C 3^{-1} mod 2^32
+	ldm	vp, { v0,v1,v2 }
+	mov	cy0, #0
+	mov	cy1, #0
+	mov	cy2, #0
+
+C Tricky n mod 6
+C Multiplying by 3^{-1} mod 2^32 and masking two CRT bits (#0xc0000001)
+C yields a value whose ror #28 selects the proper branch-table slot for
+C n mod 6; PIC builds branch through pc arithmetic, non-PIC loads the
+C target address from the inline word table.
+	mul	w0, w0, n		C n * 3^{-1} mod 2^32
+	and	w0, w0, #0xc0000001	C pseudo-CRT mod 3,2
+	sub	n, n, #3
+ifdef(`PIC',`
+	add	pc, pc, w0, ror $28
+	nop
+	b	L(b0)
+	b	L(b2)
+	b	L(b4)
+	.word	0xe7f000f0	C udf
+	b	L(b3)
+	b	L(b5)
+	b	L(b1)
+',`
+	ldr	pc, [pc, w0, ror $28]
+	nop
+	.word	L(b0), L(b2), L(b4), 0, L(b3), L(b5), L(b1)
+')
+
+C Feed-in stubs: preload the first rp/up limbs and bias the pointers for
+C each n mod 6 residue, then jump into the proper slot of the loop.
+L(b5):	add	up, up, #-8
+	ldr	w1, [rp, #0]
+	ldr	w2, [rp, #4]
+	ldr	u1, [up, #8]
+	b	L(lo5)
+
+L(b4):	add	rp, rp, #-4
+	add	up, up, #-12
+	ldr	w2, [rp, #4]
+	ldr	w0, [rp, #8]
+	ldr	u0, [up, #12]
+	b	L(lo4)
+
+L(b3):	add	rp, rp, #-8
+	add	up, up, #-16
+	ldr	w0, [rp, #8]
+	ldr	w1, [rp, #12]
+	ldr	u1, [up, #16]
+	b	L(lo3)
+
+L(b1):	add	rp, rp, #8
+	ldr	w2, [rp, #-8]
+	ldr	w0, [rp, #-4]
+	ldr	u1, [up, #0]
+	b	L(lo1)
+
+L(b0):	add	rp, rp, #4
+	add	up, up, #-4
+	ldr	w0, [rp, #-4]
+	ldr	w1, [rp, #0]
+	ldr	u0, [up, #4]
+	b	L(lo0)
+
+L(b2):	add	rp, rp, #12
+	add	up, up, #4
+	ldr	w1, [rp, #-12]
+	ldr	w2, [rp, #-8]
+	ldr	u0, [up, #-4]
+
+C Each sixth of the loop: accumulate one up limb against all three
+C multiplier limbs (one umaal per carry chain) and store one result limb.
+	ALIGN(16)
+L(top):	ldr	w0, [rp, #-4]
+	umaal	w1, cy0, u0, v0
+	ldr	u1, [up, #0]
+	umaal	w2, cy1, u0, v1
+	str	w1, [rp, #-12]
+	umaal	w0, cy2, u0, v2
+L(lo1):	ldr	w1, [rp, #0]
+	umaal	w2, cy0, u1, v0
+	ldr	u0, [up, #4]
+	umaal	w0, cy1, u1, v1
+	str	w2, [rp, #-8]
+	umaal	w1, cy2, u1, v2
+L(lo0):	ldr	w2, [rp, #4]
+	umaal	w0, cy0, u0, v0
+	ldr	u1, [up, #8]
+	umaal	w1, cy1, u0, v1
+	str	w0, [rp, #-4]
+	umaal	w2, cy2, u0, v2
+L(lo5):	ldr	w0, [rp, #8]
+	umaal	w1, cy0, u1, v0
+	ldr	u0, [up, #12]
+	umaal	w2, cy1, u1, v1
+	str	w1, [rp, #0]
+	umaal	w0, cy2, u1, v2
+L(lo4):	ldr	w1, [rp, #12]
+	umaal	w2, cy0, u0, v0
+	ldr	u1, [up, #16]
+	umaal	w0, cy1, u0, v1
+	str	w2, [rp, #4]
+	umaal	w1, cy2, u0, v2
+L(lo3):	ldr	w2, [rp, #16]
+	umaal	w0, cy0, u1, v0
+	ldr	u0, [up, #20]
+	umaal	w1, cy1, u1, v1
+	str	w0, [rp, #8]
+	umaal	w2, cy2, u1, v2
+L(lo2):	subs	n, n, #6
+	add	up, up, #24
+	add	rp, rp, #24
+	bge	L(top)
+
+C Wind-down: finish the three carry chains and merge them into the final
+C stored limbs and the returned carry.
+L(end):	umaal	w1, cy0, u0, v0
+	ldr	u1, [up, #0]
+	umaal	w2, cy1, u0, v1
+	str	w1, [rp, #-12]
+	mov	w0, #0
+	umaal	w0, cy2, u0, v2
+	umaal	w2, cy0, u1, v0
+	umaal	w0, cy1, u1, v1
+	str	w2, [rp, #-8]
+	umaal	cy1, cy2, u1, v2
+	adds	w0, w0, cy0
+	str	w0, [rp, #-4]
+	adcs	w1, cy1, #0
+	str	w1, [rp, #0]
+	adc	r0, cy2, #0		C return carry limb
+
+	pop	{ r4-r11, pc }
+EPILOGUE()
diff --git a/mpn/arm/v6/dive_1.asm b/mpn/arm/v6/dive_1.asm
new file mode 100644
index 0000000..974f8d4
--- /dev/null
+++ b/mpn/arm/v6/dive_1.asm
@@ -0,0 +1,149 @@
+dnl  ARM v6 mpn_divexact_1
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C               cycles/limb       cycles/limb
+C               norm    unorm    modexact_1c_odd
+C StrongARM	 -	 -
+C XScale	 -	 -
+C Cortex-A7	 ?	 ?
+C Cortex-A8	 ?	 ?
+C Cortex-A9	 9	10		 9
+C Cortex-A15	 7	 7		 7
+
+C Architecture requirements:
+C v5	-
+C v5t	clz
+C v5te	-
+C v6	umaal
+C v6t2	-
+C v7a	-
+
+C mpn_divexact_1(rp, up, n, d) -- exact division: {rp,n} = {up,n} / d,
+C valid only when d divides {up,n} exactly.  Works by multiplying with the
+C inverse of (odd part of) d modulo 2^32, obtained from a table lookup plus
+C two Newton steps; an even d is handled by first stripping its trailing
+C zero bits and right-shifting the dividend limbs on the fly (L(unnorm)).
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`d',  `r3')
+
+define(`cy',  `r7')
+define(`cnt', `r6')
+define(`tnc', `r10')
+
+ASM_START()
+PROLOGUE(mpn_divexact_1)
+	push	{r4,r5,r6,r7,r8,r9}
+
+	tst	d, #1			C Z set iff d even; flags survive to the
+					C beq L(unnorm) below (no flag-setting
+					C instruction intervenes)
+
+	rsb	r4, d, #0
+	and	r4, r4, d		C isolate lowest set bit of d
+	clz	r4, r4
+	rsb	cnt, r4, #31		C count_trailing_zeros
+	mov	d, d, lsr cnt		C d is now odd
+
+C binvert limb
+C Table gives an 8-bit inverse of odd d; each Newton step
+C (x <- 2x - d*x^2) doubles the number of correct low bits, so two steps
+C reach a full 32-bit inverse.
+	LEA(	r4, binvert_limb_table)
+	and	r12, d, #254
+	ldrb	r4, [r4, r12, lsr #1]	C table index is (d/2) mod 128
+	mul	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, lsl #1
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, lsl #1	C r4 = inverse
+
+	ldr	r5, [up], #4		C up[0]
+	mov	cy, #0
+	rsb	r8, r4, #0		C r8 = -inverse
+	beq	L(unnorm)		C branch on the tst d,#1 above
+
+C d odd: each iteration multiplies a borrow-adjusted limb by the inverse;
+C umaal recovers the borrow (high limb of q*d) for the next step.
+L(norm):
+	subs	n, n, #1
+	mul	r5, r5, r4
+	beq	L(end)
+
+	ALIGN(16)
+L(top):	ldr	r9, [up], #4
+	mov	r12, #0
+	str	r5, [rp], #4
+	umaal	r12, cy, r5, d
+	mul	r5, r9, r4
+	mla	r5, cy, r8, r5
+	subs	n, n, #1
+	bne	L(top)
+
+L(end):	str	r5, [rp]
+	pop	{r4,r5,r6,r7,r8,r9}
+	bx	r14
+
+C d even: shift each dividend limb right by cnt, borrowing the low bits of
+C the next limb, then proceed as in the odd case.
+L(unnorm):
+	push	{r10,r11}
+	rsb	tnc, cnt, #32
+	mov	r11, r5, lsr cnt	C r11 = bits carried to the next limb
+	subs	n, n, #1
+	beq	L(edx)
+
+	ldr	r12, [up], #4
+	orr	r9, r11, r12, lsl tnc
+	mov	r11, r12, lsr cnt
+	mul	r5, r9, r4
+	subs	n, n, #1
+	beq	L(edu)
+
+	ALIGN(16)
+L(tpu):	ldr	r12, [up], #4
+	orr	r9, r11, r12, lsl tnc	C assemble shifted dividend limb
+	mov	r11, r12, lsr cnt
+	mov	r12, #0
+	str	r5, [rp], #4
+	umaal	r12, cy, r5, d
+	mul	r5, r9, r4
+	mla	r5, cy, r8, r5
+	subs	n, n, #1
+	bne	L(tpu)
+
+L(edu):	str	r5, [rp], #4
+	mov	r12, #0
+	umaal	r12, cy, r5, d
+	mul	r5, r11, r4
+	mla	r5, cy, r8, r5
+	str	r5, [rp]
+	pop	{r10,r11}
+	pop	{r4,r5,r6,r7,r8,r9}
+	bx	r14
+
+L(edx):	mul	r5, r11, r4		C n = 1 shortcut
+	str	r5, [rp]
+	pop	{r10,r11}
+	pop	{r4,r5,r6,r7,r8,r9}
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/v6/mode1o.asm b/mpn/arm/v6/mode1o.asm
new file mode 100644
index 0000000..cd3c9bc
--- /dev/null
+++ b/mpn/arm/v6/mode1o.asm
@@ -0,0 +1,95 @@
+dnl  ARM v6 mpn_modexact_1c_odd
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 9
+C Cortex-A15	 7
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	smulbb
+C v6	umaal
+C v6t2	-
+C v7a	-
+
+define(`up', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cy', `r3')
+
+	.protected	binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+	stmfd	sp!, {r4, r5, r6, r7}	C save callee-saved registers
+
+	LEA(	r4, binvert_limb_table)
+
+	ldr	r6, [up], #4		C up[0]
+
+	and	r12, d, #254
+	ldrb	r4, [r4, r12, lsr #1]	C 8-bit inverse of d from table
+	smulbb	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, asl #1	C Newton step: 8 -> 16 valid bits
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, asl #1	C r4 = inverse
+
+	subs	n, n, #1
+	sub	r6, r6, cy		C apply carry-in to first limb
+	mul	r6, r6, r4		C q0 = (up[0] - cy) * inverse mod 2^32
+	beq	L(end)
+
+	rsb	r5, r4, #0		C r5 = -inverse
+
+L(top):	ldr	r7, [up], #4
+	mov	r12, #0
+	umaal	r12, cy, r6, d		C cy = high half of q*d plus previous carry
+	mul	r6, r7, r4
+	mla	r6, cy, r5, r6		C next q = (limb - cy) * inverse mod 2^32
+	subs	n, n, #1
+	bne	L(top)
+
+L(end):	mov	r12, #0
+	umaal	r12, cy, r6, d
+	mov	r0, cy			C return the final carry limb
+
+	ldmfd	sp!, {r4, r5, r6, r7}
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/v6/mul_1.asm b/mpn/arm/v6/mul_1.asm
new file mode 100644
index 0000000..cd1b12b
--- /dev/null
+++ b/mpn/arm/v6/mul_1.asm
@@ -0,0 +1,115 @@
+dnl  ARM mpn_mul_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 6.4
+C Cortex-A7	 5.25
+C Cortex-A8	 7
+C Cortex-A9	 3.25
+C Cortex-A15	 4
+
+C TODO
+C  * Micro-optimise feed-in code.
+C  * Optimise for n=1,2 by delaying register saving.
+C  * Try using ldm/stm.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+	stmfd	sp!, { r4, r5, r6, r7 }
+
+	ands	r6, n, #3		C n mod 4 selects feed-in path
+	mov	r12, #0			C r12 = running carry limb
+	beq	L(fi0)
+	cmp	r6, #2
+	bcc	L(fi1)
+	beq	L(fi2)
+
+L(fi3):	ldr	r4, [up], #4
+	mov	r6, #0
+	ldr	r5, [up], #4
+	b	L(lo3)
+
+L(fi0):	ldr	r5, [up], #4
+	add	rp, rp, #4		C bias rp so loop stores use fixed offsets
+	mov	r7, #0
+	ldr	r4, [up], #4
+	b	L(lo0)
+
+L(fi1):	ldr	r4, [up], #4
+	mov	r6, #0
+	add	rp, rp, #8
+	subs	n, n, #1
+	beq	L(1)			C n = 1: skip straight to wind-down
+	ldr	r5, [up], #4
+	b	L(lo1)
+
+L(fi2):	ldr	r5, [up], #4
+	add	rp, rp, #12
+	mov	r7, #0
+	ldr	r4, [up], #4
+	b	L(lo2)
+
+	ALIGN(16)
+L(top):	mov	r6, #0
+	ldr	r5, [up], #4
+	str	r7, [rp, #-12]
+L(lo1):	umaal	r6, r12, r4, v0		C r12:r6 = r4*v0 + r6 + r12
+	mov	r7, #0
+	ldr	r4, [up], #4
+	str	r6, [rp, #-8]
+L(lo0):	umaal	r7, r12, r5, v0
+	mov	r6, #0
+	ldr	r5, [up], #4
+	str	r7, [rp, #-4]
+L(lo3):	umaal	r6, r12, r4, v0
+	mov	r7, #0
+	ldr	r4, [up], #4
+	str	r6, [rp], #16
+L(lo2):	umaal	r7, r12, r5, v0
+	subs	n, n, #4
+	bhi	L(top)
+
+	mov	r6, #0
+	str	r7, [rp, #-12]
+L(1):	umaal	r6, r12, r4, v0
+	str	r6, [rp, #-8]
+	mov	r0, r12			C return the carry limb
+	ldmfd	sp!, { r4, r5, r6, r7 }
+	bx	lr
+EPILOGUE()
diff --git a/mpn/arm/v6/mul_2.asm b/mpn/arm/v6/mul_2.asm
new file mode 100644
index 0000000..72badd9
--- /dev/null
+++ b/mpn/arm/v6/mul_2.asm
@@ -0,0 +1,135 @@
+dnl  ARM mpn_mul_2.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C ARM11		 5.25
+C Cortex-A5	 3.63
+C Cortex-A7	 3.15
+C Cortex-A8	 5.0
+C Cortex-A9	 2.25
+C Cortex-A15	 2.5
+C Cortex-A17	 2.13
+C Cortex-A53	 3.5
+
+C TODO
+C  * This is a trivial edit of the addmul_2 code.  Check for simplifications,
+C    and possible speedups to 2.0 c/l.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r6')
+define(`v1',`r7')
+define(`u0',`r3')
+define(`u1',`r9')
+
+define(`cya',`r8')
+define(`cyb',`r12')
+
+
+ASM_START()
+PROLOGUE(mpn_mul_2)
+	push	{ r4, r5, r6, r7, r8, r9 }
+
+	ldm	vp, { v0, v1 }		C load the two multiplier limbs
+	mov	cya, #0			C carry chain for the v0 products
+	mov	cyb, #0			C carry chain for the v1 products
+
+	tst	n, #1			C dispatch on n mod 4 via two bit tests
+	beq	L(evn)
+L(odd):	mov	r5, #0
+	ldr	u0, [up, #0]
+	mov	r4, #0
+	tst	n, #2
+	beq	L(fi1)
+L(fi3):	sub	up, up, #12		C bias pointers for the fixed loop offsets
+	sub	rp, rp, #16
+	b	L(lo3)
+L(fi1):	sub	n, n, #1
+	sub	up, up, #4
+	sub	rp, rp, #8
+	b	L(lo1)
+L(evn):	mov	r4, #0
+	ldr	u1, [up, #0]
+	mov	r5, #0
+	tst	n, #2
+	bne	L(fi2)
+L(fi0):	sub	up, up, #8
+	sub	rp, rp, #12
+	b	L(lo0)
+L(fi2):	subs	n, n, #2
+	sub	rp, rp, #4
+	bls	L(end)			C n = 2: wind-down only
+
+	ALIGN(16)
+L(top):	ldr	u0, [up, #4]
+	umaal	r4, cya, u1, v0		C accumulate u*v0 partial products
+	str	r4, [rp, #4]
+	mov	r4, #0
+	umaal	r5, cyb, u1, v1		C accumulate u*v1 partial products
+L(lo1):	ldr	u1, [up, #8]
+	umaal	r5, cya, u0, v0
+	str	r5, [rp, #8]
+	mov	r5, #0
+	umaal	r4, cyb, u0, v1
+L(lo0):	ldr	u0, [up, #12]
+	umaal	r4, cya, u1, v0
+	str	r4, [rp, #12]
+	mov	r4, #0
+	umaal	r5, cyb, u1, v1
+L(lo3):	ldr	u1, [up, #16]!
+	umaal	r5, cya, u0, v0
+	str	r5, [rp, #16]!
+	mov	r5, #0
+	umaal	r4, cyb, u0, v1
+	subs	n, n, #4
+	bhi	L(top)
+
+L(end):	umaal	r4, cya, u1, v0		C wind-down: last two input limbs
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #4]
+	umaal	r5, cya, u0, v0
+	umaal	cya, cyb, u0, v1
+	str	r5, [rp, #8]
+	str	cya, [rp, #12]
+	mov	r0, cyb			C return the high carry limb
+
+	pop	{ r4, r5, r6, r7, r8, r9 }
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/v6/popham.asm b/mpn/arm/v6/popham.asm
new file mode 100644
index 0000000..612a6ad
--- /dev/null
+++ b/mpn/arm/v6/popham.asm
@@ -0,0 +1,139 @@
+dnl  ARM mpn_popcount and mpn_hamdist.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C		     popcount	      hamdist
+C		    cycles/limb	    cycles/limb
+C StrongARM		 -
+C XScale		 -
+C Cortex-A7		 ?
+C Cortex-A8		 ?
+C Cortex-A9		 8.94		 9.47
+C Cortex-A15		 5.67		 6.44
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	usada8
+C v6t2	-
+C v7a	-
+
+ifdef(`OPERATION_popcount',`
+  define(`func',`mpn_popcount')
+  define(`ap',		`r0')
+  define(`n',		`r1')
+  define(`a0',		`r2')
+  define(`a1',		`r3')
+  define(`s',		`r5')
+  define(`b_01010101',	`r6')
+  define(`b_00110011',	`r7')
+  define(`b_00001111',	`r8')
+  define(`zero',	`r9')
+  define(`POPC',	`$1')
+  define(`HAMD',	`dnl')
+')
+ifdef(`OPERATION_hamdist',`
+  define(`func',`mpn_hamdist')
+  define(`ap',		`r0')
+  define(`bp',		`r1')
+  define(`n',		`r2')
+  define(`a0',		`r6')
+  define(`a1',		`r7')
+  define(`b0',		`r4')
+  define(`b1',		`r5')
+  define(`s',		`r11')
+  define(`b_01010101',	`r8')
+  define(`b_00110011',	`r9')
+  define(`b_00001111',	`r10')
+  define(`zero',	`r3')
+  define(`POPC',	`dnl')
+  define(`HAMD',	`$1')
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+ASM_START()
+PROLOGUE(func)
+POPC(`	push	{ r4-r9 }	')
+HAMD(`	push	{ r4-r11 }	')
+
+	ldr	b_01010101, =0x55555555
+	mov	r12, #0			C r12 = grand total accumulator
+	ldr	b_00110011, =0x33333333
+	mov	zero, #0
+	ldr	b_00001111, =0x0f0f0f0f
+
+	tst	n, #1			C odd n handles one limb before the pair loop
+	beq	L(evn)
+
+L(odd):	ldr	a1, [ap], #4		C 1 x 32 1-bit accumulators, 0-1
+HAMD(`	ldr	b1, [bp], #4	')	C 1 x 32 1-bit accumulators, 0-1
+HAMD(`	eor	a1, a1, b1	')
+	and	r4, b_01010101, a1, lsr #1
+	sub	a1, a1, r4		C 16 2-bit accumulators, 0-2
+	and	r4, a1, b_00110011
+	bic	r5, a1, b_00110011
+	add	r5, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
+	subs	n, n, #1
+	b	L(mid)
+
+L(evn):	mov	s, #0
+
+L(top):	ldrd	a0, a1, [ap], #8	C 2 x 32 1-bit accumulators, 0-1
+HAMD(`	ldrd	b0, b1, [bp], #8')
+HAMD(`	eor	a0, a0, b0	')
+HAMD(`	eor	a1, a1, b1	')
+	subs	n, n, #2
+	usada8	r12, s, zero, r12	C add the four bytes of s into r12
+	and	r4, b_01010101, a0, lsr #1
+	sub	a0, a0, r4
+	and	r4, b_01010101, a1, lsr #1
+	sub	a1, a1, r4
+	and	r4, a0, b_00110011
+	bic	r5, a0, b_00110011
+	add	a0, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
+	and	r4, a1, b_00110011
+	bic	r5, a1, b_00110011
+	add	a1, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
+	add	r5, a0, a1		C 8 4-bit accumulators, 0-8
+L(mid):	and	r4, r5, b_00001111
+	bic	r5, r5, b_00001111
+	add	s, r4, r5, lsr #4	C 4 8-bit accumulators
+	bne	L(top)
+
+	usada8	r0, s, zero, r12	C final byte-sum produces the return value
+POPC(`	pop	{ r4-r9 }	')
+HAMD(`	pop	{ r4-r11 }	')
+	bx	r14
+EPILOGUE()
diff --git a/mpn/arm/v6/sqr_basecase.asm b/mpn/arm/v6/sqr_basecase.asm
new file mode 100644
index 0000000..72d7078
--- /dev/null
+++ b/mpn/arm/v6/sqr_basecase.asm
@@ -0,0 +1,544 @@
+dnl  ARM v6 mpn_sqr_basecase.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2015 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C Code structure:
+C
+C
+C        m_2(0m4)        m_2(2m4)        m_2(1m4)        m_2(3m4)
+C           |               |               |               |
+C           |               |               |               |
+C           |               |               |               |
+C          \|/             \|/             \|/             \|/
+C              ____________                   ____________
+C             /            \                 /            \
+C            \|/            \               \|/            \
+C         am_2(3m4)       am_2(1m4)       am_2(0m4)       am_2(2m4)
+C            \            /|\                \            /|\
+C             \____________/                  \____________/
+C                       \                        /
+C                        \                      /
+C                         \                    /
+C                         cor3             cor2
+C                            \              /
+C                             \            /
+C                            sqr_diag_addlsh1
+
+C TODO
+C  * Align more labels.
+C  * Further tweak counter and updates in outer loops.  (This could save
+C    perhaps 5n cycles).
+C  * Avoid sub-with-lsl in outer loops.  We could keep n up-shifted, then
+C    initialise loop counter i with a right shift.
+C  * Try to use fewer registers.  Perhaps coalesce r9 branch target and n_saved.
+C    (This could save 2-3 cycles for n > 4.)
+C  * Optimise sqr_diag_addlsh1 loop.  The current code uses old-style carry
+C    propagation.
+C  * Stop loops earlier suppressing writes of upper-most rp[] values.
+C  * The addmul_2 loops here runs well on all cores, but mul_2 runs poorly
+C    particularly on Cortex-A8.
+
+
+define(`rp',      r0)
+define(`up',      r1)
+define(`n',       r2)
+
+define(`v0',      r3)
+define(`v1',      r6)
+define(`i',       r8)
+define(`n_saved', r14)
+define(`cya',     r11)
+define(`cyb',     r12)
+define(`u0',      r7)
+define(`u1',      r9)
+
+ASM_START()
+PROLOGUE(mpn_sqr_basecase)
+	and	r12, n, #3
+	cmp	n, #4
+	addgt	r12, r12, #4		C table index: n mod 4, biased by 4 when n > 4
+	add	pc, pc, r12, lsl #2	C computed branch into the table below
+	nop
+	b	L(4)
+	b	L(1)
+	b	L(2)
+	b	L(3)
+	b	L(0m4)
+	b	L(1m4)
+	b	L(2m4)
+	b	L(3m4)
+
+
+L(1m4):	push	{r4-r11, r14}
+	mov	n_saved, n		C keep original n for sqr_diag_addlsh1
+	sub	i, n, #4
+	sub	n, n, #2
+	add	r10, pc, #L(am2_2m4)-.-8	C r10 = continuation after first mul_2 pass
+	ldm	up, {v0,v1,u0}
+	sub	up, up, #4
+	mov	cyb, #0
+	mov	r5, #0
+	umull	r4, cya, v1, v0
+	str	r4, [rp], #-12
+	mov	r4, #0
+	b	L(ko0)
+
+L(3m4):	push	{r4-r11, r14}
+	mov	n_saved, n
+	sub	i, n, #4
+	sub	n, n, #2
+	add	r10, pc, #L(am2_0m4)-.-8
+	ldm	up, {v0,v1,u0}
+	add	up, up, #4
+	mov	cyb, #0
+	mov	r5, #0
+	umull	r4, cya, v1, v0
+	str	r4, [rp], #-4
+	mov	r4, #0
+	b	L(ko2)
+
+L(2m4):	push	{r4-r11, r14}
+	mov	n_saved, n
+	sub	i, n, #4
+	sub	n, n, #2
+	add	r10, pc, #L(am2_3m4)-.-8
+	ldm	up, {v0,v1,u1}
+	mov	cyb, #0
+	mov	r4, #0
+	umull	r5, cya, v1, v0
+	str	r5, [rp], #-8
+	mov	r5, #0
+	b	L(ko1)
+
+L(0m4):	push	{r4-r11, r14}
+	mov	n_saved, n
+	sub	i, n, #4
+	sub	n, n, #2
+	add	r10, pc, #L(am2_1m4)-.-8
+	ldm	up, {v0,v1,u1}
+	mov	cyb, #0
+	mov	r4, #0
+	add	up, up, #8
+	umull	r5, cya, v1, v0
+	str	r5, [rp, #0]
+	mov	r5, #0
+
+L(top):	ldr	u0, [up, #4]		C mul_2 style first pass
+	umaal	r4, cya, u1, v0
+	str	r4, [rp, #4]
+	mov	r4, #0
+	umaal	r5, cyb, u1, v1
+L(ko2):	ldr	u1, [up, #8]
+	umaal	r5, cya, u0, v0
+	str	r5, [rp, #8]
+	mov	r5, #0
+	umaal	r4, cyb, u0, v1
+L(ko1):	ldr	u0, [up, #12]
+	umaal	r4, cya, u1, v0
+	str	r4, [rp, #12]
+	mov	r4, #0
+	umaal	r5, cyb, u1, v1
+L(ko0):	ldr	u1, [up, #16]!
+	umaal	r5, cya, u0, v0
+	str	r5, [rp, #16]!
+	mov	r5, #0
+	umaal	r4, cyb, u0, v1
+	subs	i, i, #4
+	bhi	L(top)
+
+	umaal	r4, cya, u1, v0		C wind-down of the first pass
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #4]
+	umaal	r5, cya, u0, v0
+	umaal	cya, cyb, u0, v1
+	str	r5, [rp, #8]
+	str	cya, [rp, #12]
+	str	cyb, [rp, #16]
+
+	add	up, up, #4
+	sub	n, n, #1
+	add	rp, rp, #8
+	bx	r10			C jump to the matching am2 continuation
+
+L(evnloop):
+	subs	i, n, #6
+	sub	n, n, #2
+	blt	L(cor2)			C 2 limbs left: final correction
+	ldm	up, {v0,v1,u1}
+	add	up, up, #8
+	mov	cya, #0
+	mov	cyb, #0
+	ldr	r4, [rp, #-4]
+	umaal	r4, cya, v1, v0
+	str	r4, [rp, #-4]
+	ldr	r4, [rp, #0]
+
+	ALIGN(16)
+L(ua2):	ldr	r5, [rp, #4]		C addmul_2 inner loop
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	i, i, #4
+	bhs	L(ua2)
+
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	str	cyb, [rp, #8]
+L(am2_0m4):
+	sub	rp, rp, n, lsl #2	C rewind pointers for the next pass
+	sub	up, up, n, lsl #2
+	add	rp, rp, #8
+
+	sub	i, n, #4
+	sub	n, n, #2
+	ldm	up, {v0,v1,u1}
+	mov	cya, #0
+	mov	cyb, #0
+	ldr	r4, [rp, #4]
+	umaal	r4, cya, v1, v0
+	str	r4, [rp, #4]
+	ldr	r4, [rp, #8]
+	b	L(lo0)
+
+	ALIGN(16)
+L(ua0):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+L(lo0):	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	i, i, #4
+	bhs	L(ua0)
+
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	str	cyb, [rp, #8]
+L(am2_2m4):
+	sub	rp, rp, n, lsl #2
+	sub	up, up, n, lsl #2
+	add	rp, rp, #16
+	b	L(evnloop)
+
+
+L(oddloop):
+	sub	i, n, #5
+	sub	n, n, #2
+	ldm	up, {v0,v1,u0}
+	mov	cya, #0
+	mov	cyb, #0
+	ldr	r5, [rp, #0]
+	umaal	r5, cya, v1, v0
+	str	r5, [rp, #0]
+	ldr	r5, [rp, #4]
+	add	up, up, #4
+	b	L(lo1)
+
+	ALIGN(16)
+L(ua1):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+L(lo1):	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	i, i, #4
+	bhs	L(ua1)
+
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	str	cyb, [rp, #8]
+L(am2_3m4):
+	sub	rp, rp, n, lsl #2
+	sub	up, up, n, lsl #2
+	add	rp, rp, #4
+
+	subs	i, n, #3
+	beq	L(cor3)			C 3 limbs left: final correction
+	sub	n, n, #2
+	ldm	up, {v0,v1,u0}
+	mov	cya, #0
+	mov	cyb, #0
+	ldr	r5, [rp, #8]
+	sub	up, up, #4
+	umaal	r5, cya, v1, v0
+	str	r5, [rp, #8]
+	ldr	r5, [rp, #12]
+	b	L(lo3)
+
+	ALIGN(16)
+L(ua3):	ldr	r5, [rp, #4]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #4]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #0]
+	ldr	r4, [rp, #8]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #8]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #4]
+	ldr	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	ldr	u0, [up, #12]
+	umaal	r5, cyb, u1, v1
+	str	r4, [rp, #8]
+L(lo3):	ldr	r4, [rp, #16]!
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #16]!
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #-4]
+	subs	i, i, #4
+	bhs	L(ua3)
+
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #0]
+	str	cya, [rp, #4]
+	str	cyb, [rp, #8]
+L(am2_1m4):
+	sub	rp, rp, n, lsl #2
+	sub	up, up, n, lsl #2
+	add	rp, rp, #12
+	b	L(oddloop)
+
+
+L(cor3):ldm	up, {v0,v1,u0}		C correction for 3 remaining limbs
+	ldr	r5, [rp, #8]
+	mov	cya, #0
+	mov	cyb, #0
+	umaal	r5, cya, v1, v0
+	str	r5, [rp, #8]
+	ldr	r5, [rp, #12]
+	ldr	r4, [rp, #16]
+	umaal	r5, cya, u0, v0
+	ldr	u1, [up, #12]
+	umaal	r4, cyb, u0, v1
+	str	r5, [rp, #12]
+	umaal	r4, cya, u1, v0
+	umaal	cya, cyb, u1, v1
+	str	r4, [rp, #16]
+	str	cya, [rp, #20]
+	str	cyb, [rp, #24]
+	add	up, up, #16
+	mov	cya, cyb
+	adds	rp, rp, #36		C clear cy
+	mov	cyb, #0
+	umaal	cya, cyb, u1, u0
+	b	L(sqr_diag_addlsh1)
+
+L(cor2):
+	ldm	up!, {v0,v1,u0}		C correction for 2 remaining limbs
+	mov	r4, cya
+	mov	r5, cyb
+	mov	cya, #0
+	umaal	r4, cya, v1, v0
+	mov	cyb, #0
+	umaal	r5, cya, u0, v0
+	strd	r4, r5, [rp, #-4]
+	umaal	cya, cyb, u0, v1
+	add	rp, rp, #16
+C	b	L(sqr_diag_addlsh1)
+
+
+define(`w0',  r6)
+define(`w1',  r7)
+define(`w2',  r8)
+define(`rbx', r9)
+
+L(sqr_diag_addlsh1):
+	str	cya, [rp, #-12]
+	str	cyb, [rp, #-8]
+	sub	n, n_saved, #1		C restore limb count for the diagonal pass
+	sub	up, up, n_saved, lsl #2	C rewind up to the start of the operand
+	sub	rp, rp, n_saved, lsl #3	C rewind rp to the start of the product
+	ldr	r3, [up], #4
+	umull	w1, r5, r3, r3		C first diagonal square
+	mov	w2, #0
+	mov	r10, #0
+C	cmn	r0, #0			C clear cy (already clear)
+	b	L(lm)
+
+L(tsd):	adds	w0, w0, rbx
+	adcs	w1, w1, r4
+	str	w0, [rp, #0]
+L(lm):	ldr	w0, [rp, #4]
+	str	w1, [rp, #4]
+	ldr	w1, [rp, #8]!
+	add	rbx, r5, w2
+	adcs	w0, w0, w0		C double the cross products on the fly
+	ldr	r3, [up], #4
+	adcs	w1, w1, w1
+	adc	w2, r10, r10		C capture the shifted-out bit
+	umull	r4, r5, r3, r3
+	subs	n, n, #1
+	bne	L(tsd)
+
+	adds	w0, w0, rbx
+	adcs	w1, w1, r4
+	adc	w2, r5, w2
+	stm	rp, {w0,w1,w2}
+
+	pop	{r4-r11, pc}
+
+
+C Straight line code for n <= 4
+
+L(1):	ldr	r3, [up, #0]
+	umull	r1, r2, r3, r3
+	stm	rp, {r1,r2}
+	bx	r14
+
+L(2):	push	{r4-r5}
+	ldm	up, {r5,r12}
+	umull	r1, r2, r5, r5
+	umull	r3, r4, r12, r12
+	umull	r5, r12, r5, r12	C cross product, doubled below
+	adds	r5, r5, r5
+	adcs	r12, r12, r12
+	adc	r4, r4, #0
+	adds	r2, r2, r5
+	adcs	r3, r3, r12
+	adc	r4, r4, #0
+	stm	rp, {r1,r2,r3,r4}
+	pop	{r4-r5}
+	bx	r14
+
+L(3):	push	{r4-r11}
+	ldm	up, {r7,r8,r9}
+	umull	r1, r2, r7, r7
+	umull	r3, r4, r8, r8
+	umull	r5, r6, r9, r9
+	umull	r10, r11, r7, r8	C cross products, doubled below
+	mov	r12, #0
+	umlal	r11, r12, r7, r9
+	mov	r7, #0
+	umlal	r12, r7, r8, r9
+	adds	r10, r10, r10
+	adcs	r11, r11, r11
+	adcs	r12, r12, r12
+	adcs	r7, r7, r7
+	adc	r6, r6, #0
+	adds	r2, r2, r10
+	adcs	r3, r3, r11
+	adcs	r4, r4, r12
+	adcs	r5, r5, r7
+	adc	r6, r6, #0
+	stm	rp, {r1,r2,r3,r4,r5,r6}
+	pop	{r4-r11}
+	bx	r14
+
+L(4):	push	{r4-r11, r14}
+	ldm	up, {r9,r10,r11,r12}
+	umull	r1, r2, r9, r9		C diagonal squares first
+	umull	r3, r4, r10, r10
+	umull	r5, r6, r11, r11
+	umull	r7, r8, r12, r12
+	stm	rp, {r1,r2,r3,r4,r5,r6,r7}
+	umull	r1, r2, r9, r10		C then the six cross products
+	mov	r3, #0
+	umlal	r2, r3, r9, r11
+	mov	r4, #0
+	umlal	r3, r4, r9, r12
+	mov	r5, #0
+	umlal	r3, r5, r10, r11
+	umaal	r4, r5, r10, r12
+	mov	r6, #0
+	umlal	r5, r6, r11, r12
+	adds	r1, r1, r1
+	adcs	r2, r2, r2
+	adcs	r3, r3, r3
+	adcs	r4, r4, r4
+	adcs	r5, r5, r5
+	adcs	r6, r6, r6
+	add	rp, rp, #4
+	adc	r7, r8, #0
+	ldm	rp, {r8,r9,r10,r11,r12,r14}
+	adds	r1, r1, r8
+	adcs	r2, r2, r9
+	adcs	r3, r3, r10
+	adcs	r4, r4, r11
+	adcs	r5, r5, r12
+	adcs	r6, r6, r14
+	adc	r7, r7, #0
+	stm	rp, {r1,r2,r3,r4,r5,r6,r7}
+	pop	{r4-r11, pc}
+EPILOGUE()
diff --git a/mpn/arm/v6/submul_1.asm b/mpn/arm/v6/submul_1.asm
new file mode 100644
index 0000000..59772fd
--- /dev/null
+++ b/mpn/arm/v6/submul_1.asm
@@ -0,0 +1,125 @@
+dnl  ARM mpn_submul_1.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C StrongARM:	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.75
+C Cortex-A15	 4.0
+
+C This loop complements U on the fly,
+C   U' = B^n - 1 - U
+C and then uses that
+C   R - U*v = R + U'*v + v - B^n v
+
+C TODO
+C  * Micro-optimise feed-in code.
+C  * Optimise for n=1,2 by delaying register saving.
+C  * Try using ldm/stm.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+	stmfd	sp!, { r4, r5, r6, r7 }
+
+	ands	r6, n, #3		C n mod 4 selects feed-in path
+	mov	r12, v0			C carry seeded with v0 per the identity above
+	beq	L(fi0)
+	cmp	r6, #2
+	bcc	L(fi1)
+	beq	L(fi2)
+
+L(fi3):	ldr	r4, [up], #12
+	mvn	r4, r4			C complement U limbs on the fly
+	ldr	r6, [rp, #0]
+	ldr	r5, [up, #-8]
+	b	L(lo3)
+
+L(fi0):	ldr	r5, [up], #16
+	mvn	r5, r5
+	ldr	r7, [rp], #4
+	ldr	r4, [up, #-12]
+	b	L(lo0)
+
+L(fi1):	ldr	r4, [up], #4
+	mvn	r4, r4
+	ldr	r6, [rp], #8
+	subs	n, n, #1
+	beq	L(1)			C n = 1: skip straight to wind-down
+	ldr	r5, [up]
+	b	L(lo1)
+
+L(fi2):	ldr	r5, [up], #8
+	mvn	r5, r5
+	ldr	r7, [rp], #12
+	ldr	r4, [up, #-4]
+	b	L(lo2)
+
+	ALIGN(16)
+L(top):	ldr	r6, [rp, #-8]
+	ldr	r5, [up]
+	str	r7, [rp, #-12]
+L(lo1):	umaal	r6, r12, r4, v0		C r12:r6 = r4*v0 + r6 + r12
+	add	up, up, #16
+	mvn	r5, r5
+	ldr	r7, [rp, #-4]
+	ldr	r4, [up, #-12]
+	str	r6, [rp, #-8]
+L(lo0):	umaal	r7, r12, r5, v0
+	mvn	r4, r4
+	ldr	r6, [rp, #0]
+	ldr	r5, [up, #-8]
+	str	r7, [rp, #-4]
+L(lo3):	umaal	r6, r12, r4, v0
+	mvn	r5, r5
+	ldr	r7, [rp, #4]
+	ldr	r4, [up, #-4]
+	str	r6, [rp], #16
+L(lo2):	umaal	r7, r12, r5, v0
+	mvn	r4, r4
+	subs	n, n, #4
+	bhi	L(top)
+
+	ldr	r6, [rp, #-8]
+	str	r7, [rp, #-12]
+L(1):	umaal	r6, r12, r4, v0
+	str	r6, [rp, #-8]
+	sub	r0, v0, r12		C undo the v0 bias to return the true borrow
+	ldmfd	sp!, { r4, r5, r6, r7 }
+	bx	lr
+EPILOGUE()
diff --git a/mpn/arm64/aors_n.asm b/mpn/arm64/aors_n.asm
new file mode 100644
index 0000000..2e870f5
--- /dev/null
+++ b/mpn/arm64/aors_n.asm
@@ -0,0 +1,125 @@
+dnl  ARM64 mpn_add_n and mpn_sub_n
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	2.75-3.25
+C Cortex-A57	 1.5
+C X-Gene	 2.0
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`vp', `x2')
+define(`n',  `x3')
+
+ifdef(`OPERATION_add_n', `
+  define(`ADDSUBC',	adcs)
+  define(`CLRCY',	`cmn	xzr, xzr')
+  define(`SETCY',	`cmp	$1, #1')
+  define(`RETVAL',	`cset	x0, cs')
+  define(`func_n',	mpn_add_n)
+  define(`func_nc',	mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(`ADDSUBC',	sbcs)
+  define(`CLRCY',	`cmp	xzr, xzr')
+  define(`SETCY',	`cmp	xzr, $1')
+  define(`RETVAL',	`cset	x0, cc')
+  define(`func_n',	mpn_sub_n)
+  define(`func_nc',	mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+	SETCY(	x4)			C seed the carry or borrow flag from the carry-in argument
+	b	L(ent)
+EPILOGUE()
+PROLOGUE(func_n)
+	CLRCY				C start with no carry or borrow
+L(ent):	lsr	x17, n, #2		C x17 = count of full 4-limb chunks
+	tbz	n, #0, L(bx0)		C dispatch on n mod 4
+
+L(bx1):	ldr	x7, [up]
+	ldr	x11, [vp]
+	ADDSUBC	x13, x7, x11
+	str	x13, [rp],#8
+	tbnz	n, #1, L(b11)
+
+L(b01):	cbz	x17, L(ret)
+	ldp	x4, x5, [up,#8]
+	ldp	x8, x9, [vp,#8]
+	sub	up, up, #8		C bias pointers for the fixed loop offsets
+	sub	vp, vp, #8
+	b	L(mid)
+
+L(b11):	ldp	x6, x7, [up,#8]
+	ldp	x10, x11, [vp,#8]
+	add	up, up, #8
+	add	vp, vp, #8
+	cbz	x17, L(end)
+	b	L(top)
+
+L(bx0):	tbnz	n, #1, L(b10)
+
+L(b00):	ldp	x4, x5, [up]
+	ldp	x8, x9, [vp]
+	sub	up, up, #16
+	sub	vp, vp, #16
+	b	L(mid)
+
+L(b10):	ldp	x6, x7, [up]
+	ldp	x10, x11, [vp]
+	cbz	x17, L(end)
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up,#16]	C software-pipelined: load next pair while adding current
+	ldp	x8, x9, [vp,#16]
+	ADDSUBC	x12, x6, x10
+	ADDSUBC	x13, x7, x11
+	stp	x12, x13, [rp],#16
+L(mid):	ldp	x6, x7, [up,#32]!
+	ldp	x10, x11, [vp,#32]!
+	ADDSUBC	x12, x4, x8
+	ADDSUBC	x13, x5, x9
+	stp	x12, x13, [rp],#16
+	sub	x17, x17, #1		C sub does not touch the carry flag chain
+	cbnz	x17, L(top)
+
+L(end):	ADDSUBC	x12, x6, x10
+	ADDSUBC	x13, x7, x11
+	stp	x12, x13, [rp]
+L(ret):	RETVAL				C x0 = final carry or borrow
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/aorsmul_1.asm b/mpn/arm64/aorsmul_1.asm
new file mode 100644
index 0000000..5007a02
--- /dev/null
+++ b/mpn/arm64/aorsmul_1.asm
@@ -0,0 +1,145 @@
+dnl  ARM64 mpn_addmul_1 and mpn_submul_1
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013, 2015, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	       addmul_1        submul_1
+C	     cycles/limb     cycles/limb
+C Cortex-A53	9.3-9.8		9.3-9.8
+C Cortex-A55    9.0-9.5		9.3-9.8
+C Cortex-A57	 7		 7
+C Cortex-A72
+C Cortex-A73	 6		 6
+C X-Gene	 5		 5
+C Apple M1	 1.75		 1.75
+
+C NOTES
+C  * It is possible to keep the carry chain alive between the addition blocks
+C    and thus avoid csinc, but only for addmul_1.  Since that saves no time
+C    on the tested pipelines, we keep addmul_1 and submul_1 similar.
+C  * We could separate feed-in into 4 blocks, one for each residue (mod 4).
+C    That is likely to save a few cycles.
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n',  `x2')
+define(`v0', `x3')
+
+ifdef(`OPERATION_addmul_1', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`COND',	`cc')
+  define(`func',	mpn_addmul_1)')
+ifdef(`OPERATION_submul_1', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`COND',	`cs')
+  define(`func',	mpn_submul_1)')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+PROLOGUE(func)
+	adds	x15, xzr, xzr
+
+	tbz	n, #0, L(1)
+
+	ldr	x4, [up],#8
+	mul	x8, x4, v0
+	umulh	x12, x4, v0
+	ldr	x4, [rp]
+	ADDSUB	x8, x4, x8
+	csinc	x15, x12, x12, COND
+	str	x8, [rp],#8
+
+L(1):	tbz	n, #1, L(2)
+
+	ldp	x4, x5, [up],#16
+	mul	x8, x4, v0
+	umulh	x12, x4, v0
+	mul	x9, x5, v0
+	umulh	x13, x5, v0
+	adds	x8, x8, x15
+	adcs	x9, x9, x12
+	ldp	x4, x5, [rp]
+	adc	x15, x13, xzr
+	ADDSUB	x8, x4, x8
+	ADDSUBC	x9, x5, x9
+	csinc	x15, x15, x15, COND
+	stp	x8, x9, [rp],#16
+
+L(2):	lsr	n, n, #2
+	cbz	n, L(le3)
+	ldp	x4, x5, [up],#32
+	ldp	x6, x7, [up,#-16]
+	b	L(mid)
+L(le3):	mov	x0, x15
+	ret
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up],#32
+	ldp	x6, x7, [up,#-16]
+	ADDSUB	x8, x16, x8
+	ADDSUBC	x9, x17, x9
+	stp	x8, x9, [rp],#32
+	ADDSUBC	x10, x12, x10
+	ADDSUBC	x11, x13, x11
+	stp	x10, x11, [rp,#-16]
+	csinc	x15, x15, x15, COND
+L(mid):	sub	n, n, #1
+	mul	x8, x4, v0
+	umulh	x12, x4, v0
+	mul	x9, x5, v0
+	umulh	x13, x5, v0
+	adds	x8, x8, x15
+	mul	x10, x6, v0
+	umulh	x14, x6, v0
+	adcs	x9, x9, x12
+	mul	x11, x7, v0
+	umulh	x15, x7, v0
+	adcs	x10, x10, x13
+	ldp	x16, x17, [rp]
+	adcs	x11, x11, x14
+	ldp	x12, x13, [rp,#16]
+	adc	x15, x15, xzr
+	cbnz	n, L(top)
+
+	ADDSUB	x8, x16, x8
+	ADDSUBC	x9, x17, x9
+	ADDSUBC	x10, x12, x10
+	ADDSUBC	x11, x13, x11
+	stp	x8, x9, [rp]
+	stp	x10, x11, [rp,#16]
+	csinc	x0, x15, x15, COND
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/aorsorrlsh1_n.asm b/mpn/arm64/aorsorrlsh1_n.asm
new file mode 100644
index 0000000..c398957
--- /dev/null
+++ b/mpn/arm64/aorsorrlsh1_n.asm
@@ -0,0 +1,43 @@
+dnl  ARM64 mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsblsh1_n.
+
+dnl  Copyright 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+define(LSH,		1)
+define(RSH,		63)
+
+ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
+ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
+
+include_mpn(`arm64/aorsorrlshC_n.asm')
diff --git a/mpn/arm64/aorsorrlsh2_n.asm b/mpn/arm64/aorsorrlsh2_n.asm
new file mode 100644
index 0000000..4139027
--- /dev/null
+++ b/mpn/arm64/aorsorrlsh2_n.asm
@@ -0,0 +1,43 @@
+dnl  ARM64 mpn_addlsh2_n, mpn_sublsh2_n, mpn_rsblsh2_n.
+
+dnl  Copyright 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+define(LSH,		2)
+define(RSH,		62)
+
+ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
+ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
+
+include_mpn(`arm64/aorsorrlshC_n.asm')
diff --git a/mpn/arm64/aorsorrlshC_n.asm b/mpn/arm64/aorsorrlshC_n.asm
new file mode 100644
index 0000000..61c506a
--- /dev/null
+++ b/mpn/arm64/aorsorrlshC_n.asm
@@ -0,0 +1,139 @@
+dnl  ARM64 mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	3.25-3.75
+C Cortex-A57	 2.18
+C X-Gene	 2.5
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`vp', `x2')
+define(`n',  `x3')
+
+ifdef(`DO_add', `
+  define(`ADDSUB',	`adds	$1, $2, $3')
+  define(`ADDSUBC',	`adcs	$1, $2, $3')
+  define(`CLRRCY',	`adds	$1, xzr, xzr')
+  define(`RETVAL',	`adc	x0, $1, xzr')
+  define(`func_n',	mpn_addlsh`'LSH`'_n)')
+ifdef(`DO_sub', `
+  define(`ADDSUB',	`subs	$1, $3, $2')
+  define(`ADDSUBC',	`sbcs	$1, $3, $2')
+  define(`CLRRCY',	`subs	$1, xzr, xzr')
+  define(`RETVAL',	`cinc	x0, $1, cc')
+  define(`func_n',	mpn_sublsh`'LSH`'_n)')
+ifdef(`DO_rsb', `
+  define(`ADDSUB',	`subs	$1, $2, $3')
+  define(`ADDSUBC',	`sbcs	$1, $2, $3')
+  define(`CLRRCY',	`subs	$1, xzr, xzr')
+  define(`RETVAL',	`sbc	x0, $1, xzr')
+  define(`func_n',	mpn_rsblsh`'LSH`'_n)')
+
+ASM_START()
+PROLOGUE(func_n)
+	lsr	x6, n, #2
+	tbz	n, #0, L(bx0)
+
+L(bx1):	ldr	x5, [up]
+	tbnz	n, #1, L(b11)
+
+L(b01):	ldr	x11, [vp]
+	cbz	x6, L(1)
+	ldp	x8, x9, [vp,#8]
+	lsl	x13, x11, #LSH
+	ADDSUB(	x15, x13, x5)
+	str	x15, [rp],#8
+	sub	up, up, #24
+	sub	vp, vp, #8
+	b	L(mid)
+
+L(1):	lsl	x13, x11, #LSH
+	ADDSUB(	x15, x13, x5)
+	str	x15, [rp]
+	lsr	x0, x11, RSH
+	RETVAL(	 x0, x1)
+	ret
+
+L(b11):	ldr	x9, [vp]
+	ldp	x10, x11, [vp,#8]!
+	lsl	x13, x9, #LSH
+	ADDSUB(	x17, x13, x5)
+	str	x17, [rp],#8
+	sub	up, up, #8
+	cbz	x6, L(end)
+	b	L(top)
+
+L(bx0):	tbnz	n, #1, L(b10)
+
+L(b00):	CLRRCY(	x11)
+	ldp	x8, x9, [vp],#-16
+	sub	up, up, #32
+	b	L(mid)
+
+L(b10):	CLRRCY(	x9)
+	ldp	x10, x11, [vp]
+	sub	up, up, #16
+	cbz	x6, L(end)
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up,#16]
+	extr	x12, x10, x9, #RSH
+	ldp	x8, x9, [vp,#16]
+	extr	x13, x11, x10, #RSH
+	ADDSUBC(x14, x12, x4)
+	ADDSUBC(x15, x13, x5)
+	stp	x14, x15, [rp],#16
+L(mid):	ldp	x4, x5, [up,#32]!
+	extr	x12, x8, x11, #RSH
+	ldp	x10, x11, [vp,#32]!
+	extr	x13, x9, x8, #RSH
+	ADDSUBC(x16, x12, x4)
+	ADDSUBC(x17, x13, x5)
+	stp	x16, x17, [rp],#16
+	sub	x6, x6, #1
+	cbnz	x6, L(top)
+
+L(end):	ldp	x4, x5, [up,#16]
+	extr	x12, x10, x9, #RSH
+	extr	x13, x11, x10, #RSH
+	ADDSUBC(x14, x12, x4)
+	ADDSUBC(x15, x13, x5)
+	stp	x14, x15, [rp]
+	lsr	x0, x11, RSH
+	RETVAL(	 x0, x1)
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/arm64-defs.m4 b/mpn/arm64/arm64-defs.m4
new file mode 100644
index 0000000..46149f7
--- /dev/null
+++ b/mpn/arm64/arm64-defs.m4
@@ -0,0 +1,53 @@
+divert(-1)
+
+dnl  m4 macros for ARM64 ELF assembler.
+
+dnl  Copyright 2020 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+dnl  Standard commenting is with @, the default m4 # is for constants and we
+dnl  don't want to disable macro expansions in or after them.
+
+changecom
+
+
+dnl  LEA_HI(reg,gmp_symbol), LEA_LO(reg,gmp_symbol)
+dnl
+dnl  Load the address of gmp_symbol into a register. We split this into two
+dnl  parts to allow separation for manual insn scheduling.
+
+ifdef(`PIC',`dnl
+define(`LEA_HI', `adrp	$1, :got:$2')dnl
+define(`LEA_LO', `ldr	$1, [$1, #:got_lo12:$2]')dnl
+',`dnl
+define(`LEA_HI', `adrp	$1, $2')dnl
+define(`LEA_LO', `add	$1, $1, :lo12:$2')dnl
+')dnl
+
+divert`'dnl
diff --git a/mpn/arm64/bdiv_dbm1c.asm b/mpn/arm64/bdiv_dbm1c.asm
new file mode 100644
index 0000000..f684d0a
--- /dev/null
+++ b/mpn/arm64/bdiv_dbm1c.asm
@@ -0,0 +1,111 @@
+dnl  ARM64 mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008, 2011, 2012, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 8
+C Cortex-A57	 7
+C X-Gene	 4.25
+
+define(`qp',	  `x0')
+define(`up',	  `x1')
+define(`n',	  `x2')
+define(`bd',	  `x3')
+define(`cy',	  `x4')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+	ldr	x5, [up], #8
+	ands	x6, n, #3
+	b.eq	L(fi0)
+	cmp	x6, #2
+	b.cc	L(fi1)
+	b.eq	L(fi2)
+
+L(fi3):	mul	x12, x5, bd
+	umulh	x13, x5, bd
+	ldr	x5, [up], #8
+	b	L(lo3)
+
+L(fi0):	mul	x10, x5, bd
+	umulh	x11, x5, bd
+	ldr	x5, [up], #8
+	b	L(lo0)
+
+L(fi1):	subs	n, n, #1
+	mul	x12, x5, bd
+	umulh	x13, x5, bd
+	b.ls	L(wd1)
+	ldr	x5, [up], #8
+	b	L(lo1)
+
+L(fi2):	mul	x10, x5, bd
+	umulh	x11, x5, bd
+	ldr	x5, [up], #8
+	b	L(lo2)
+
+L(top):	ldr	x5, [up], #8
+	subs	x4, x4, x10
+	str	x4, [qp], #8
+	sbc	x4, x4, x11
+L(lo1):	mul	x10, x5, bd
+	umulh	x11, x5, bd
+	ldr	x5, [up], #8
+	subs	x4, x4, x12
+	str	x4, [qp], #8
+	sbc	x4, x4, x13
+L(lo0):	mul	x12, x5, bd
+	umulh	x13, x5, bd
+	ldr	x5, [up], #8
+	subs	x4, x4, x10
+	str	x4, [qp], #8
+	sbc	x4, x4, x11
+L(lo3):	mul	x10, x5, bd
+	umulh	x11, x5, bd
+	ldr	x5, [up], #8
+	subs	x4, x4, x12
+	str	x4, [qp], #8
+	sbc	x4, x4, x13
+L(lo2):	subs	n, n, #4
+	mul	x12, x5, bd
+	umulh	x13, x5, bd
+	b.hi	L(top)
+
+L(wd2):	subs	x4, x4, x10
+	str	x4, [qp], #8
+	sbc	x4, x4, x11
+L(wd1):	subs	x4, x4, x12
+	str	x4, [qp]
+	sbc	x0, x4, x13
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/bdiv_q_1.asm b/mpn/arm64/bdiv_q_1.asm
new file mode 100644
index 0000000..7061255
--- /dev/null
+++ b/mpn/arm64/bdiv_q_1.asm
@@ -0,0 +1,122 @@
+dnl  ARM64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C               cycles/limb
+C               norm   unorm
+C Cortex-A53	12	15
+C Cortex-A57	12	12
+C Cortex-A72
+C Cortex-A73
+C X-Gene	11	11
+
+C TODO
+C  * Scheduling of umulh later in the unorm loop brings A53 time to 12 c/l.
+C    Unfortunately, that requires software pipelining.
+
+define(`rp',  `x0')
+define(`up',  `x1')
+define(`n',   `x2')
+define(`d',   `x3')
+define(`di',  `x4')		C	just mpn_pi1_bdiv_q_1
+define(`cnt', `x5')		C	just mpn_pi1_bdiv_q_1
+
+define(`cy',  `r7')
+define(`tnc', `x8')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_q_1)
+
+	rbit	x6, d
+	clz	cnt, x6
+	lsr	d, d, cnt
+
+	LEA_HI(	x7, binvert_limb_table)
+	ubfx	x6, d, 1, 7
+	LEA_LO(	x7, binvert_limb_table)
+	ldrb	w6, [x7, x6]
+	ubfiz	x7, x6, 1, 8
+	umull	x6, w6, w6
+	msub	x6, x6, d, x7
+	lsl	x7, x6, 1
+	mul	x6, x6, x6
+	msub	x6, x6, d, x7
+	lsl	x7, x6, 1
+	mul	x6, x6, x6
+	msub	di, x6, d, x7
+
+	b	GSYM_PREFIX`'mpn_pi1_bdiv_q_1
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+	sub	n, n, #1
+	subs	x6, x6, x6		C clear x6; sets C flag (no borrow) for sbcs
+	ldr	x9, [up],#8
+	cbz	cnt, L(norm)
+
+L(unorm):
+	lsr	x12, x9, cnt
+	cbz	n, L(eu1)
+	sub	tnc, xzr, cnt
+
+L(tpu):	ldr	x9, [up],#8
+	lsl	x7, x9, tnc
+	orr	x7, x7, x12
+	sbcs	x6, x7, x6
+	mul	x7, x6, di
+	str	x7, [rp],#8
+	lsr	x12, x9, cnt
+	umulh	x6, x7, d
+	sub	n, n, #1
+	cbnz	n, L(tpu)
+
+L(eu1):	sbcs	x6, x12, x6
+	mul	x6, x6, di
+	str	x6, [rp]
+	ret
+
+L(norm):
+	mul	x5, x9, di
+	str	x5, [rp],#8
+	cbz	n, L(en1)
+
+L(tpn):	ldr	x9, [up],#8
+	umulh	x5, x5, d
+	sbcs	x5, x9, x5
+	mul	x5, x5, di
+	str	x5, [rp],#8
+	sub	n, n, #1
+	cbnz	n, L(tpn)
+
+L(en1):	ret
+EPILOGUE()
diff --git a/mpn/arm64/cnd_aors_n.asm b/mpn/arm64/cnd_aors_n.asm
new file mode 100644
index 0000000..8b39d36
--- /dev/null
+++ b/mpn/arm64/cnd_aors_n.asm
@@ -0,0 +1,129 @@
+dnl  ARM64 mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	3.87-4.37
+C Cortex-A57	 1.75
+C X-Gene	 2.0
+
+changecom(blah)
+
+define(`cnd',	`x0')
+define(`rp',	`x1')
+define(`up',	`x2')
+define(`vp',	`x3')
+define(`n',	`x4')
+
+ifdef(`OPERATION_cnd_add_n', `
+  define(`ADDSUBC',	adcs)
+  define(`CLRCY',	`cmn	xzr, xzr')
+  define(`RETVAL',	`cset	x0, cs')
+  define(`func',	mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+  define(`ADDSUBC',	sbcs)
+  define(`CLRCY',	`cmp	xzr, xzr')
+  define(`RETVAL',	`cset	x0, cc')
+  define(`func',	mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	cmp	cnd, #1
+	sbc	cnd, cnd, cnd
+
+	CLRCY
+
+	lsr	x17, n, #2
+	tbz	n, #0, L(bx0)
+
+L(bx1):	ldr	x13, [vp]
+	ldr	x11, [up]
+	bic	x7, x13, cnd
+	ADDSUBC	x9, x11, x7
+	str	x9, [rp]
+	tbnz	n, #1, L(b11)
+
+L(b01):	cbz	x17, L(rt)
+	ldp	x12, x13, [vp,#8]
+	ldp	x10, x11, [up,#8]
+	sub	up, up, #8
+	sub	vp, vp, #8
+	sub	rp, rp, #24
+	b	L(mid)
+
+L(b11):	ldp	x12, x13, [vp,#8]!
+	ldp	x10, x11, [up,#8]!
+	sub	rp, rp, #8
+	cbz	x17, L(end)
+	b	L(top)
+
+L(bx0):	ldp	x12, x13, [vp]
+	ldp	x10, x11, [up]
+	tbnz	n, #1, L(b10)
+
+L(b00):	sub	up, up, #16
+	sub	vp, vp, #16
+	sub	rp, rp, #32
+	b	L(mid)
+
+L(b10):	sub	rp, rp, #16
+	cbz	x17, L(end)
+
+	ALIGN(16)
+L(top):	bic	x6, x12, cnd
+	bic	x7, x13, cnd
+	ldp	x12, x13, [vp,#16]
+	ADDSUBC	x8, x10, x6
+	ADDSUBC	x9, x11, x7
+	ldp	x10, x11, [up,#16]
+	stp	x8, x9, [rp,#16]
+L(mid):	bic	x6, x12, cnd
+	bic	x7, x13, cnd
+	ldp	x12, x13, [vp,#32]!
+	ADDSUBC	x8, x10, x6
+	ADDSUBC	x9, x11, x7
+	ldp	x10, x11, [up,#32]!
+	stp	x8, x9, [rp,#32]!
+	sub	x17, x17, #1
+	cbnz	x17, L(top)
+
+L(end):	bic	x6, x12, cnd
+	bic	x7, x13, cnd
+	ADDSUBC	x8, x10, x6
+	ADDSUBC	x9, x11, x7
+	stp	x8, x9, [rp,#16]
+L(rt):	RETVAL
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/com.asm b/mpn/arm64/com.asm
new file mode 100644
index 0000000..43e052a
--- /dev/null
+++ b/mpn/arm64/com.asm
@@ -0,0 +1,92 @@
+dnl  ARM64 mpn_com.
+
+dnl  Copyright 2013, 2020 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 
+C Cortex-A55	 
+C Cortex-A57	 
+C Cortex-A72	 
+C Cortex-A73	 
+C X-Gene	 
+C Apple M1	 
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n',  `x2')
+
+ASM_START()
+PROLOGUE(mpn_com)
+	cmp	n, #3
+	b.le	L(bc)
+
+C Complement limbs one at a time until rp is 128-bit aligned
+	tbz	rp, #3, L(al2)
+	ldr	x4, [up],#8
+	sub	n, n, #1
+	mvn	x4, x4
+	str	x4, [rp],#8
+
+L(al2):	ldp	x4,x5, [up],#16
+	sub	n, n, #6
+	tbnz	n, #63, L(end)
+
+	ALIGN(16)
+L(top):	ldp	x6,x7, [up],#32
+	mvn	x4, x4
+	mvn	x5, x5
+	stp	x4,x5, [rp],#32
+	ldp	x4,x5, [up,#-16]
+	mvn	x6, x6
+	mvn	x7, x7
+	stp	x6,x7, [rp,#-16]
+	sub	n, n, #4
+	tbz	n, #63, L(top)
+
+L(end):	mvn	x4, x4
+	mvn	x5, x5
+	stp	x4,x5, [rp],#16
+
+C Complement last 0-3 limbs.  Note that rp is aligned after loop, but not when
+C we arrive here via L(bc)
+L(bc):	tbz	n, #1, L(tl1)
+	ldp	x4,x5, [up],#16
+	mvn	x4, x4
+	mvn	x5, x5
+	stp	x4,x5, [rp],#16
+L(tl1):	tbz	n, #0, L(tl2)
+	ldr	x4, [up]
+	mvn	x4, x4
+	str	x4, [rp]
+L(tl2):	ret
+EPILOGUE()
diff --git a/mpn/arm64/copyd.asm b/mpn/arm64/copyd.asm
new file mode 100644
index 0000000..d4705a7
--- /dev/null
+++ b/mpn/arm64/copyd.asm
@@ -0,0 +1,85 @@
+dnl  ARM64 mpn_copyd.
+
+dnl  Copyright 2013, 2020 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 1.8
+C Cortex-A55	 1.28
+C Cortex-A57
+C Cortex-A72	 1
+C Cortex-A73	 1.1-1.35 (alignment dependent)
+C X-Gene	 1
+C Apple M1	 0.31
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n',  `x2')
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+	add	rp, rp, n, lsl #3
+	add	up, up, n, lsl #3
+
+	cmp	n, #3
+	b.le	L(bc)
+
+C Copy until rp is 128-bit aligned
+	tbz	rp, #3, L(al2)
+	ldr	x4, [up,#-8]!
+	sub	n, n, #1
+	str	x4, [rp,#-8]!
+
+L(al2):	ldp	x4,x5, [up,#-16]!
+	sub	n, n, #6
+	tbnz	n, #63, L(end)
+
+	ALIGN(16)
+L(top):	ldp	x6,x7, [up,#-16]
+	stp	x4,x5, [rp,#-16]
+	ldp	x4,x5, [up,#-32]!
+	stp	x6,x7, [rp,#-32]!
+	sub	n, n, #4
+	tbz	n, #63, L(top)
+
+L(end):	stp	x4,x5, [rp,#-16]!
+
+C Copy last 0-3 limbs.  Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc):	tbz	n, #1, L(tl1)
+	ldp	x4,x5, [up,#-16]!
+	stp	x4,x5, [rp,#-16]!
+L(tl1):	tbz	n, #0, L(tl2)
+	ldr	x4, [up,#-8]
+	str	x4, [rp,#-8]
+L(tl2):	ret
+EPILOGUE()
diff --git a/mpn/arm64/copyi.asm b/mpn/arm64/copyi.asm
new file mode 100644
index 0000000..2ebda8c
--- /dev/null
+++ b/mpn/arm64/copyi.asm
@@ -0,0 +1,82 @@
+dnl  ARM64 mpn_copyi.
+
+dnl  Copyright 2013, 2020 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 1.8
+C Cortex-A55	 1.28
+C Cortex-A57
+C Cortex-A72	 1
+C Cortex-A73	 1.1-1.35 (alignment dependent)
+C X-Gene	 1
+C Apple M1	 0.31
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n',  `x2')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+	cmp	n, #3
+	b.le	L(bc)
+
+C Copy until rp is 128-bit aligned
+	tbz	rp, #3, L(al2)
+	ldr	x4, [up],#8
+	sub	n, n, #1
+	str	x4, [rp],#8
+
+L(al2):	ldp	x4,x5, [up],#16
+	sub	n, n, #6
+	tbnz	n, #63, L(end)
+
+	ALIGN(16)
+L(top):	ldp	x6,x7, [up],#32
+	stp	x4,x5, [rp],#32
+	ldp	x4,x5, [up,#-16]
+	stp	x6,x7, [rp,#-16]
+	sub	n, n, #4
+	tbz	n, #63, L(top)
+
+L(end):	stp	x4,x5, [rp],#16
+
+C Copy last 0-3 limbs.  Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc):	tbz	n, #1, L(tl1)
+	ldp	x4,x5, [up],#16
+	stp	x4,x5, [rp],#16
+L(tl1):	tbz	n, #0, L(tl2)
+	ldr	x4, [up]
+	str	x4, [rp]
+L(tl2):	ret
+EPILOGUE()
diff --git a/mpn/arm64/darwin.m4 b/mpn/arm64/darwin.m4
new file mode 100644
index 0000000..36e72fe
--- /dev/null
+++ b/mpn/arm64/darwin.m4
@@ -0,0 +1,50 @@
+divert(-1)
+
+dnl  m4 macros for ARM64 Darwin assembler.
+
+dnl  Copyright 2020 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+dnl  Standard commenting is with @, the default m4 # is for constants and we
+dnl  don't want to disable macro expansions in or after them.
+
+changecom
+
+
+dnl  LEA_HI(reg,gmp_symbol), LEA_LO(reg,gmp_symbol)
+dnl
+dnl  Load the address of gmp_symbol into a register. We split this into two
+dnl  parts to allow separation for manual insn scheduling.  TODO: Darwin allows
+dnl  for relaxing these two insns into an adr and a nop, but that requires the
+dnl  .loh pseudo for connecting them.
+
+define(`LEA_HI',`adrp	$1, $2@GOTPAGE')dnl
+define(`LEA_LO',`ldr	$1, [$1, $2@GOTPAGEOFF]')dnl
+
+divert`'dnl
diff --git a/mpn/arm64/divrem_1.asm b/mpn/arm64/divrem_1.asm
new file mode 100644
index 0000000..32d1bbe
--- /dev/null
+++ b/mpn/arm64/divrem_1.asm
@@ -0,0 +1,231 @@
+dnl  ARM64 mpn_divrem_1 and mpn_preinv_divrem_1.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2020 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+dnl TODO
+dnl  * Handle the most significant quotient limb for the unnormalised case
+dnl    specially, just like in the C code.  (It is very often 0.)
+
+define(`qp_arg',   x0)
+define(`fn_arg',   x1)
+define(`np_arg',   x2)
+define(`n_arg',    x3)
+define(`d_arg',    x4)
+define(`dinv_arg', x5)
+define(`cnt_arg',  x6)
+
+define(`qp',   x19)
+define(`np',   x20)
+define(`n',    x21)
+define(`d',    x22)
+define(`fn',   x24)
+define(`dinv', x0)
+define(`cnt',  x23)
+define(`tnc',  x8)
+
+dnl mp_limb_t
+dnl mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
+dnl               mp_srcptr np, mp_size_t n,
+dnl               mp_limb_t d_unnorm)
+
+dnl mp_limb_t
+dnl mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
+dnl                      mp_srcptr np, mp_size_t n,
+dnl                      mp_limb_t d_unnorm, mp_limb_t dinv, int cnt)
+
+ASM_START()
+
+PROLOGUE(mpn_preinv_divrem_1)
+	cbz	n_arg, L(fz)
+	stp	x29, x30, [sp, #-80]!
+	mov	x29, sp
+	stp	x19, x20, [sp, #16]
+	stp	x21, x22, [sp, #32]
+	stp	x23, x24, [sp, #48]
+
+	sub	n, n_arg, #1
+	add	x7, n, fn_arg
+	add	np, np_arg, n, lsl #3
+	add	qp, qp_arg, x7, lsl #3
+	mov	fn, fn_arg
+	mov	d, d_arg
+	mov	dinv, dinv_arg
+	tbnz	d_arg, #63, L(nentry)
+	mov	cnt, cnt_arg
+	b	L(uentry)
+EPILOGUE()
+
+PROLOGUE(mpn_divrem_1)
+	cbz	n_arg, L(fz)
+	stp	x29, x30, [sp, #-80]!
+	mov	x29, sp
+	stp	x19, x20, [sp, #16]
+	stp	x21, x22, [sp, #32]
+	stp	x23, x24, [sp, #48]
+
+	sub	n, n_arg, #1
+	add	x7, n, fn_arg
+	add	np, np_arg, n, lsl #3
+	add	qp, qp_arg, x7, lsl #3
+	mov	fn, fn_arg
+	mov	d, d_arg
+	tbnz	d_arg, #63, L(normalised)
+
+L(unnorm):
+	clz	cnt, d
+	lsl	x0, d, cnt
+	bl	GSYM_PREFIX`'MPN(invert_limb)
+L(uentry):
+	lsl	d, d, cnt
+	ldr	x7, [np], #-8
+	sub	tnc, xzr, cnt
+	lsr	x11, x7, tnc		C r
+	lsl	x1, x7, cnt
+	cbz	n, L(uend)
+
+L(utop):ldr	x7, [np], #-8
+	add	x2, x11, #1
+	mul	x10, x11, dinv
+	umulh	x17, x11, dinv
+	lsr	x9, x7, tnc
+	orr	x1, x1, x9
+	adds	x10, x1, x10
+	adc	x2, x2, x17
+	msub	x11, d, x2, x1
+	lsl	x1, x7, cnt
+	cmp	x10, x11
+	add	x14, x11, d
+	csel	x11, x14, x11, cc
+	sbc	x2, x2, xzr
+	cmp	x11, d
+	bcs	L(ufx)
+L(uok):	str	x2, [qp], #-8
+	sub	n, n, #1
+	cbnz	n, L(utop)
+
+L(uend):add	x2, x11, #1
+	mul	x10, x11, dinv
+	umulh	x17, x11, dinv
+	adds	x10, x1, x10
+	adc	x2, x2, x17
+	msub	x11, d, x2, x1
+	cmp	x10, x11
+	add	x14, x11, d
+	csel	x11, x14, x11, cc
+	sbc	x2, x2, xzr
+	subs	x14, x11, d
+	adc	x2, x2, xzr
+	csel	x11, x14, x11, cs
+	str	x2, [qp], #-8
+
+	cbnz	fn, L(ftop)
+	lsr	x0, x11, cnt
+	ldp	x19, x20, [sp, #16]
+	ldp	x21, x22, [sp, #32]
+	ldp	x23, x24, [sp, #48]
+	ldp	x29, x30, [sp], #80
+	ret
+
+L(ufx):	add	x2, x2, #1
+	sub	x11, x11, d
+	b	L(uok)
+
+
+L(normalised):
+	mov	x0, d
+	bl	GSYM_PREFIX`'MPN(invert_limb)
+L(nentry):
+	ldr	x7, [np], #-8
+	subs	x14, x7, d
+	adc	x2, xzr, xzr		C hi q limb
+	csel	x11, x14, x7, cs
+	b	L(nok)
+
+L(ntop):ldr	x1, [np], #-8
+	add	x2, x11, #1
+	mul	x10, x11, dinv
+	umulh	x17, x11, dinv
+	adds	x10, x1, x10
+	adc	x2, x2, x17
+	msub	x11, d, x2, x1
+	cmp	x10, x11
+	add	x14, x11, d
+	csel	x11, x14, x11, cc	C remainder
+	sbc	x2, x2, xzr
+	cmp	x11, d
+	bcs	L(nfx)
+L(nok):	str	x2, [qp], #-8
+	sub	n, n, #1
+	tbz	n, #63, L(ntop)
+
+L(nend):cbnz	fn, L(frac)
+	mov	x0, x11
+	ldp	x19, x20, [sp, #16]
+	ldp	x21, x22, [sp, #32]
+	ldp	x23, x24, [sp, #48]
+	ldp	x29, x30, [sp], #80
+	ret
+
+L(nfx):	add	x2, x2, #1
+	sub	x11, x11, d
+	b	L(nok)
+
+L(frac):mov	cnt, #0
+L(ftop):add	x2, x11, #1
+	mul	x10, x11, dinv
+	umulh	x17, x11, dinv
+	add	x2, x2, x17
+	msub	x11, d, x2, xzr
+	cmp	x10, x11
+	add	x14, x11, d
+	csel	x11, x14, x11, cc	C remainder
+	sbc	x2, x2, xzr
+	str	x2, [qp], #-8
+	sub	fn, fn, #1
+	cbnz	fn, L(ftop)
+
+	lsr	x0, x11, cnt
+	ldp	x19, x20, [sp, #16]
+	ldp	x21, x22, [sp, #32]
+	ldp	x23, x24, [sp, #48]
+	ldp	x29, x30, [sp], #80
+	ret
+
+C Block zero. We need this for the degenerate case of n = 0, fn != 0.
+L(fz):	cbz	fn_arg, L(zend)
+L(ztop):str	xzr, [qp_arg], #8
+	sub	fn_arg, fn_arg, #1
+	cbnz	fn_arg, L(ztop)
+L(zend):mov	x0, #0
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/gcd_11.asm b/mpn/arm64/gcd_11.asm
new file mode 100644
index 0000000..5f2dd50
--- /dev/null
+++ b/mpn/arm64/gcd_11.asm
@@ -0,0 +1,70 @@
+dnl  ARM v8a mpn_gcd_11.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+changecom(blah)
+
+C	     cycles/bit (approx)
+C Cortex-A35	 ?
+C Cortex-A53	 ?
+C Cortex-A55	 ?
+C Cortex-A57	 ?
+C Cortex-A72	 ?
+C Cortex-A73	 ?
+C Cortex-A75	 ?
+C Cortex-A76	 ?
+C Cortex-A77	 ?
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
+
+define(`u0',    `x0')
+define(`v0',    `x1')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_11)
+	subs	x3, u0, v0		C			0
+	b.eq	L(end)			C
+
+	ALIGN(16)
+L(top):	rbit	x12, x3			C			1,5
+	clz	x12, x12		C			2
+	csneg	x3, x3, x3, cs		C v = abs(u-v), even	1
+	csel	u0, v0, u0, cs		C u = min(u,v)		1
+	lsr	v0, x3, x12		C			3
+	subs	x3, u0, v0		C			4
+	b.ne	L(top)			C
+
+L(end):	ret
+EPILOGUE()
diff --git a/mpn/arm64/gcd_22.asm b/mpn/arm64/gcd_22.asm
new file mode 100644
index 0000000..5ea6abe
--- /dev/null
+++ b/mpn/arm64/gcd_22.asm
@@ -0,0 +1,112 @@
+dnl  ARM v8a mpn_gcd_22.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+changecom(blah)
+
+C	     cycles/bit (approx)
+C Cortex-A35	 ?
+C Cortex-A53	 7.26
+C Cortex-A55	 ?
+C Cortex-A57	 ?
+C Cortex-A72	 5.72
+C Cortex-A73	 6.43
+C Cortex-A75	 ?
+C Cortex-A76	 ?
+C Cortex-A77	 ?
+
+
+define(`u1',    `x0')
+define(`u0',    `x1')
+define(`v1',    `x2')
+define(`v0',    `x3')
+
+define(`t0',    `x5')
+define(`t1',    `x6')
+define(`cnt',   `x7')
+define(`tnc',   `x8')
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+
+	ALIGN(16)
+L(top):	subs	t0, u0, v0		C 0 6
+	cbz	t0, L(lowz)
+	sbcs	t1, u1, v1		C 1 7
+
+	rbit	cnt, t0			C 1
+
+	cneg	t0, t0, cc		C 2
+	cinv	t1, t1, cc		C 2 u = |u - v|
+L(bck):	csel	v0, v0, u0, cs		C 2
+	csel	v1, v1, u1, cs		C 2 v = min(u,v)
+
+	clz	cnt, cnt		C 2
+	sub	tnc, xzr, cnt		C 3
+
+	lsr	u0, t0, cnt		C 3
+	lsl	x14, t1, tnc		C 4
+	lsr	u1, t1, cnt		C 3
+	orr	u0, u0, x14		C 5
+
+	orr	x11, u1, v1
+	cbnz	x11, L(top)
+
+
+	subs	x4, u0, v0		C			0
+	b.eq	L(end1)			C
+
+	ALIGN(16)
+L(top1):rbit	x12, x4			C			1,5
+	clz	x12, x12		C			2
+	csneg	x4, x4, x4, cs		C v = abs(u-v), even	1
+	csel	u0, v0, u0, cs		C u = min(u,v)		1
+	lsr	v0, x4, x12		C			3
+	subs	x4, u0, v0		C			4
+	b.ne	L(top1)			C
+L(end1):mov	x0, u0
+	mov	x1, #0
+	ret
+
+L(lowz):C We come here when v0 - u0 = 0
+	C 1. If v1 - u1 = 0, then gcd is u = v.
+	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+	subs	t0, u1, v1
+	b.eq	L(end)
+	mov	t1, #0
+	rbit	cnt, t0			C 1
+	cneg	t0, t0, cc		C 2
+	b	L(bck)			C FIXME: make conditional
+
+L(end):	mov	x0, v0
+	mov	x1, v1
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/gmp-mparam.h b/mpn/arm64/gmp-mparam.h
new file mode 100644
index 0000000..7c0c193
--- /dev/null
+++ b/mpn/arm64/gmp-mparam.h
@@ -0,0 +1,192 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 1536 MHz Cortex-A53 */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2019-09-29, gcc 5.4 */
+
+#define DIVREM_1_NORM_THRESHOLD              3
+#define DIVREM_1_UNNORM_THRESHOLD            4
+#define MOD_1_1P_METHOD                      2  /* 2.08% faster than 1 */
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     21
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_1N_PI1_METHOD                 1  /* 38.26% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD             13
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           40
+
+#define DIV_1_VS_MUL_1_PERCENT             159
+
+#define MUL_TOOM22_THRESHOLD                14
+#define MUL_TOOM33_THRESHOLD                49
+#define MUL_TOOM44_THRESHOLD                82
+#define MUL_TOOM6H_THRESHOLD               173
+#define MUL_TOOM8H_THRESHOLD               236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      76
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      74
+
+#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_TOOM2_THRESHOLD                 18
+#define SQR_TOOM3_THRESHOLD                 67
+#define SQR_TOOM4_THRESHOLD                166
+#define SQR_TOOM6_THRESHOLD                222
+#define SQR_TOOM8_THRESHOLD                333
+
+#define MULMID_TOOM42_THRESHOLD             20
+
+#define MULMOD_BNM1_THRESHOLD               10
+#define SQRMOD_BNM1_THRESHOLD               11
+
+#define MUL_FFT_MODF_THRESHOLD             316  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    316, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {      8, 6}, \
+    {     17, 7}, {      9, 6}, {     19, 7}, {     17, 8}, \
+    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 9}, {      7, 8}, {     19, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
+    {     83,10}, {     47, 9}, {     99,10}, {     55,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255, 9}, \
+    {    131,10}, {     71, 8}, {    287,10}, {     79, 9}, \
+    {    159, 8}, {    319,10}, {     87,11}, {     47,10}, \
+    {     95, 9}, {    191, 8}, {    383,10}, {    103, 9}, \
+    {    207, 8}, {    415,10}, {    111, 9}, {    223,12}, \
+    {     31,11}, {     63, 9}, {    255, 8}, {    511,10}, \
+    {    135, 9}, {    287, 8}, {    575,11}, {     79,10}, \
+    {    159, 9}, {    319, 8}, {    639,10}, {    175, 9}, \
+    {    351, 8}, {    703,11}, {     95,10}, {    191, 9}, \
+    {    383, 8}, {    767,10}, {    207, 9}, {    415,11}, \
+    {    111,10}, {    223, 9}, {    447,12}, {     63,10}, \
+    {    255, 9}, {    511, 8}, {   1023, 9}, {    543,10}, \
+    {    287, 9}, {    575, 8}, {   1151,11}, {    159,10}, \
+    {    319, 9}, {    639,11}, {    175,10}, {    351, 9}, \
+    {    703, 8}, {   1407,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    207,10}, {    415, 9}, \
+    {    831,11}, {    223,10}, {    447,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 118
+#define MUL_FFT_THRESHOLD                 3200
+
+#define SQR_FFT_MODF_THRESHOLD             272  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    272, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      8, 5}, {     17, 6}, {     17, 7}, {     17, 8}, \
+    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
+    {     19, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     33, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
+    {     95, 8}, {    191,10}, {     55,11}, {     31,10}, \
+    {     63, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
+    {    287,10}, {     79, 9}, {    159,11}, {     47,10}, \
+    {     95, 9}, {    191, 8}, {    383, 7}, {    767,10}, \
+    {    103, 9}, {    207,12}, {     31,11}, {     63, 9}, \
+    {    255, 8}, {    511, 7}, {   1023, 9}, {    271,10}, \
+    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
+    {    319, 8}, {    639,10}, {    175, 9}, {    351, 8}, \
+    {    703,11}, {     95,10}, {    191, 9}, {    383, 8}, \
+    {    767,10}, {    207, 9}, {    415, 8}, {    831,10}, \
+    {    223,12}, {     63,10}, {    255, 9}, {    511, 8}, \
+    {   1023,10}, {    271,11}, {    143,10}, {    287, 9}, \
+    {    575, 8}, {   1151,11}, {    159,10}, {    319, 9}, \
+    {    639,11}, {    175,10}, {    351, 9}, {    703,12}, \
+    {     95,11}, {    191,10}, {    383, 9}, {    767,11}, \
+    {    207,10}, {    415, 9}, {    831,11}, {    223,10}, \
+    {    447,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 112
+#define SQR_FFT_THRESHOLD                 2688
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  38
+#define MULLO_MUL_N_THRESHOLD             6253
+#define SQRLO_BASECASE_THRESHOLD             4
+#define SQRLO_DC_THRESHOLD                  67
+#define SQRLO_SQR_THRESHOLD               5240
+
+#define DC_DIV_QR_THRESHOLD                 42
+#define DC_DIVAPPR_Q_THRESHOLD             152
+#define DC_BDIV_QR_THRESHOLD                39
+#define DC_BDIV_Q_THRESHOLD                 93
+
+#define INV_MULMOD_BNM1_THRESHOLD           37
+#define INV_NEWTON_THRESHOLD               163
+#define INV_APPR_THRESHOLD                 162
+
+#define BINV_NEWTON_THRESHOLD              194
+#define REDC_1_TO_REDC_N_THRESHOLD          43
+
+#define MU_DIV_QR_THRESHOLD                998
+#define MU_DIVAPPR_Q_THRESHOLD             998
+#define MUPI_DIV_QR_THRESHOLD               98
+#define MU_BDIV_QR_THRESHOLD               807
+#define MU_BDIV_Q_THRESHOLD                924
+
+#define POWM_SEC_TABLE  6,30,194,579,1730
+
+#define GET_STR_DC_THRESHOLD                15
+#define GET_STR_PRECOMPUTE_THRESHOLD        29
+#define SET_STR_DC_THRESHOLD               788
+#define SET_STR_PRECOMPUTE_THRESHOLD      1816
+
+#define FAC_DSC_THRESHOLD                  236
+#define FAC_ODD_THRESHOLD                   24
+
+#define MATRIX22_STRASSEN_THRESHOLD         10
+#define HGCD2_DIV1_METHOD                    1  /* 7.05% faster than 3 */
+#define HGCD_THRESHOLD                     101
+#define HGCD_APPR_THRESHOLD                104
+#define HGCD_REDUCE_THRESHOLD             1679
+#define GCD_DC_THRESHOLD                   330
+#define GCDEXT_DC_THRESHOLD                242
+#define JACOBI_BASE_METHOD                   4  /* 20.00% faster than 1 */
diff --git a/mpn/arm64/hamdist.asm b/mpn/arm64/hamdist.asm
new file mode 100644
index 0000000..1dc6a62
--- /dev/null
+++ b/mpn/arm64/hamdist.asm
@@ -0,0 +1,181 @@
+dnl  ARM64 Neon mpn_hamdist -- mpn bit hamming distance.
+
+dnl  Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 4.5
+C Cortex-A57	 1.9
+C X-Gene	 4.36
+
+C TODO
+C  * Consider greater unrolling.
+C  * Arrange to align the pointer, if that helps performance.  Use the same
+C    read-and-mask trick we use on PCs, for simplicity and performance.  (Sorry
+C    valgrind!)
+C  * Explore if explicit align directives, e.g., "[ptr:128]" help.
+C  * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
+
+changecom(blah)
+
+C INPUT PARAMETERS
+define(`ap', x0)
+define(`bp', x1)
+define(`n',  x2)
+
+C We sum into 16 16-bit counters in v4,v5, but at the end we sum them and end
+C up with 8 16-bit counters.  Therefore, we can sum to 8(2^16-1) bits, or
+C 8*(2^16-1)/64 = 0x1fff limbs.  We use a chunksize close to that, but which
+C allows the huge count code to jump deep into the code (at L(chu)).
+
+define(`maxsize',  0x1fff)
+define(`chunksize',0x1ff0)
+
+ASM_START()
+PROLOGUE(mpn_hamdist)
+
+	mov	x11, #maxsize
+	cmp	n, x11
+	b.hi	L(gt8k)
+
+L(lt8k):
+	movi	v4.16b, #0			C clear summation register
+	movi	v5.16b, #0			C clear summation register
+
+	tbz	n, #0, L(xx0)
+	sub	n, n, #1
+	ld1	{v0.1d}, [ap], #8		C load 1 limb
+	ld1	{v16.1d}, [bp], #8		C load 1 limb
+	eor	v0.16b, v0.16b, v16.16b
+	cnt	v6.16b, v0.16b
+	uadalp	v4.8h,  v6.16b			C could also splat
+
+L(xx0):	tbz	n, #1, L(x00)
+	sub	n, n, #2
+	ld1	{v0.2d}, [ap], #16		C load 2 limbs
+	ld1	{v16.2d}, [bp], #16		C load 2 limbs
+	eor	v0.16b, v0.16b, v16.16b
+	cnt	v6.16b, v0.16b
+	uadalp	v4.8h,  v6.16b
+
+L(x00):	tbz	n, #2, L(000)
+	subs	n, n, #4
+	ld1	{v0.2d,v1.2d}, [ap], #32	C load 4 limbs
+	ld1	{v16.2d,v17.2d}, [bp], #32	C load 4 limbs
+	b.ls	L(sum)
+
+L(gt4):	ld1	{v2.2d,v3.2d}, [ap], #32	C load 4 limbs
+	ld1	{v18.2d,v19.2d}, [bp], #32	C load 4 limbs
+	eor	v0.16b, v0.16b, v16.16b
+	eor	v1.16b, v1.16b, v17.16b
+	sub	n, n, #4
+	cnt	v6.16b, v0.16b
+	cnt	v7.16b, v1.16b
+	b	L(mid)
+
+L(000):	subs	n, n, #8
+	b.lo	L(e0)
+
+L(chu):	ld1	{v2.2d,v3.2d}, [ap], #32	C load 4 limbs
+	ld1	{v0.2d,v1.2d}, [ap], #32	C load 4 limbs
+	ld1	{v18.2d,v19.2d}, [bp], #32	C load 4 limbs
+	ld1	{v16.2d,v17.2d}, [bp], #32	C load 4 limbs
+	eor	v2.16b, v2.16b, v18.16b
+	eor	v3.16b, v3.16b, v19.16b
+	cnt	v6.16b, v2.16b
+	cnt	v7.16b, v3.16b
+	subs	n, n, #8
+	b.lo	L(end)
+
+L(top):	ld1	{v2.2d,v3.2d}, [ap], #32	C load 4 limbs
+	ld1	{v18.2d,v19.2d}, [bp], #32	C load 4 limbs
+	eor	v0.16b, v0.16b, v16.16b
+	eor	v1.16b, v1.16b, v17.16b
+	uadalp	v4.8h,  v6.16b
+	cnt	v6.16b, v0.16b
+	uadalp	v5.8h,  v7.16b
+	cnt	v7.16b, v1.16b
+L(mid):	ld1	{v0.2d,v1.2d}, [ap], #32	C load 4 limbs
+	ld1	{v16.2d,v17.2d}, [bp], #32	C load 4 limbs
+	eor	v2.16b, v2.16b, v18.16b
+	eor	v3.16b, v3.16b, v19.16b
+	subs	n, n, #8
+	uadalp	v4.8h,  v6.16b
+	cnt	v6.16b, v2.16b
+	uadalp	v5.8h,  v7.16b
+	cnt	v7.16b, v3.16b
+	b.hs	L(top)
+
+L(end):	uadalp	v4.8h,  v6.16b
+	uadalp	v5.8h,  v7.16b
+L(sum):	eor	v0.16b, v0.16b, v16.16b
+	eor	v1.16b, v1.16b, v17.16b
+	cnt	v6.16b, v0.16b
+	cnt	v7.16b, v1.16b
+	uadalp	v4.8h,  v6.16b
+	uadalp	v5.8h,  v7.16b
+	add	v4.8h, v4.8h, v5.8h
+					C we have 8 16-bit counts
+L(e0):	uaddlp	v4.4s,  v4.8h		C we have 4 32-bit counts
+	uaddlp	v4.2d,  v4.4s		C we have 2 64-bit counts
+	mov	x0, v4.d[0]
+	mov	x1, v4.d[1]
+	add	x0, x0, x1
+	ret
+
+C Code for count > maxsize.  Splits operand and calls above code.
+define(`ap2', x5)			C caller-saves reg not used above
+define(`bp2', x6)			C caller-saves reg not used above
+L(gt8k):
+	mov	x8, x30
+	mov	x7, n			C full count (caller-saves reg not used above)
+	mov	x4, #0			C total sum  (caller-saves reg not used above)
+	mov	x9, #chunksize*8	C caller-saves reg not used above
+	mov	x10, #chunksize		C caller-saves reg not used above
+
+1:	add	ap2, ap, x9		C point at subsequent block
+	add	bp2, bp, x9		C point at subsequent block
+	mov	n, #chunksize-8		C count for this invocation, adjusted for entry pt
+	movi	v4.16b, #0		C clear chunk summation register
+	movi	v5.16b, #0		C clear chunk summation register
+	bl	L(chu)			C jump deep inside code
+	add	x4, x4, x0
+	mov	ap, ap2			C put chunk pointer in place for calls
+	mov	bp, bp2			C put chunk pointer in place for calls
+	sub	x7, x7, x10
+	cmp	x7, x11
+	b.hi	1b
+
+	mov	n, x7			C count for final invocation
+	bl	L(lt8k)
+	add	x0, x4, x0
+	mov	x30, x8
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/invert_limb.asm b/mpn/arm64/invert_limb.asm
new file mode 100644
index 0000000..e372dd0
--- /dev/null
+++ b/mpn/arm64/invert_limb.asm
@@ -0,0 +1,83 @@
+dnl  ARM64 mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C            cycles/limb
+C Cortex-A53     ?
+C Cortex-A57     ?
+
+C Compiler generated, mildly edited.  Could surely be further optimised.
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)
+	lsr	x2, x0, #54
+	LEA_HI(	x1, approx_tab)
+	and	x2, x2, #0x1fe
+	LEA_LO(	x1, approx_tab)
+	ldrh	w3, [x1,x2]
+	lsr	x4, x0, #24
+	add	x4, x4, #1
+	ubfiz	x2, x3, #11, #16
+	umull	x3, w3, w3
+	mul	x3, x3, x4
+	sub	x2, x2, #1
+	sub	x2, x2, x3, lsr #40
+	lsl	x3, x2, #60
+	mul	x1, x2, x2
+	msub	x1, x1, x4, x3
+	lsl	x2, x2, #13
+	add	x1, x2, x1, lsr #47
+	and	x2, x0, #1
+	neg	x3, x2
+	and	x3, x3, x1, lsr #1
+	add	x2, x2, x0, lsr #1
+	msub	x2, x1, x2, x3
+	umulh	x2, x2, x1
+	lsl	x1, x1, #31
+	add	x1, x1, x2, lsr #1
+	mul	x3, x1, x0
+	umulh	x2, x1, x0
+	adds	x4, x3, x0
+	adc	x0, x2, x0
+	sub	x0, x1, x0
+	ret
+EPILOGUE()
+
+	RODATA
+	ALIGN(2)
+	TYPE(   approx_tab, object)
+	SIZE(   approx_tab, 512)
+approx_tab:
+forloop(i,256,512-1,dnl
+`	.hword	eval(0x7fd00/i)
+')dnl
diff --git a/mpn/arm64/logops_n.asm b/mpn/arm64/logops_n.asm
new file mode 100644
index 0000000..6520881
--- /dev/null
+++ b/mpn/arm64/logops_n.asm
@@ -0,0 +1,139 @@
+dnl  ARM64 mpn_and_n, mpn_andn_n, mpn_nand_n, etc.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb     cycles/limb
+C	      nand,nior	      all other
+C Cortex-A53	3.25-3.5	2.75-3
+C Cortex-A57	 2.0		 1.5
+C X-Gene	 2.14		 2.0
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`vp', `x2')
+define(`n',  `x3')
+
+define(`POSTOP', `dnl')
+
+ifdef(`OPERATION_and_n',`
+  define(`func',    `mpn_and_n')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',    `mpn_andn_n')
+  define(`LOGOP',   `bic	$1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',    `mpn_nand_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',    `mpn_ior_n')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',    `mpn_iorn_n')
+  define(`LOGOP',   `orn	$1, $2, $3')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',    `mpn_nior_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',    `mpn_xor_n')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',    `mpn_xnor_n')
+  define(`LOGOP',   `eon	$1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+	lsr	x17, n, #2
+	tbz	n, #0, L(bx0)
+
+L(bx1):	ldr	x7, [up]
+	ldr	x11, [vp]
+	LOGOP(	x15, x7, x11)
+	POSTOP(	x15)
+	str	x15, [rp],#8
+	tbnz	n, #1, L(b11)
+
+L(b01):	cbz	x17, L(ret)
+	ldp	x4, x5, [up,#8]
+	ldp	x8, x9, [vp,#8]
+	sub	up, up, #8
+	sub	vp, vp, #8
+	b	L(mid)
+
+L(b11):	ldp	x6, x7, [up,#8]
+	ldp	x10, x11, [vp,#8]
+	add	up, up, #8
+	add	vp, vp, #8
+	cbz	x17, L(end)
+	b	L(top)
+
+L(bx0):	tbnz	n, #1, L(b10)
+
+L(b00):	ldp	x4, x5, [up],#-16
+	ldp	x8, x9, [vp],#-16
+	b	L(mid)
+
+L(b10):	ldp	x6, x7, [up]
+	ldp	x10, x11, [vp]
+	cbz	x17, L(end)
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up,#16]
+	ldp	x8, x9, [vp,#16]
+	LOGOP(	x12, x6, x10)
+	LOGOP(	x13, x7, x11)
+	POSTOP(	x12)
+	POSTOP(	x13)
+	stp	x12, x13, [rp],#16
+L(mid):	ldp	x6, x7, [up,#32]!
+	ldp	x10, x11, [vp,#32]!
+	LOGOP(	x12, x4, x8)
+	LOGOP(	x13, x5, x9)
+	POSTOP(	x12)
+	POSTOP(	x13)
+	stp	x12, x13, [rp],#16
+	sub	x17, x17, #1
+	cbnz	x17, L(top)
+
+L(end):	LOGOP(	x12, x6, x10)
+	LOGOP(	x13, x7, x11)
+	POSTOP(	x12)
+	POSTOP(	x13)
+	stp	x12, x13, [rp]
+L(ret):	ret
+EPILOGUE()
diff --git a/mpn/arm64/lshift.asm b/mpn/arm64/lshift.asm
new file mode 100644
index 0000000..1c9b39a
--- /dev/null
+++ b/mpn/arm64/lshift.asm
@@ -0,0 +1,138 @@
+dnl  ARM64 mpn_lshift.
+
+dnl  Copyright 2013, 2014, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb   assumed optimal c/l
+C Cortex-A53	3.5-4.0		 3.25
+C Cortex-A57	 2.0		 2.0
+C X-Gene	 2.67		 2.5
+
+C TODO
+C  * The feed-in code uses 1 ldr for odd sizes and 2 ldr for even sizes.  These
+C    numbers should be 1 and 0, respectively.  The str in wind-down should also
+C    go.
+C  * Using extr and with 63 separate loops we might reach 1.25 c/l on A57.
+C  * A53's speed depends on alignment, tune/speed -w1 gives 3.5, -w0 gives 4.0.
+
+changecom(blah)
+
+define(`rp_arg', `x0')
+define(`up',     `x1')
+define(`n',      `x2')
+define(`cnt',    `x3')
+
+define(`rp',     `x16')
+
+define(`tnc',`x8')
+
+define(`PSHIFT', lsl)
+define(`NSHIFT', lsr)
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	add	rp, rp_arg, n, lsl #3	C rp = &rp_arg[n]; we work from the top down
+	add	up, up, n, lsl #3	C up = &up[n]
+	sub	tnc, xzr, cnt		C tnc = -cnt; shift insns use the amount mod 64
+	lsr	x17, n, #2		C x17 = n/4 loop counter
+	tbz	n, #0, L(bx0)		C even n?
+
+L(bx1):	ldr	x4, [up,#-8]		C odd n: top limb
+	tbnz	n, #1, L(b11)
+
+L(b01):	NSHIFT	x0, x4, tnc		C x0 = return value: bits shifted out the top
+	PSHIFT	x2, x4, cnt
+	cbnz	x17, L(gt1)
+	str	x2, [rp,#-8]		C n == 1: single store
+	ret
+L(gt1):	ldp	x4, x5, [up,#-24]
+	sub	up, up, #8
+	add	rp, rp, #16
+	b	L(lo2)
+
+L(b11):	NSHIFT	x0, x4, tnc		C x0 = return value
+	PSHIFT	x2, x4, cnt
+	ldp	x6, x7, [up,#-24]!
+	b	L(lo3)
+
+L(bx0):	ldp	x4, x5, [up,#-16]
+	tbz	n, #1, L(b00)
+
+L(b10):	NSHIFT	x0, x5, tnc		C x0 = return value
+	PSHIFT	x13, x5, cnt
+	NSHIFT	x10, x4, tnc
+	PSHIFT	x2, x4, cnt
+	cbnz	x17, L(gt2)
+	orr	x10, x10, x13		C combine limb pieces
+	stp	x2, x10, [rp,#-16]	C n == 2: done
+	ret
+L(gt2):	ldp	x4, x5, [up,#-32]
+	orr	x10, x10, x13
+	str	x10, [rp,#-8]
+	sub	up, up, #16
+	add	rp, rp, #8
+	b	L(lo2)
+
+L(b00):	NSHIFT	x0, x5, tnc		C x0 = return value
+	PSHIFT	x13, x5, cnt
+	NSHIFT	x10, x4, tnc
+	PSHIFT	x2, x4, cnt
+	ldp	x6, x7, [up,#-32]!
+	orr	x10, x10, x13
+	str	x10, [rp,#-8]!
+	b	L(lo0)
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up,#-16]	C main loop: 4 limbs per round
+	orr	x10, x10, x13
+	orr	x11, x12, x2
+	stp	x10, x11, [rp,#-16]
+	PSHIFT	x2, x6, cnt
+L(lo2):	NSHIFT	x10, x4, tnc
+	PSHIFT	x13, x5, cnt
+	NSHIFT	x12, x5, tnc
+	ldp	x6, x7, [up,#-32]!
+	orr	x10, x10, x13
+	orr	x11, x12, x2
+	stp	x10, x11, [rp,#-32]!
+	PSHIFT	x2, x4, cnt
+L(lo0):	sub	x17, x17, #1
+L(lo3):	NSHIFT	x10, x6, tnc
+	PSHIFT	x13, x7, cnt
+	NSHIFT	x12, x7, tnc
+	cbnz	x17, L(top)
+
+L(end):	orr	x10, x10, x13		C wind down: last 3 stores
+	orr	x11, x12, x2
+	PSHIFT	x2, x6, cnt
+	stp	x10, x11, [rp,#-16]
+	str	x2, [rp,#-24]
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/lshiftc.asm b/mpn/arm64/lshiftc.asm
new file mode 100644
index 0000000..8b8287d
--- /dev/null
+++ b/mpn/arm64/lshiftc.asm
@@ -0,0 +1,141 @@
+dnl  ARM64 mpn_lshiftc.
+
+dnl  Copyright 2013, 2014, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb   assumed optimal c/l
+C Cortex-A53	3.5-4.0		 3.25
+C Cortex-A57	 2.0		 2.0
+C X-Gene	 2.67		 2.5
+
+C TODO
+C  * The feed-in code uses 1 ldr for odd sizes and 2 ldr for even sizes.  These
+C    numbers should be 1 and 0, respectively.  The str in wind-down should also
+C    go.
+C  * Using extr and with 63 separate loops we might reach 1.5 c/l on A57.
+C  * A53's speed depends on alignment, tune/speed -w1 gives 3.5, -w0 gives 4.0.
+
+changecom(blah)
+
+define(`rp_arg', `x0')
+define(`up',     `x1')
+define(`n',      `x2')
+define(`cnt',    `x3')
+
+define(`rp',     `x16')
+
+define(`tnc',`x8')
+
+define(`PSHIFT', lsl)
+define(`NSHIFT', lsr)
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)			C like mpn_lshift, but stores the one's complement (eon/mvn)
+	add	rp, rp_arg, n, lsl #3	C rp = &rp_arg[n]; we work from the top down
+	add	up, up, n, lsl #3	C up = &up[n]
+	sub	tnc, xzr, cnt		C tnc = -cnt; shift insns use the amount mod 64
+	lsr	x17, n, #2		C x17 = n/4 loop counter
+	tbz	n, #0, L(bx0)		C even n?
+
+L(bx1):	ldr	x4, [up,#-8]		C odd n: top limb
+	tbnz	n, #1, L(b11)
+
+L(b01):	NSHIFT	x0, x4, tnc		C x0 = return value: bits shifted out the top
+	PSHIFT	x2, x4, cnt
+	cbnz	x17, L(gt1)
+	mvn	x2, x2			C complement before the single store
+	str	x2, [rp,#-8]
+	ret
+L(gt1):	ldp	x4, x5, [up,#-24]
+	sub	up, up, #8
+	add	rp, rp, #16
+	b	L(lo2)
+
+L(b11):	NSHIFT	x0, x4, tnc		C x0 = return value
+	PSHIFT	x2, x4, cnt
+	ldp	x6, x7, [up,#-24]!
+	b	L(lo3)
+
+L(bx0):	ldp	x4, x5, [up,#-16]
+	tbz	n, #1, L(b00)
+
+L(b10):	NSHIFT	x0, x5, tnc		C x0 = return value
+	PSHIFT	x13, x5, cnt
+	NSHIFT	x10, x4, tnc
+	PSHIFT	x2, x4, cnt
+	cbnz	x17, L(gt2)
+	eon	x10, x10, x13		C combine pieces and complement in one insn
+	mvn	x2, x2
+	stp	x2, x10, [rp,#-16]	C n == 2: done
+	ret
+L(gt2):	ldp	x4, x5, [up,#-32]
+	eon	x10, x10, x13
+	str	x10, [rp,#-8]
+	sub	up, up, #16
+	add	rp, rp, #8
+	b	L(lo2)
+
+L(b00):	NSHIFT	x0, x5, tnc		C x0 = return value
+	PSHIFT	x13, x5, cnt
+	NSHIFT	x10, x4, tnc
+	PSHIFT	x2, x4, cnt
+	ldp	x6, x7, [up,#-32]!
+	eon	x10, x10, x13
+	str	x10, [rp,#-8]!
+	b	L(lo0)
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up,#-16]	C main loop: 4 limbs per round
+	eon	x10, x10, x13
+	eon	x11, x12, x2
+	stp	x10, x11, [rp,#-16]
+	PSHIFT	x2, x6, cnt
+L(lo2):	NSHIFT	x10, x4, tnc
+	PSHIFT	x13, x5, cnt
+	NSHIFT	x12, x5, tnc
+	ldp	x6, x7, [up,#-32]!
+	eon	x10, x10, x13
+	eon	x11, x12, x2
+	stp	x10, x11, [rp,#-32]!
+	PSHIFT	x2, x4, cnt
+L(lo0):	sub	x17, x17, #1
+L(lo3):	NSHIFT	x10, x6, tnc
+	PSHIFT	x13, x7, cnt
+	NSHIFT	x12, x7, tnc
+	cbnz	x17, L(top)
+
+L(end):	eon	x10, x10, x13		C wind down: last 3 stores
+	eon	x11, x12, x2
+	PSHIFT	x2, x6, cnt
+	stp	x10, x11, [rp,#-16]
+	mvn	x2, x2
+	str	x2, [rp,#-24]
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/mod_34lsub1.asm b/mpn/arm64/mod_34lsub1.asm
new file mode 100644
index 0000000..5c64812
--- /dev/null
+++ b/mpn/arm64/mod_34lsub1.asm
@@ -0,0 +1,124 @@
+dnl  ARM64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
+
+dnl  Copyright 2012-2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 2
+C Cortex-A57	 1
+C X-Gene	 1.45
+
+define(`ap',	x0)
+define(`n',	x1)
+
+changecom(blah)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C  * An alternative inner loop which could run at 0.722 c/l on A57:
+C	adds	x8, x8, x2
+C	adcs	x9, x9, x3
+C	ldp	x2, x3, [ap, #-32]
+C	adcs	x10, x10, x4
+C	adc	x12, x12, xzr
+C	adds	x8, x8, x5
+C	ldp	x4, x5, [ap, #-16]
+C	sub	n, n, #6
+C	adcs	x9, x9, x6
+C	adcs	x10, x10, x7
+C	ldp	x6, x7, [ap], #48
+C	adc	x12, x12, xzr
+C	tbz	n, #63, L(top)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)		C return a value congruent to {ap,n} mod 2^48-1
+	subs	n, n, #3		C n counts limbs; used as a signed counter below
+	mov	x8, #0			C x8 accumulates wrapped-around carries
+	b.lt	L(le2)			C n <= 2
+
+	ldp	x2, x3, [ap, #0]	C first 3-limb group seeds the accumulators
+	ldr	x4, [ap, #16]
+	add	ap, ap, #24
+	subs	n, n, #3
+	b.lt	L(sum)			C n <= 5
+	cmn	x0, #0			C clear carry
+
+L(top):	ldp	x5, x6, [ap, #0]	C add 3 limbs per round, carry chained via adcs
+	ldr	x7, [ap, #16]
+	add	ap, ap, #24
+	sub	n, n, #3
+	adcs	x2, x2, x5
+	adcs	x3, x3, x6
+	adcs	x4, x4, x7
+	tbz	n, #63, L(top)		C loop while n >= 0 (bit 63 = sign)
+
+	adc	x8, xzr, xzr		C x8 <= 1
+
+L(sum):	cmn	n, #2			C n is -3..-1 here, i.e. 0-2 limbs remain
+	mov	x5, #0
+	b.lo	1f			C no limbs left
+	ldr	x5, [ap], #8
+1:	mov	x6, #0
+	b.ls	1f			C only one limb left
+	ldr	x6, [ap], #8
+1:	adds	x2, x2, x5
+	adcs	x3, x3, x6
+	adcs	x4, x4, xzr
+	adc	x8, x8, xzr		C x8 <= 2
+
+L(sum2):
+	and	x0, x2, #0xffffffffffff	C x2 has weight 2^0 mod 2^48-1
+	add	x0, x0, x2, lsr #48
+	add	x0, x0, x8
+
+	lsl	x8, x3, #16		C x3 has weight 2^64 = 2^16 (mod 2^48-1)
+	and	x1, x8, #0xffffffffffff
+	add	x0, x0, x1
+	add	x0, x0, x3, lsr #32
+
+	lsl	x8, x4, #32		C x4 has weight 2^128 = 2^32 (mod 2^48-1)
+	and	x1, x8, #0xffffffffffff
+	add	x0, x0, x1
+	add	x0, x0, x4, lsr #16
+	ret
+
+L(le2):	cmn	n, #1			C original n was 1 or 2 (n >= 1 assumed -- mpn convention)
+	b.ne	L(1)
+	ldp	x2, x3, [ap]		C n == 2
+	mov	x4, #0
+	b	L(sum2)
+L(1):	ldr	x2, [ap]		C n == 1: fold the single limb directly
+	and	x0, x2, #0xffffffffffff
+	add	x0, x0, x2, lsr #48
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/mul_1.asm b/mpn/arm64/mul_1.asm
new file mode 100644
index 0000000..f9bf251
--- /dev/null
+++ b/mpn/arm64/mul_1.asm
@@ -0,0 +1,128 @@
+dnl  ARM64 mpn_mul_1
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013, 2015, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	7.5-8
+C Cortex-A57	 7
+C Cortex-A72
+C X-Gene	 4
+C Apple M1	 1
+
+C TODO
+C  * Start first multiply earlier.
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n',  `x2')
+define(`v0', `x3')
+
+
+PROLOGUE(mpn_mul_1c)			C as mpn_mul_1, but x4 holds the caller's carry-in (5th arg)
+	adds	xzr, xzr, xzr		C clear cy flag
+	b	L(com)
+EPILOGUE()
+
+PROLOGUE(mpn_mul_1)			C rp[] = up[] * v0, n limbs; returns the high limb
+	adds	x4, xzr, xzr		C clear register and cy flag
+L(com):	lsr	x17, n, #2		C x17 = n/4 loop counter
+	tbnz	n, #0, L(bx1)		C odd n?
+
+L(bx0):	mov	x11, x4			C x11 = carry-in limb
+	tbz	n, #1, L(b00)
+
+L(b10):	ldp	x4, x5, [up]
+	mul	x8, x4, v0
+	umulh	x10, x4, v0
+	cbz	x17, L(2)		C n == 2?
+	ldp	x6, x7, [up,#16]!
+	mul	x9, x5, v0
+	b	L(mid)-8		C enter loop at the adcs two insns before L(mid)
+
+L(2):	mul	x9, x5, v0
+	b	L(2e)
+
+L(bx1):	ldr	x7, [up],#8		C odd n: do 1 leading limb
+	mul	x9, x7, v0
+	umulh	x11, x7, v0
+	adds	x9, x9, x4		C add carry-in limb
+	str	x9, [rp],#8
+	tbnz	n, #1, L(b10)
+
+L(b01):	cbz	x17, L(1)		C n == 1: done
+
+L(b00):	ldp	x6, x7, [up]
+	mul	x8, x6, v0
+	umulh	x10, x6, v0
+	ldp	x4, x5, [up,#16]
+	mul	x9, x7, v0
+	adcs	x12, x8, x11
+	umulh	x11, x7, v0
+	add	rp, rp, #16
+	sub	x17, x17, #1
+	cbz	x17, L(end)
+
+	ALIGN(16)
+L(top):	mul	x8, x4, v0		C main loop: 4 limbs per round, low/high products interleaved
+	ldp	x6, x7, [up,#32]!
+	adcs	x13, x9, x10
+	umulh	x10, x4, v0
+	mul	x9, x5, v0
+	stp	x12, x13, [rp,#-16]
+	adcs	x12, x8, x11
+	umulh	x11, x5, v0
+L(mid):	mul	x8, x6, v0
+	ldp	x4, x5, [up,#16]
+	adcs	x13, x9, x10
+	umulh	x10, x6, v0
+	mul	x9, x7, v0
+	stp	x12, x13, [rp],#32
+	adcs	x12, x8, x11
+	umulh	x11, x7, v0
+	sub	x17, x17, #1
+	cbnz	x17, L(top)
+
+L(end):	mul	x8, x4, v0		C wind down: final 2 limbs
+	adcs	x13, x9, x10
+	umulh	x10, x4, v0
+	mul	x9, x5, v0
+	stp	x12, x13, [rp,#-16]
+L(2e):	adcs	x12, x8, x11
+	umulh	x11, x5, v0
+	adcs	x13, x9, x10
+	stp	x12, x13, [rp]
+L(1):	adc	x0, x11, xzr		C return the high limb
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/popcount.asm b/mpn/arm64/popcount.asm
new file mode 100644
index 0000000..33954b9
--- /dev/null
+++ b/mpn/arm64/popcount.asm
@@ -0,0 +1,157 @@
+dnl  ARM64 Neon mpn_popcount -- mpn bit population count.
+
+dnl  Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 2.5
+C Cortex-A57	 1.14
+C X-Gene	 3
+
+C TODO
+C  * Consider greater unrolling.
+C  * Arrange to align the pointer, if that helps performance.  Use the same
+C    read-and-mask trick we use on PCs, for simplicity and performance.  (Sorry
+C    valgrind!)
+C  * Explore if explicit align directives, e.g., "[ptr:128]" help.
+C  * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
+
+changecom(blah)
+
+C INPUT PARAMETERS
+define(`ap', x0)
+define(`n',  x1)
+
+C We sum into 16 16-bit counters in v4,v5, but at the end we sum them and end
+C up with 8 16-bit counters.  Therefore, we can sum to 8(2^16-1) bits, or
+C (8*2^16-1)/64 = 0x1fff limbs.  We use a chunksize close to that, but which
+C  allows the huge count code to jump deep into the code (at L(chu)).
+
+define(`maxsize',  0x1fff)
+define(`chunksize',0x1ff0)
+
+ASM_START()
+PROLOGUE(mpn_popcount)			C return total population count of {ap,n}
+
+	mov	x11, #maxsize
+	cmp	n, x11
+	b.hi	L(gt8k)			C huge operand: chunked path below
+
+L(lt8k):
+	movi	v4.16b, #0			C clear summation register
+	movi	v5.16b, #0			C clear summation register
+
+	tbz	n, #0, L(xx0)
+	sub	n, n, #1
+	ld1	{v0.1d}, [ap], #8		C load 1 limb
+	cnt	v6.16b, v0.16b			C per-byte popcount
+	uadalp	v4.8h,  v6.16b			C could also splat
+
+L(xx0):	tbz	n, #1, L(x00)
+	sub	n, n, #2
+	ld1	{v0.2d}, [ap], #16		C load 2 limbs
+	cnt	v6.16b, v0.16b
+	uadalp	v4.8h,  v6.16b
+
+L(x00):	tbz	n, #2, L(000)
+	subs	n, n, #4
+	ld1	{v0.2d,v1.2d}, [ap], #32	C load 4 limbs
+	b.ls	L(sum)
+
+L(gt4):	ld1	{v2.2d,v3.2d}, [ap], #32	C load 4 limbs
+	sub	n, n, #4
+	cnt	v6.16b, v0.16b
+	cnt	v7.16b, v1.16b
+	b	L(mid)
+
+L(000):	subs	n, n, #8
+	b.lo	L(e0)				C fewer than 8 limbs left
+
+L(chu):	ld1	{v2.2d,v3.2d}, [ap], #32	C load 4 limbs
+	ld1	{v0.2d,v1.2d}, [ap], #32	C load 4 limbs
+	cnt	v6.16b, v2.16b
+	cnt	v7.16b, v3.16b
+	subs	n, n, #8
+	b.lo	L(end)
+
+L(top):	ld1	{v2.2d,v3.2d}, [ap], #32	C load 4 limbs
+	uadalp	v4.8h,  v6.16b
+	cnt	v6.16b, v0.16b
+	uadalp	v5.8h,  v7.16b
+	cnt	v7.16b, v1.16b
+L(mid):	ld1	{v0.2d,v1.2d}, [ap], #32	C load 4 limbs
+	subs	n, n, #8
+	uadalp	v4.8h,  v6.16b
+	cnt	v6.16b, v2.16b
+	uadalp	v5.8h,  v7.16b
+	cnt	v7.16b, v3.16b
+	b.hs	L(top)
+
+L(end):	uadalp	v4.8h,  v6.16b
+	uadalp	v5.8h,  v7.16b
+L(sum):	cnt	v6.16b, v0.16b
+	cnt	v7.16b, v1.16b
+	uadalp	v4.8h,  v6.16b
+	uadalp	v5.8h,  v7.16b
+	add	v4.8h, v4.8h, v5.8h
+					C we have 8 16-bit counts
+L(e0):	uaddlp	v4.4s,  v4.8h		C we have 4 32-bit counts
+	uaddlp	v4.2d,  v4.4s		C we have 2 64-bit counts
+	mov	x0, v4.d[0]
+	mov	x1, v4.d[1]
+	add	x0, x0, x1		C return value = grand total
+	ret
+
+C Code for count > maxsize.  Splits operand and calls above code.
+define(`ap2', x5)			C caller-saves reg not used above
+L(gt8k):
+	mov	x8, x30			C save return address (bl below clobbers x30)
+	mov	x7, n			C full count (caller-saves reg not used above)
+	mov	x4, #0			C total sum  (caller-saves reg not used above)
+	mov	x9, #chunksize*8	C caller-saves reg not used above
+	mov	x10, #chunksize		C caller-saves reg not used above
+
+1:	add	ap2, ap, x9		C point at subsequent block
+	mov	n, #chunksize-8		C count for this invocation, adjusted for entry pt
+	movi	v4.16b, #0		C clear chunk summation register
+	movi	v5.16b, #0		C clear chunk summation register
+	bl	L(chu)			C jump deep inside code
+	add	x4, x4, x0		C accumulate chunk total
+	mov	ap, ap2			C put chunk pointer in place for calls
+	sub	x7, x7, x10
+	cmp	x7, x11
+	b.hi	1b
+
+	mov	n, x7			C count for final invocation
+	bl	L(lt8k)
+	add	x0, x4, x0		C running total + final chunk
+	mov	x30, x8			C restore return address
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/rsh1aors_n.asm b/mpn/arm64/rsh1aors_n.asm
new file mode 100644
index 0000000..1ce81f5
--- /dev/null
+++ b/mpn/arm64/rsh1aors_n.asm
@@ -0,0 +1,168 @@
+dnl  ARM64 mpn_rsh1add_n and mpn_rsh1sub_n.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb   assumed optimal c/l
+C Cortex-A53	3.25-3.75	 3.0 steady
+C Cortex-A57	 2.15		 1.75
+C X-Gene	 2.75		 2.5
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`vp', `x2')
+define(`n',  `x3')
+
+ifdef(`OPERATION_rsh1add_n', `
+  define(`ADDSUB',	adds)
+  define(`ADDSUBC',	adcs)
+  define(`COND',	`cs')
+  define(`func_n',	mpn_rsh1add_n)')
+ifdef(`OPERATION_rsh1sub_n', `
+  define(`ADDSUB',	subs)
+  define(`ADDSUBC',	sbcs)
+  define(`COND',	`cc')
+  define(`func_n',	mpn_rsh1sub_n)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+PROLOGUE(func_n)			C rp[] = (up[] +- vp[]) >> 1; returns the shifted-out low bit
+	lsr	x6, n, #2		C x6 = n/4 loop counter
+
+	tbz	n, #0, L(bx0)		C even n?
+
+L(bx1):	ldr	x5, [up],#8		C odd n: do 1 leading limb
+	ldr	x9, [vp],#8
+	tbnz	n, #1, L(b11)
+
+L(b01):	ADDSUB	x13, x5, x9
+	and	x10, x13, #1		C x10 = return value: low bit of the result
+	cbz	x6, L(1)		C n == 1?
+	ldp	x4, x5, [up],#48
+	ldp	x8, x9, [vp],#48
+	ADDSUBC	x14, x4, x8
+	ADDSUBC	x15, x5, x9
+	ldp	x4, x5, [up,#-32]
+	ldp	x8, x9, [vp,#-32]
+	extr	x17, x14, x13, #1	C x17 = (x14:x13) >> 1
+	ADDSUBC	x12, x4, x8
+	ADDSUBC	x13, x5, x9
+	str	x17, [rp], #24
+	sub	x6, x6, #1
+	cbz	x6, L(end)
+	b	L(top)
+
+L(1):	cset	x14, COND		C carry/borrow out becomes the top result bit
+	extr	x17, x14, x13, #1
+	str	x17, [rp]
+	mov	x0, x10
+	ret
+
+L(b11):	ADDSUB	x15, x5, x9
+	and	x10, x15, #1		C x10 = return value
+
+	ldp	x4, x5, [up],#32
+	ldp	x8, x9, [vp],#32
+	ADDSUBC	x12, x4, x8
+	ADDSUBC	x13, x5, x9
+	cbz	x6, L(3)		C n == 3?
+	ldp	x4, x5, [up,#-16]
+	ldp	x8, x9, [vp,#-16]
+	extr	x17, x12, x15, #1
+	ADDSUBC	x14, x4, x8
+	ADDSUBC	x15, x5, x9
+	str	x17, [rp], #8
+	b	L(mid)
+
+L(3):	extr	x17, x12, x15, #1
+	str	x17, [rp], #8
+	b	L(2)
+
+L(bx0):	tbz	n, #1, L(b00)
+
+L(b10):	ldp	x4, x5, [up],#32
+	ldp	x8, x9, [vp],#32
+	ADDSUB	x12, x4, x8
+	ADDSUBC	x13, x5, x9
+	and	x10, x12, #1		C x10 = return value
+	cbz	x6, L(2)		C n == 2?
+	ldp	x4, x5, [up,#-16]
+	ldp	x8, x9, [vp,#-16]
+	ADDSUBC	x14, x4, x8
+	ADDSUBC	x15, x5, x9
+	b	L(mid)
+
+L(b00):	ldp	x4, x5, [up],#48
+	ldp	x8, x9, [vp],#48
+	ADDSUB	x14, x4, x8
+	ADDSUBC	x15, x5, x9
+	and	x10, x14, #1		C x10 = return value
+	ldp	x4, x5, [up,#-32]
+	ldp	x8, x9, [vp,#-32]
+	ADDSUBC	x12, x4, x8
+	ADDSUBC	x13, x5, x9
+	add	rp, rp, #16
+	sub	x6, x6, #1
+	cbz	x6, L(end)
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up,#-16]	C main loop: 4 limbs per round, results lag by one limb
+	ldp	x8, x9, [vp,#-16]
+	extr	x16, x15, x14, #1
+	extr	x17, x12, x15, #1
+	ADDSUBC	x14, x4, x8
+	ADDSUBC	x15, x5, x9
+	stp	x16, x17, [rp,#-16]
+L(mid):	ldp	x4, x5, [up],#32
+	ldp	x8, x9, [vp],#32
+	extr	x16, x13, x12, #1
+	extr	x17, x14, x13, #1
+	ADDSUBC	x12, x4, x8
+	ADDSUBC	x13, x5, x9
+	stp	x16, x17, [rp],#32
+	sub	x6, x6, #1
+	cbnz	x6, L(top)
+
+L(end):	extr	x16, x15, x14, #1	C wind down: shift out the last pending limbs
+	extr	x17, x12, x15, #1
+	stp	x16, x17, [rp,#-16]
+L(2):	cset	x14, COND		C carry/borrow out becomes the top result bit
+	extr	x16, x13, x12, #1
+	extr	x17, x14, x13, #1
+	stp	x16, x17, [rp]
+
+L(ret):	mov	x0, x10
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/rshift.asm b/mpn/arm64/rshift.asm
new file mode 100644
index 0000000..10b0a04
--- /dev/null
+++ b/mpn/arm64/rshift.asm
@@ -0,0 +1,136 @@
+dnl  ARM64 mpn_rshift.
+
+dnl  Copyright 2013, 2014, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb   assumed optimal c/l
+C Cortex-A53	3.5-4.0		 3.25
+C Cortex-A57	 2.0		 2.0
+C X-Gene	 2.67		 2.5
+
+C TODO
+C  * The feed-in code uses 1 ldr for odd sizes and 2 ldr for even sizes.  These
+C    numbers should be 1 and 0, respectively.  The str in wind-down should also
+C    go.
+C  * Using extr and with 63 separate loops we might reach 1.25 c/l on A57.
+C  * A53's speed depends on alignment, but not as simply as for lshift/lshiftc.
+
+changecom(blah)
+
+define(`rp_arg', `x0')
+define(`up',     `x1')
+define(`n',      `x2')
+define(`cnt',    `x3')
+
+define(`rp',     `x16')
+
+define(`tnc',`x8')
+
+define(`PSHIFT', lsr)
+define(`NSHIFT', lsl)
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	mov	rp, rp_arg		C work from the low end upwards
+	sub	tnc, xzr, cnt		C tnc = -cnt; shift insns use the amount mod 64
+	lsr	x17, n, #2		C x17 = n/4 loop counter
+	tbz	n, #0, L(bx0)		C even n?
+
+L(bx1):	ldr	x5, [up]		C odd n: bottom limb
+	tbnz	n, #1, L(b11)
+
+L(b01):	NSHIFT	x0, x5, tnc		C x0 = return value: bits shifted out the bottom
+	PSHIFT	x2, x5, cnt
+	cbnz	x17, L(gt1)
+	str	x2, [rp]		C n == 1: single store
+	ret
+L(gt1):	ldp	x4, x5, [up,#8]
+	sub	up, up, #8
+	sub	rp, rp, #32
+	b	L(lo2)
+
+L(b11):	NSHIFT	x0, x5, tnc		C x0 = return value
+	PSHIFT	x2, x5, cnt
+	ldp	x6, x7, [up,#8]!
+	sub	rp, rp, #16
+	b	L(lo3)
+
+L(bx0):	ldp	x4, x5, [up]
+	tbz	n, #1, L(b00)
+
+L(b10):	NSHIFT	x0, x4, tnc		C x0 = return value
+	PSHIFT	x13, x4, cnt
+	NSHIFT	x10, x5, tnc
+	PSHIFT	x2, x5, cnt
+	cbnz	x17, L(gt2)
+	orr	x10, x10, x13		C combine limb pieces
+	stp	x10, x2, [rp]		C n == 2: done
+	ret
+L(gt2):	ldp	x4, x5, [up,#16]
+	orr	x10, x10, x13
+	str	x10, [rp],#-24
+	b	L(lo2)
+
+L(b00):	NSHIFT	x0, x4, tnc		C x0 = return value
+	PSHIFT	x13, x4, cnt
+	NSHIFT	x10, x5, tnc
+	PSHIFT	x2, x5, cnt
+	ldp	x6, x7, [up,#16]!
+	orr	x10, x10, x13
+	str	x10, [rp],#-8
+	b	L(lo0)
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up,#16]	C main loop: 4 limbs per round
+	orr	x10, x10, x13
+	orr	x11, x12, x2
+	stp	x11, x10, [rp,#16]
+	PSHIFT	x2, x7, cnt
+L(lo2):	NSHIFT	x10, x5, tnc
+	NSHIFT	x12, x4, tnc
+	PSHIFT	x13, x4, cnt
+	ldp	x6, x7, [up,#32]!
+	orr	x10, x10, x13
+	orr	x11, x12, x2
+	stp	x11, x10, [rp,#32]!
+	PSHIFT	x2, x5, cnt
+L(lo0):	sub	x17, x17, #1
+L(lo3):	NSHIFT	x10, x7, tnc
+	NSHIFT	x12, x6, tnc
+	PSHIFT	x13, x6, cnt
+	cbnz	x17, L(top)
+
+L(end):	orr	x10, x10, x13		C wind down: last 3 stores
+	orr	x11, x12, x2
+	PSHIFT	x2, x7, cnt
+	stp	x11, x10, [rp,#16]
+	str	x2, [rp,#32]
+	ret
+EPILOGUE()
diff --git a/mpn/arm64/sec_tabselect.asm b/mpn/arm64/sec_tabselect.asm
new file mode 100644
index 0000000..5cbd3b2
--- /dev/null
+++ b/mpn/arm64/sec_tabselect.asm
@@ -0,0 +1,122 @@
+dnl  ARM64 Neon mpn_sec_tabselect.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2011-2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C Cortex-A53	 2.25
+C Cortex-A57	 1.33
+C X-Gene	 2
+
+C void
+C mpn_sec_tabselect (mp_ptr rp, mp_srcptr *tab,
+C		     mp_size_t n, mp_size_t nents, mp_size_t which)
+
+changecom(blah)
+
+define(`rp',     `x0')
+define(`tp',     `x1')
+define(`n',      `x2')
+define(`nents',  `x3')
+define(`which',  `x4')
+
+define(`i',      `x5')
+define(`j',      `x6')
+
+define(`maskq',  `v4')
+
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)		C constant-time select: scan every entry, keep entry `which'
+	dup	v7.2d, x4			C 2 `which' copies
+
+	mov	x10, #1
+	dup	v6.2d, x10			C 2 copies of 1
+
+	subs	j, n, #4
+	b.mi	L(outer_end)			C fewer than 4 limbs per entry remain
+
+L(outer_top):				C process one 4-limb slice of all entries
+	mov	i, nents
+	mov	x12, tp				C preserve tp
+	movi	v5.16b, #0			C zero 2 counter copies
+	movi	v2.16b, #0
+	movi	v3.16b, #0
+	ALIGN(16)
+L(tp4):	cmeq	maskq.2d, v5.2d, v7.2d		C compare idx copies to `which' copies
+	ld1	{v0.2d,v1.2d}, [tp]
+	add	v5.2d, v5.2d, v6.2d		C idx++
+	bit	v2.16b, v0.16b, maskq.16b	C insert limbs iff idx == which
+	bit	v3.16b, v1.16b, maskq.16b
+	add	tp, tp, n, lsl #3		C step to same slice of next entry
+	sub	i, i, #1
+	cbnz	i, L(tp4)
+	st1	{v2.2d,v3.2d}, [rp], #32
+	add	tp, x12, #32			C restore tp, point to next slice
+	subs	j, j, #4
+	b.pl	L(outer_top)
+L(outer_end):
+
+	tbz	n, #1, L(b0x)			C 2-limb tail slice?
+	mov	i, nents
+	mov	x12, tp
+	movi	v5.16b, #0			C zero 2 counter copies
+	movi	v2.16b, #0
+	ALIGN(16)
+L(tp2):	cmeq	maskq.2d, v5.2d, v7.2d
+	ld1	{v0.2d}, [tp]
+	add	v5.2d, v5.2d, v6.2d
+	bit	v2.16b, v0.16b, maskq.16b
+	add	tp, tp, n, lsl #3
+	sub	i, i, #1
+	cbnz	i, L(tp2)
+	st1	{v2.2d}, [rp], #16
+	add	tp, x12, #16
+
+L(b0x):	tbz	n, #0, L(b00)			C 1-limb tail slice?
+	mov	i, nents
+	mov	x12, tp
+	movi	v5.16b, #0			C zero 2 counter copies
+	movi	v2.16b, #0
+	ALIGN(16)
+L(tp1):	cmeq	maskq.2d, v5.2d, v7.2d
+	ld1	{v0.1d}, [tp]
+	add	v5.2d, v5.2d, v6.2d		C FIXME size should be `1d'
+	bit	v2.8b, v0.8b, maskq.8b
+	add	tp, tp, n, lsl #3
+	sub	i, i, #1
+	cbnz	i, L(tp1)
+	st1	{v2.1d}, [rp], #8
+	add	tp, x12, #8
+
+L(b00):	ret
+EPILOGUE()
diff --git a/mpn/arm64/sqr_diag_addlsh1.asm b/mpn/arm64/sqr_diag_addlsh1.asm
new file mode 100644
index 0000000..b15daa9
--- /dev/null
+++ b/mpn/arm64/sqr_diag_addlsh1.asm
@@ -0,0 +1,102 @@
+dnl  ARM64 mpn_sqr_diag_addlsh1.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2016, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 5.65
+C Cortex-A57	 3.5
+C X-Gene	 3.38
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`tp', `x1')
+define(`up', `x2')
+define(`n',  `x3')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diag_addlsh1)		C rp <- 2*{tp,..} + diagonal up[i]^2 terms (sqr helper -- presumably called from mpn_sqr; confirm against gmp-impl.h)
+	ldr	x15, [up],#8
+	lsr	x14, n, #1		C x14 = n/2 loop counter
+	tbz	n, #0, L(bx0)		C even n?
+
+L(bx1):	adds	x7, xzr, xzr		C clear x7 and the carry flag
+	mul	x12, x15, x15		C low half of up[0]^2
+	ldr	x16, [up],#8
+	ldp	x4, x5, [tp],#16
+	umulh	x11, x15, x15		C high half of up[0]^2
+	b	L(mid)
+
+L(bx0):	adds	x5, xzr, xzr		C clear x5 and the carry flag
+	mul	x12, x15, x15
+	ldr	x17, [up],#16		C load 1 limb, step 2 (x16/x17 alternate)
+	ldp	x6, x7, [tp],#32
+	umulh	x11, x15, x15
+	sub	x14, x14, #1
+	cbz	x14, L(end)
+
+	ALIGN(16)
+L(top):	extr	x9, x6, x5, #63		C 2*x6 | top bit of x5 (tp shifted left 1)
+	mul	x10, x17, x17
+	ldr	x16, [up,#-8]
+	adcs	x13, x9, x11
+	ldp	x4, x5, [tp,#-16]
+	umulh	x11, x17, x17
+	extr	x8, x7, x6, #63
+	stp	x12, x13, [rp],#16
+	adcs	x12, x8, x10
+L(mid):	extr	x9, x4, x7, #63
+	mul	x10, x16, x16
+	ldr	x17, [up],#16
+	adcs	x13, x9, x11
+	ldp	x6, x7, [tp],#32
+	umulh	x11, x16, x16
+	extr	x8, x5, x4, #63
+	stp	x12, x13, [rp],#16
+	adcs	x12, x8, x10
+	sub	x14, x14, #1
+	cbnz	x14, L(top)
+
+L(end):	extr	x9, x6, x5, #63		C wind down: last square and final shifted limbs
+	mul	x10, x17, x17
+	adcs	x13, x9, x11
+	umulh	x11, x17, x17
+	extr	x8, x7, x6, #63
+	stp	x12, x13, [rp]
+	adcs	x12, x8, x10
+	extr	x9, xzr, x7, #63	C top bit of x7 shifted down
+	adcs	x13, x9, x11
+	stp	x12, x13, [rp,#16]
+
+	ret
+EPILOGUE()
diff --git a/mpn/asm-defs.m4 b/mpn/asm-defs.m4
new file mode 100644
index 0000000..1f2d9fe
--- /dev/null
+++ b/mpn/asm-defs.m4
@@ -0,0 +1,1766 @@
+divert(-1)
+dnl
+dnl  m4 macros for gmp assembly code, shared by all CPUs.
+
+dnl  Copyright 1999-2006, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+dnl  These macros are designed for use with any m4 and have been used on
+dnl  GNU, FreeBSD, NetBSD, OpenBSD and SysV.
+dnl
+dnl  GNU m4 and OpenBSD 2.7 m4 will give filenames and line numbers in error
+dnl  messages.
+dnl
+dnl
+dnl  Macros:
+dnl
+dnl  Most new m4 specific macros have an "m4_" prefix to emphasise they're
+dnl  m4 expansions.  But new defining things like deflit() and defreg() are
+dnl  named like the builtin define(), and forloop() is named following the
+dnl  GNU m4 example on which it's based.
+dnl
+dnl  GNU m4 with the -P option uses "m4_" as a prefix for builtins, but that
+dnl  option isn't going to be used, so there's no conflict or confusion.
+dnl
+dnl
+dnl  Comments in output:
+dnl
+dnl  The m4 comment delimiters are left at # and \n, the normal assembler
+dnl  commenting for most CPUs.  m4 passes comment text through without
+dnl  expanding macros in it, which is generally a good thing since it stops
+dnl  unexpected expansions and possible resultant errors.
+dnl
+dnl  But note that when a quoted string is being read, a # isn't special, so
+dnl  apostrophes in comments in quoted strings must be avoided or they'll be
+dnl  interpreted as a closing quote mark.  But when the quoted text is
+dnl  re-read # will still act like a normal comment, suppressing macro
+dnl  expansion.
+dnl
+dnl  For example,
+dnl
+dnl          # apostrophes in comments that're outside quotes are ok
+dnl          # and using macro names like PROLOGUE is ok too
+dnl          ...
+dnl          ifdef(`PIC',`
+dnl                  # but apostrophes aren't ok inside quotes
+dnl                  #                     ^--wrong
+dnl                  ...
+dnl                  # though macro names like PROLOGUE are still ok
+dnl                  ...
+dnl          ')
+dnl
+dnl  If macro expansion in a comment is wanted, use `#' in the .asm (ie. a
+dnl  quoted hash symbol), which will turn into # in the .s but get
+dnl  expansions done on that line.  This can make the .s more readable to
+dnl  humans, but it won't make a blind bit of difference to the assembler.
+dnl
+dnl  All the above applies, mutatis mutandis, when changecom() is used to
+dnl  select @ ! ; or whatever other commenting.
+dnl
+dnl
+dnl  Variations in m4 affecting gmp:
+dnl
+dnl  $# - When a macro is called as "foo" with no brackets, BSD m4 sets $#
+dnl       to 1, whereas GNU or SysV m4 set it to 0.  In all cases though
+dnl       "foo()" sets $# to 1.  This is worked around in various places.
+dnl
+dnl  len() - When "len()" is given an empty argument, BSD m4 evaluates to
+dnl       nothing, whereas GNU, SysV, and the new OpenBSD, evaluate to 0.
+dnl       See m4_length() below which works around this.
+dnl
+dnl  translit() - GNU m4 accepts character ranges like A-Z, and the new
+dnl       OpenBSD m4 does under option -g, but basic BSD and SysV don't.
+dnl
+dnl  popdef() - in BSD and SysV m4 popdef() takes multiple arguments and
+dnl       pops each, but GNU m4 only takes one argument.
+dnl
+dnl  push back - BSD m4 has some limits on the amount of text that can be
+dnl       pushed back.  The limit is reasonably big and so long as macros
+dnl       don't gratuitously duplicate big arguments it isn't a problem.
+dnl       Normally an error message is given, but sometimes it just hangs.
+dnl
+dnl  eval() &,|,^ - GNU and SysV m4 have bitwise operators &,|,^ available,
+dnl       but BSD m4 doesn't (contrary to what the man page suggests) and
+dnl       instead ^ is exponentiation.
+dnl
+dnl  eval() ?: - The C ternary operator "?:" is available in BSD m4, but not
+dnl       in SysV or GNU m4 (as of GNU m4 1.4 and betas of 1.5).
+dnl
+dnl  eval() -2^31 - BSD m4 has a bug where an eval() resulting in -2^31
+dnl       (ie. -2147483648) gives "-(".  Using -2147483648 within an
+dnl       expression is ok, it just can't be a final result.  "-(" will of
+dnl       course upset parsing, with all sorts of strange effects.
+dnl
+dnl  eval() <<,>> - SysV m4 doesn't support shift operators in eval() (on
+dnl       Solaris 7 /usr/xpg4/m4 has them but /usr/ccs/m4 doesn't).  See
+dnl       m4_lshift() and m4_rshift() below for workarounds.
+dnl
+dnl  ifdef() - OSF 4.0 m4 considers a macro defined to a zero value `0' or
+dnl       `00' etc as not defined.  See m4_ifdef below for a workaround.
+dnl
+dnl  m4wrap() sequence - in BSD m4, m4wrap() replaces any previous m4wrap()
+dnl       string, in SysV m4 it appends to it, and in GNU m4 it prepends.
+dnl       See m4wrap_prepend() below which brings uniformity to this.
+dnl
+dnl  m4wrap() 0xFF - old versions of BSD m4 store EOF in a C "char" under an
+dnl       m4wrap() and on systems where char is unsigned by default a
+dnl       spurious 0xFF is output.  This has been observed on recent Cray
+dnl       Unicos Alpha, Apple MacOS X, and HPUX 11 systems.  An autoconf
+dnl       test is used to check for this, see the m4wrap handling below.  It
+dnl       might work to end the m4wrap string with a dnl to consume the
+dnl       0xFF, but that probably induces the offending m4's to read from an
+dnl       already closed "FILE *", which could be bad on a glibc style
+dnl       stdio.
+dnl
+dnl  __file__,__line__ - GNU m4 and OpenBSD 2.7 m4 provide these, and
+dnl       they're used here to make error messages more informative.  GNU m4
+dnl       gives an unhelpful "NONE 0" in an m4wrap(), but that's worked
+dnl       around.
+dnl
+dnl  __file__ quoting - OpenBSD m4, unlike GNU m4, doesn't quote the
+dnl       filename in __file__, so care should be taken that no macro has
+dnl       the same name as a file, or an unwanted expansion will occur when
+dnl       printing an error or warning.
+dnl
+dnl  changecom() - BSD m4 changecom doesn't quite work like the man page
+dnl       suggests, in particular "changecom" or "changecom()" doesn't
+dnl       disable the comment feature, and multi-character comment sequences
+dnl       don't seem to work.  If the default `#' and newline aren't
+dnl       suitable it's necessary to change it to something else,
+dnl       eg. changecom(;).
+dnl
+dnl  OpenBSD 2.6 m4 - in this m4, eval() rejects decimal constants containing
+dnl       an 8 or 9, making it pretty much unusable.  The bug is confined to
+dnl       version 2.6 (it's not in 2.5, and was fixed in 2.7).
+dnl
+dnl  SunOS /usr/bin/m4 - this m4 lacks a number of desired features,
+dnl       including $# and $@, defn(), m4exit(), m4wrap(), pushdef(),
+dnl       popdef().  /usr/5bin/m4 is a SysV style m4 which should always be
+dnl       available, and "configure" will reject /usr/bin/m4 in favour of
+dnl       /usr/5bin/m4 (if necessary).
+dnl
+dnl       The sparc code actually has modest m4 requirements currently and
+dnl       could manage with /usr/bin/m4, but there's no reason to put our
+dnl       macros through contortions when /usr/5bin/m4 is available or GNU
+dnl       m4 can be installed.
+
+
+ifdef(`__ASM_DEFS_M4_INCLUDED__',
+`m4_error(`asm-defs.m4 already included, dont include it twice
+')m4exit(1)')
+define(`__ASM_DEFS_M4_INCLUDED__')
+
+
+dnl  Detect and give a message about the unsuitable OpenBSD 2.6 m4.
+
+ifelse(eval(89),89,,
+`errprint(
+`This m4 doesnt accept 8 and/or 9 in constants in eval(), making it unusable.
+This is probably OpenBSD 2.6 m4 (September 1999).  Upgrade to OpenBSD 2.7,
+or get a bug fix from the CVS (expr.c rev 1.9), or get GNU m4.  Dont forget
+to configure with M4=/wherever/m4 if you install one of these in a directory
+not in $PATH.
+')m4exit(1)')
+
+
+dnl  Detect and give a message about the unsuitable SunOS /usr/bin/m4.
+dnl
+dnl  Unfortunately this test doesn't work when m4 is run in the normal way
+dnl  from mpn/Makefile with "m4 -DOPERATION_foo foo.asm", since the bad m4
+dnl  takes "-" in "-D..." to mean read stdin, so it will look like it just
+dnl  hangs.  But running "m4 asm-defs.m4" to try it out will work.
+dnl
+dnl  We'd like to abort immediately on finding a problem, but unfortunately
+dnl  the bad m4 doesn't have an m4exit(), nor does an invalid eval() kill
+dnl  it.  Unexpanded $#'s in some m4_assert_numargs() later on will comment
+dnl  out some closing parentheses and kill it with "m4: arg stack overflow".
+
+define(m4_dollarhash_works_test,``$#'')
+ifelse(m4_dollarhash_works_test(x),1,,
+`errprint(
+`This m4 doesnt support $# and cant be used for GMP asm processing.
+If this is on SunOS, ./configure should choose /usr/5bin/m4 if you have that
+or can get it, otherwise install GNU m4.  Dont forget to configure with
+M4=/wherever/m4 if you install in a directory not in $PATH.
+')')
+undefine(`m4_dollarhash_works_test')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Basic error handling things.
+
+
+dnl  Usage: m4_dollarhash_1_if_noparen_p
+dnl
+dnl  Expand to 1 if a call "foo" gives $# set to 1 (as opposed to 0 like GNU
+dnl  and SysV m4 give).
+
+define(m4_dollarhash_1_if_noparen_test,`$#')
+define(m4_dollarhash_1_if_noparen_p,
+eval(m4_dollarhash_1_if_noparen_test==1))
+undefine(`m4_dollarhash_1_if_noparen_test')
+
+
+dnl  Usage: m4wrap_prepend(string)
+dnl
+dnl  Prepend the given string to what will be expanded under m4wrap at the
+dnl  end of input.
+dnl
+dnl  This macro exists to work around variations in m4wrap() behaviour in
+dnl  the various m4s (notes at the start of this file).  Don't use m4wrap()
+dnl  directly since it will interfere with this scheme.
+
+define(m4wrap_prepend,
+m4_assert_numargs(1)
+`define(`m4wrap_string',`$1'defn(`m4wrap_string'))')
+
+define(m4wrap_string,`')
+
+define(m4wrap_works_p,
+`ifelse(M4WRAP_SPURIOUS,yes,0,1)')
+
+ifelse(m4wrap_works_p,1,
+`m4wrap(`m4wrap_string')')
+
+
+dnl  Usage: m4_file_and_line
+dnl
+dnl  Expand to the current file and line number, if the GNU m4 extensions
+dnl  __file__ and __line__ are available.
+dnl
+dnl  In GNU m4 1.4 at the end of input when m4wrap text is expanded,
+dnl  __file__ is NONE and __line__ is 0, which is not a helpful thing to
+dnl  print.  If m4_file_seen() has been called to note the last file seen,
+dnl  then that file at a big line number is used, otherwise "end of input"
+dnl  is used (although "end of input" won't parse as an error message).
+
+define(m4_file_and_line,
+`ifdef(`__file__',
+`ifelse(__file__`'__line__,`NONE0',
+`ifdef(`m4_file_seen_last',`m4_file_seen_last: 999999: ',`end of input: ')',
+`__file__: __line__: ')')')
+
+
+dnl  Usage: m4_errprint_commas(arg,...)
+dnl
+dnl  The same as errprint(), but commas are printed between arguments
+dnl  instead of spaces.
+
+define(m4_errprint_commas,
+`errprint(`$1')dnl
+ifelse(eval($#>1),1,`errprint(`,')m4_errprint_commas(shift($@))')')
+
+
+dnl  Usage: m4_error(args...)
+dnl         m4_warning(args...)
+dnl
+dnl  Print an error message, using m4_errprint_commas, prefixed with the
+dnl  current filename and line number (if available).  m4_error sets up to
+dnl  give an error exit at the end of processing, m4_warning just prints.
+dnl  These macros are the recommended way to print errors.
+dnl
+dnl  The arguments here should be quoted in the usual way to prevent them
+dnl  being expanded when the macro call is read.  (m4_error takes care not
+dnl  to do any further expansion.)
+dnl
+dnl  For example,
+dnl
+dnl         m4_error(`some error message
+dnl         ')
+dnl
+dnl  which prints
+dnl
+dnl         foo.asm:123: some error message
+dnl
+dnl  or if __file__ and __line__ aren't available
+dnl
+dnl         some error message
+dnl
+dnl  The "file:line:" format is a basic style, used by gcc and GNU m4, so
+dnl  emacs and other editors will recognise it in their normal error message
+dnl  parsing.
+
+define(m4_warning,
+`m4_errprint_commas(m4_file_and_line`'$@)')
+
+define(m4_error,
+`define(`m4_error_occurred',1)m4_warning($@)dnl
+ifelse(m4wrap_works_p,0,`m4exit(1)')')
+
+define(`m4_error_occurred',0)
+
+dnl  This m4wrap_prepend() is first, so it'll be executed last.
+m4wrap_prepend(
+`ifelse(m4_error_occurred,1,
+`m4_error(`Errors occurred during m4 processing
+')m4exit(1)')')
+
+
+dnl  Usage: m4_assert_numargs(num)
+dnl
+dnl  Put this unquoted on a line on its own at the start of a macro
+dnl  definition to add some code to check that num many arguments get passed
+dnl  to the macro.  For example,
+dnl
+dnl         define(foo,
+dnl         m4_assert_numargs(2)
+dnl         `something `$1' and `$2' blah blah')
+dnl
+dnl  Then a call like foo(one,two,three) will provoke an error like
+dnl
+dnl         file:10: foo expected 2 arguments, got 3 arguments
+dnl
+dnl  Here are some calls and how many arguments they're interpreted as passing.
+dnl
+dnl         foo(abc,def)  2
+dnl         foo(xyz)      1
+dnl         foo()         0
+dnl         foo          -1
+dnl
+dnl  The -1 for no parentheses at all means a macro that's meant to be used
+dnl  that way can be checked with m4_assert_numargs(-1).  For example,
+dnl
+dnl         define(SPECIAL_SUFFIX,
+dnl         m4_assert_numargs(-1)
+dnl         `ifdef(`FOO',`_foo',`_bar')')
+dnl
+dnl  But as an alternative see also deflit() below where parenthesized
+dnl  expressions following a macro are passed through to the output.
+dnl
+dnl  Note that in BSD m4 there's no way to differentiate calls "foo" and
+dnl  "foo()", so in BSD m4 the distinction between the two isn't enforced.
+dnl  (In GNU and SysV m4 it can be checked, and is.)
+
+
+dnl  m4_assert_numargs is able to check its own arguments by calling
+dnl  assert_numargs_internal directly.
+dnl
+dnl  m4_doublequote($`'0) expands to ``$0'', whereas ``$`'0'' would expand
+dnl  to `$`'0' and do the wrong thing, and likewise for $1.  The same is
+dnl  done in other assert macros.
+dnl
+dnl  $`#' leaves $# in the new macro being defined, and stops # being
+dnl  interpreted as a comment character.
+dnl
+dnl  `dnl ' means an explicit dnl isn't necessary when m4_assert_numargs is
+dnl  used.  The space means that if there is a dnl it'll still work.
+
+dnl  Usage: m4_doublequote(x) expands to ``x''
+define(m4_doublequote,
+`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))``$1''')
+
+define(m4_assert_numargs,
+`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))dnl
+`m4_assert_numargs_internal'(m4_doublequote($`'0),$1,$`#',`len'(m4_doublequote($`'1)))`dnl '')
+
+dnl  Called: m4_assert_numargs_internal(`macroname',wantargs,$#,len(`$1'))
+define(m4_assert_numargs_internal,
+`m4_assert_numargs_internal_check(`$1',`$2',m4_numargs_count(`$3',`$4'))')
+
+dnl  Called: m4_assert_numargs_internal_check(`macroname',wantargs,gotargs)
+dnl
+dnl  If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it
+dnl  should be -1.  If wantargs is -1 but gotargs is 0 and the two can't be
+dnl  distinguished then it's allowed to pass.
+dnl
+define(m4_assert_numargs_internal_check,
+`ifelse(eval($2 == $3
+             || ($2==-1 && $3==0 && m4_dollarhash_1_if_noparen_p)),0,
+`m4_error(`$1 expected 'm4_Narguments(`$2')`, got 'm4_Narguments(`$3')
+)')')
+
+dnl  Called: m4_numargs_count($#,len(`$1'))
+dnl  If $#==0 then -1 args, if $#==1 but len(`$1')==0 then 0 args, otherwise
+dnl  $# args.
+define(m4_numargs_count,
+`ifelse($1,0, -1,
+`ifelse(eval($1==1 && $2-0==0),1, 0, $1)')')
+
+dnl  Usage: m4_Narguments(N)
+dnl  "$1 argument" or "$1 arguments" with the plural according to $1.
+define(m4_Narguments,
+`$1 argument`'ifelse(`$1',1,,s)')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Additional error checking things.
+
+
+dnl  Usage: m4_file_seen()
+dnl
+dnl  Record __file__ for the benefit of m4_file_and_line in m4wrap text.
+dnl
+dnl  The basic __file__ macro comes out quoted in GNU m4, like `foo.asm',
+dnl  and m4_file_seen_last is defined like that too.
+dnl
+dnl  This is used by PROLOGUE, since that's normally in the main .asm file,
+dnl  and in particular it sets up m4wrap error checks for missing EPILOGUE.
+
+define(m4_file_seen,
+m4_assert_numargs(0)
+`ifelse(__file__,`NONE',,
+`define(`m4_file_seen_last',m4_doublequote(__file__))')')
+
+
+dnl  Usage: m4_assert_onearg()
+dnl
+dnl  Put this, unquoted, at the start of a macro definition to add some code
+dnl  to check that one argument is passed to the macro, but with that
+dnl  argument allowed to be empty.  For example,
+dnl
+dnl          define(foo,
+dnl          m4_assert_onearg()
+dnl          `blah blah $1 blah blah')
+dnl
+dnl  Calls "foo(xyz)" or "foo()" are accepted.  A call "foo(xyz,abc)" fails.
+dnl  A call "foo" fails too, but BSD m4 can't detect this case (GNU and SysV
+dnl  m4 can).
+
+define(m4_assert_onearg,
+m4_assert_numargs(0)
+`m4_assert_onearg_internal'(m4_doublequote($`'0),$`#')`dnl ')
+
+dnl  Called: m4_assert_onearg(`macroname',$#)
+define(m4_assert_onearg_internal,
+`ifelse($2,1,,
+`m4_error(`$1 expected 1 argument, got 'm4_Narguments(`$2')
+)')')
+
+
+dnl  Usage: m4_assert_numargs_range(low,high)
+dnl
+dnl  Put this, unquoted, at the start of a macro definition to add some code
+dnl  to check that between low and high many arguments get passed to the
+dnl  macro.  For example,
+dnl
+dnl         define(foo,
+dnl         m4_assert_numargs_range(3,5)
+dnl         `mandatory $1 $2 $3 optional $4 $5 end')
+dnl
+dnl  See m4_assert_numargs() for more info.
+
+define(m4_assert_numargs_range,
+m4_assert_numargs(2)
+``m4_assert_numargs_range_internal'(m4_doublequote($`'0),$1,$2,$`#',`len'(m4_doublequote($`'1)))`dnl '')
+
+dnl  Called: m4_assert_numargs_range_internal(`name',low,high,$#,len(`$1'))
+define(m4_assert_numargs_range_internal,
+m4_assert_numargs(5)
+`m4_assert_numargs_range_check(`$1',`$2',`$3',m4_numargs_count(`$4',`$5'))')
+
+dnl  Called: m4_assert_numargs_range_check(`name',low,high,gotargs)
+dnl
+dnl  If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it
+dnl  should be -1.  To ensure a `high' of -1 works, a fudge is applied to
+dnl  gotargs if it's 0 and the 0 and -1 cases can't be distinguished.
+dnl
+define(m4_assert_numargs_range_check,
+m4_assert_numargs(4)
+`ifelse(eval($2 <= $4 &&
+             ($4 - ($4==0 && m4_dollarhash_1_if_noparen_p) <= $3)),0,
+`m4_error(`$1 expected $2 to $3 arguments, got 'm4_Narguments(`$4')
+)')')
+
+
+dnl  Usage: m4_assert_defined(symbol)
+dnl
+dnl  Put this unquoted on a line of its own at the start of a macro
+dnl  definition to add some code to check that the given symbol is defined
+dnl  when the macro is used.  For example,
+dnl
+dnl          define(foo,
+dnl          m4_assert_defined(`FOO_PREFIX')
+dnl          `FOO_PREFIX whatever')
+dnl
+dnl  This is a convenient way to check that the user or ./configure or
+dnl  whatever has defined the things needed by a macro, as opposed to
+dnl  silently generating garbage.
+
+define(m4_assert_defined,
+m4_assert_numargs(1)
+``m4_assert_defined_internal'(m4_doublequote($`'0),``$1'')`dnl '')
+
+dnl  Called: m4_assert_defined_internal(`macroname',`define_required')
+define(m4_assert_defined_internal,
+m4_assert_numargs(2)
+`m4_ifdef(`$2',,
+`m4_error(`$1 needs $2 defined
+')')')
+
+
+dnl  Usage: m4_not_for_expansion(`SYMBOL')
+dnl         define_not_for_expansion(`SYMBOL')
+dnl
+dnl  m4_not_for_expansion turns SYMBOL, if defined, into something which
+dnl  will give an error if expanded.  For example,
+dnl
+dnl         m4_not_for_expansion(`PIC')
+dnl
+dnl  define_not_for_expansion is the same, but always makes a definition.
+dnl
+dnl  These are for symbols that should be tested with ifdef(`FOO',...)
+dnl  rather than be expanded as such.  They guard against accidentally
+dnl  omitting the quotes, as in ifdef(FOO,...).  Note though that they only
+dnl  catches this when FOO is defined, so be sure to test code both with and
+dnl  without each definition.
+
+define(m4_not_for_expansion,
+m4_assert_numargs(1)
+`ifdef(`$1',`define_not_for_expansion(`$1')')')
+
+define(define_not_for_expansion,
+m4_assert_numargs(1)
+`ifelse(defn(`$1'),,,
+`m4_error(``$1' has a non-empty value, maybe it shouldnt be munged with m4_not_for_expansion()
+')')dnl
+define(`$1',`m4_not_for_expansion_internal(`$1')')')
+
+define(m4_not_for_expansion_internal,
+`m4_error(``$1' is not meant to be expanded, perhaps you mean `ifdef(`$1',...)'
+')')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Various generic m4 things.
+
+
+dnl  Usage: m4_unquote(macro)
+dnl
+dnl  Allow the argument text to be re-evaluated.  This is useful for "token
+dnl  pasting" like m4_unquote(foo`'bar).
+
+define(m4_unquote,
+m4_assert_onearg()
+`$1')
+
+
+dnl  Usage: m4_ifdef(name,yes[,no])
+dnl
+dnl  Expand to the yes argument if name is defined, or to the no argument if
+dnl  not.
+dnl
+dnl  This is the same as the builtin "ifdef", but avoids an OSF 4.0 m4 bug
+dnl  in which a macro with a zero value `0' or `00' etc is considered not
+dnl  defined.
+dnl
+dnl  There's no particular need to use this everywhere, only if there might
+dnl  be a zero value.
+
+define(m4_ifdef,
+m4_assert_numargs_range(2,3)
+`ifelse(eval(ifdef(`$1',1,0)+m4_length(defn(`$1'))),0,
+`$3',`$2')')
+
+
+dnl  Usage: m4_ifdef_anyof_p(`symbol',...)
+dnl
+dnl  Expand to 1 if any of the symbols in the argument list are defined, or
+dnl  to 0 if not.
+
+define(m4_ifdef_anyof_p,
+`ifelse(eval($#<=1 && m4_length(`$1')==0),1, 0,
+`ifdef(`$1', 1,
+`m4_ifdef_anyof_p(shift($@))')')')
+
+
+dnl  Usage: m4_length(string)
+dnl
+dnl  Determine the length of a string.  This is the same as len(), but
+dnl  always expands to a number, working around the BSD len() which
+dnl  evaluates to nothing given an empty argument.
+
+define(m4_length,
+m4_assert_onearg()
+`eval(len(`$1')-0)')
+
+
+dnl  Usage: m4_stringequal_p(x,y)
+dnl
+dnl  Expand to 1 or 0 according as strings x and y are equal or not.
+
+define(m4_stringequal_p,
+`ifelse(`$1',`$2',1,0)')
+
+
+dnl  Usage: m4_incr_or_decr(n,last)
+dnl
+dnl  Do an incr(n) or decr(n), whichever is in the direction of "last".
+dnl  Both n and last must be numbers of course.
+
+define(m4_incr_or_decr,
+m4_assert_numargs(2)
+`ifelse(eval($1<$2),1,incr($1),decr($1))')
+
+
+dnl  Usage: forloop(i, first, last, statement)
+dnl
+dnl  Based on GNU m4 examples/forloop.m4, but extended.
+dnl
+dnl  statement is expanded repeatedly, with i successively defined as
+dnl
+dnl         first, first+1, ..., last-1, last
+dnl
+dnl  Or if first > last, then it's
+dnl
+dnl         first, first-1, ..., last+1, last
+dnl
+dnl  If first == last, then one expansion is done.
+dnl
+dnl  A pushdef/popdef of i is done to preserve any previous definition (or
+dnl  lack of definition).  first and last are eval()ed and so can be
+dnl  expressions.
+dnl
+dnl  forloop_first is defined to 1 on the first iteration, 0 on the rest.
+dnl  forloop_last is defined to 1 on the last iteration, 0 on the others.
+dnl  Nested forloops are allowed, in which case forloop_first and
+dnl  forloop_last apply to the innermost loop that's open.
+dnl
+dnl  A simple example,
+dnl
+dnl         forloop(i, 1, 2*2+1, `dnl
+dnl         iteration number i ... ifelse(forloop_first,1,FIRST)
+dnl         ')
+
+
+dnl  "i" and "statement" are carefully quoted, but "first" and "last" are
+dnl  just plain numbers once eval()ed.
+
+define(`forloop',
+m4_assert_numargs(4)
+`pushdef(`$1',eval(`$2'))dnl
+pushdef(`forloop_first',1)dnl
+pushdef(`forloop_last',0)dnl
+forloop_internal(`$1',eval(`$3'),`$4')`'dnl
+popdef(`forloop_first')dnl
+popdef(`forloop_last')dnl
+popdef(`$1')')
+
+dnl  Called: forloop_internal(`var',last,statement)
+define(`forloop_internal',
+m4_assert_numargs(3)
+`ifelse($1,$2,
+`define(`forloop_last',1)$3',
+`$3`'dnl
+define(`forloop_first',0)dnl
+define(`$1',m4_incr_or_decr($1,$2))dnl
+forloop_internal(`$1',$2,`$3')')')
+
+
+dnl  Usage: foreach(var,body, item1,item2,...,itemN)
+dnl
+dnl  For each "item" argument, define "var" to that value and expand "body".
+dnl  For example,
+dnl
+dnl         foreach(i, `something i
+dnl         ', one, two)
+dnl  gives
+dnl         something one
+dnl         something two
+dnl
+dnl  Any previous definition of "var", or lack thereof, is saved and
+dnl  restored.  Empty "item"s are not allowed.
+
+define(foreach,
+m4_assert_numargs_range(2,1000)
+`ifelse(`$3',,,
+`pushdef(`$1',`$3')$2`'popdef(`$1')dnl
+foreach(`$1',`$2',shift(shift(shift($@))))')')
+
+
+dnl  Usage: m4_toupper(x)
+dnl         m4_tolower(x)
+dnl
+dnl  Convert the argument string to upper or lower case, respectively.
+dnl  Only one argument accepted.
+dnl
+dnl  BSD m4 doesn't take ranges like a-z in translit(), so the full alphabet
+dnl  is written out.
+
+define(m4_alphabet_lower, `abcdefghijklmnopqrstuvwxyz')
+define(m4_alphabet_upper, `ABCDEFGHIJKLMNOPQRSTUVWXYZ')
+
+define(m4_toupper,
+m4_assert_onearg()
+`translit(`$1', m4_alphabet_lower, m4_alphabet_upper)')
+
+define(m4_tolower,
+m4_assert_onearg()
+`translit(`$1', m4_alphabet_upper, m4_alphabet_lower)')
+
+
+dnl  Usage: m4_empty_if_zero(x)
+dnl
+dnl  Evaluate to x, or to nothing if x is 0.  x is eval()ed and so can be an
+dnl  expression.
+dnl
+dnl  This is useful for x86 addressing mode displacements since forms like
+dnl  (%ebx) are one byte shorter than 0(%ebx).  A macro `foo' for use as
+dnl  foo(%ebx) could be defined with the following so it'll be empty if the
+dnl  expression comes out zero.
+dnl
+dnl	   deflit(`foo', `m4_empty_if_zero(a+b*4-c)')
+dnl
+dnl  Naturally this shouldn't be done if, say, a computed jump depends on
+dnl  the code being a particular size.
+
+define(m4_empty_if_zero,
+m4_assert_onearg()
+`ifelse(eval($1),0,,eval($1))')
+
+
+dnl  Usage: m4_log2(x)
+dnl
+dnl  Calculate a logarithm to base 2.
+dnl  x must be an integral power of 2, between 2**0 and 2**30.
+dnl  x is eval()ed, so it can be an expression.
+dnl  An error results if x is invalid.
+dnl
+dnl  2**31 isn't supported, because an unsigned 2147483648 is out of range
+dnl  of a 32-bit signed int.  Also, the bug in BSD m4 where an eval()
+dnl  resulting in 2147483648 (or -2147483648 as the case may be) gives `-('
+dnl  means tests like eval(1<<31==(x)) would be necessary, but that then
+dnl  gives an unattractive explosion of eval() error messages if x isn't
+dnl  numeric.
+
+define(m4_log2,
+m4_assert_numargs(1)
+`m4_log2_internal(0,1,eval(`$1'))')
+
+dnl  Called: m4_log2_internal(n,2**n,target)
+define(m4_log2_internal,
+m4_assert_numargs(3)
+`ifelse($2,$3,$1,
+`ifelse($1,30,
+`m4_error(`m4_log2() argument too big or not a power of two: $3
+')',
+`m4_log2_internal(incr($1),eval(2*$2),$3)')')')
+
+
+dnl  Usage:  m4_div2_towards_zero
+dnl
+dnl  m4 division is probably whatever a C signed division is, and C doesn't
+dnl  specify what rounding gets used on negatives, so this expression forces
+dnl  a rounding towards zero.
+
+define(m4_div2_towards_zero,
+m4_assert_numargs(1)
+`eval((($1) + ((($1)<0) & ($1))) / 2)')
+
+
+dnl  Usage: m4_lshift(n,count)
+dnl         m4_rshift(n,count)
+dnl
+dnl  Calculate n shifted left or right by count many bits.  Both n and count
+dnl  are eval()ed and so can be expressions.
+dnl
+dnl  Negative counts are allowed and mean a shift in the opposite direction.
+dnl  Negative n is allowed and right shifts will be arithmetic (meaning
+dnl  divide by 2**count, rounding towards zero, also meaning the sign bit is
+dnl  duplicated).
+dnl
+dnl  Use these macros instead of << and >> in eval() since the basic ccs
+dnl  SysV m4 doesn't have those operators.
+
+define(m4_rshift,
+m4_assert_numargs(2)
+`m4_lshift(`$1',-(`$2'))')
+
+define(m4_lshift,
+m4_assert_numargs(2)
+`m4_lshift_internal(eval(`$1'),eval(`$2'))')
+
+define(m4_lshift_internal,
+m4_assert_numargs(2)
+`ifelse(eval($2-0==0),1,$1,
+`ifelse(eval($2>0),1,
+`m4_lshift_internal(eval($1*2),decr($2))',
+`m4_lshift_internal(m4_div2_towards_zero($1),incr($2))')')')
+
+
+dnl  Usage: m4_popcount(n)
+dnl
+dnl  Expand to the number 1 bits in n.
+
+define(m4_popcount,
+m4_assert_numargs(1)
+`m4_popcount_internal(0,eval(`$1'))')
+
+dnl  Called: m4_popcount_internal(count,rem)
+define(m4_popcount_internal,
+m4_assert_numargs(2)
+`ifelse($2,0,$1,
+`m4_popcount_internal(eval($1+($2%2)),eval($2/2))')')
+
+
+dnl  Usage: m4_count_trailing_zeros(N)
+dnl
+dnl  Determine the number of trailing zero bits on N.  N is eval()ed and so
+dnl  can be an expression.  If N is zero an error is generated.
+
+define(m4_count_trailing_zeros,
+m4_assert_numargs(1)
+`m4_count_trailing_zeros_internal(eval(`$1'),0)')
+
+dnl  Called: m4_count_trailing_zeros_internal(val,count)
+define(m4_count_trailing_zeros_internal,
+m4_assert_numargs(2)
+`ifelse($1,0,
+`m4_error(`m4_count_trailing_zeros() given a zero value')',
+`ifelse(eval(($1)%2),1,`$2',
+`m4_count_trailing_zeros_internal(eval($1/2),incr($2))')')')
+
+
+dnl  Usage: deflit(name,value)
+dnl
+dnl  Like define(), but "name" expands like a literal, rather than taking
+dnl  arguments.  For example "name(%eax)" expands to "value(%eax)".
+dnl
+dnl  Limitations:
+dnl
+dnl  $ characters in the value part must have quotes to stop them looking
+dnl  like macro parameters.  For example, deflit(reg,`123+$`'4+567').  See
+dnl  defreg() below for handling simple register definitions like $7 etc.
+dnl
+dnl  "name()" is turned into "name", unfortunately.  In GNU and SysV m4 an
+dnl  error is generated when this happens, but in BSD m4 it will happen
+dnl  silently.  The problem is that in BSD m4 $# is 1 in both "name" or
+dnl  "name()", so there's no way to differentiate them.  Because we want
+dnl  plain "name" to turn into plain "value", we end up with "name()"
+dnl  turning into plain "value" too.
+dnl
+dnl  "name(foo)" will lose any whitespace after commas in "foo", for example
+dnl  "disp(%eax, %ecx)" would become "128(%eax,%ecx)".
+dnl
+dnl  These parentheses oddities shouldn't matter in assembler text, but if
+dnl  they do the suggested workaround is to write "name ()" or "name (foo)"
+dnl  to stop the parentheses looking like a macro argument list.  If a space
+dnl  isn't acceptable in the output, then write "name`'()" or "name`'(foo)".
+dnl  The `' is stripped when read, but again stops the parentheses looking
+dnl  like parameters.
+
+dnl  Quoting for deflit_emptyargcheck is similar to m4_assert_numargs.  The
+dnl  stuff in the ifelse gives a $#, $1 and $@ evaluated in the new macro
+dnl  created, not in deflit.
+define(deflit,
+m4_assert_numargs(2)
+`define(`$1',
+`deflit_emptyargcheck'(``$1'',$`#',m4_doublequote($`'1))`dnl
+$2`'dnl
+ifelse(eval($'`#>1 || m4_length('m4_doublequote($`'1)`)!=0),1,($'`@))')')
+
+dnl  Called: deflit_emptyargcheck(macroname,$#,`$1')
+define(deflit_emptyargcheck,
+`ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1,
+`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-defs.m4 for more information)
+')')')
+
+
+dnl  Usage: m4_assert(`expr')
+dnl
+dnl  Test a compile-time requirement with an m4 expression.  The expression
+dnl  should be quoted, and will be eval()ed and expected to be non-zero.
+dnl  For example,
+dnl
+dnl         m4_assert(`FOO*2+6 < 14')
+
+dnl  Accept any non-zero eval() result as success, matching the "expected
+dnl  to be non-zero" contract documented above; previously only an exact
+dnl  result of 1 passed, so eg. m4_assert(`FOO') with FOO=2 would fail.
+define(m4_assert,
+m4_assert_numargs(1)
+`ifelse(eval(($1)!=0),1,,
+`m4_error(`assertion failed: $1
+')')')
+
+
+dnl  Usage: m4_repeat(count,text)
+dnl
+dnl  Expand to the given repetitions of the given text.  A zero count is
+dnl  allowed, and expands to nothing.
+
+define(m4_repeat,
+m4_assert_numargs(2)
+`m4_repeat_internal(eval($1),`$2')')
+
+dnl  Called: m4_repeat_internal(count,text), with count already eval()ed.
+dnl  forloop() counts from 1, so a zero count must be special-cased here to
+dnl  expand to nothing.
+define(m4_repeat_internal,
+m4_assert_numargs(2)
+`ifelse(`$1',0,,
+`forloop(m4_repeat_internal_counter,1,$1,``$2'')')')
+
+
+dnl  Usage: m4_hex_lowmask(bits)
+dnl
+dnl  Generate a hex constant which is a low mask of the given number of
+dnl  bits.  For example m4_hex_lowmask(10) would give 0x3ff.
+
+define(m4_hex_lowmask,
+m4_assert_numargs(1)
+`m4_cpu_hex_constant(m4_hex_lowmask_internal1(eval(`$1')))')
+
+dnl  Called: m4_hex_lowmask_internal1(bits)
+dnl  Split the bit count into a leading partial digit (bits%4) and a count
+dnl  of full "f" digits (bits/4).  A zero bit count gives a plain "0".
+define(m4_hex_lowmask_internal1,
+m4_assert_numargs(1)
+`ifelse($1,0,`0',
+`m4_hex_lowmask_internal2(eval(($1)%4),eval(($1)/4))')')
+
+dnl  Called: m4_hex_lowmask_internal2(remainder,digits)
+dnl  A remainder of 0 emits no leading digit, just the "f"s.
+define(m4_hex_lowmask_internal2,
+m4_assert_numargs(2)
+`ifelse($1,1,`1',
+`ifelse($1,2,`3',
+`ifelse($1,3,`7')')')dnl
+m4_repeat($2,`f')')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  The following m4_list functions take a list as multiple arguments.
+dnl  Arguments are evaluated multiple times, there's no attempt at strict
+dnl  quoting.  Empty list elements are not allowed, since an empty final
+dnl  argument is ignored.  These restrictions don't affect the current uses,
+dnl  and make the implementation easier.
+
+
+dnl  Usage: m4_list_quote(list,...)
+dnl
+dnl  Produce a list with quoted commas, so it can be a single argument
+dnl  string.  For instance m4_list_quote(a,b,c) gives
+dnl
+dnl         a`,'b`,'c`,'
+dnl
+dnl  This can be used to put a list in a define,
+dnl
+dnl         define(foolist, m4_list_quote(a,b,c))
+dnl
+dnl  Which can then be used for instance as
+dnl
+dnl         m4_list_find(target, foolist)
+
+dnl  Every element gets a quoted comma appended, including the last, hence
+dnl  the trailing `,' shown in the example above.
+define(m4_list_quote,
+`ifelse(`$1',,,
+`$1`,'m4_list_quote(shift($@))')')
+
+
+dnl  Usage: m4_list_find(key,list,...)
+dnl
+dnl  Evaluate to 1 or 0 according to whether key is in the list elements.
+
+dnl  Recursion examines one list element ($2) per step; an empty $2 means
+dnl  the end of the list was reached without a match.
+define(m4_list_find,
+m4_assert_numargs_range(1,1000)
+`ifelse(`$2',,0,
+`ifelse(`$1',`$2',1,
+`m4_list_find(`$1',shift(shift($@)))')')')
+
+
+dnl  Usage: m4_list_remove(key,list,...)
+dnl
+dnl  Evaluate to the given list with `key' removed (if present).
+
+dnl  Each non-matching element is emitted followed by a bare comma, so the
+dnl  result carries a trailing comma; the caller in EPILOGUE_internal below
+dnl  passes the result back through m4_list_quote.
+define(m4_list_remove,
+m4_assert_numargs_range(1,1000)
+`ifelse(`$2',,,
+`ifelse(`$1',`$2',,`$2,')dnl
+m4_list_remove(`$1',shift(shift($@)))')')
+
+
+dnl  Usage: m4_list_first(list,...)
+dnl
+dnl  Evaluate to the first element of the list (if any).
+
+define(m4_list_first,`$1')
+
+
+dnl  Usage: m4_list_count(list,...)
+dnl
+dnl  Evaluate to the number of elements in the list.  This can't just use $#
+dnl  because the last element might be empty.
+
+define(m4_list_count,
+`m4_list_count_internal(0,$@)')
+
+dnl  Called: m4_list_count_internal(count,list,...)
+dnl  Tail recursion accumulating the count in $1, one element per step.
+define(m4_list_count_internal,
+m4_assert_numargs_range(1,1000)
+`ifelse(`$2',,$1,
+`m4_list_count_internal(eval($1+1),shift(shift($@)))')')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Various assembler things, not specific to any particular CPU.
+dnl
+
+
+dnl  Usage: include_mpn(`filename')
+dnl
+dnl  Like include(), but adds a path to the mpn source directory.  For
+dnl  example,
+dnl
+dnl         include_mpn(`sparc64/addmul_1h.asm')
+
+dnl  CONFIG_TOP_SRCDIR is presumably established by ./configure in
+dnl  config.m4 -- confirm against the configure machinery.
+define(include_mpn,
+m4_assert_numargs(1)
+m4_assert_defined(`CONFIG_TOP_SRCDIR')
+`include(CONFIG_TOP_SRCDIR`/mpn/$1')')
+
+
+dnl  Usage: C comment ...
+dnl
+dnl  This works like a FORTRAN-style comment character.  It can be used for
+dnl  comments to the right of assembly instructions, where just dnl would
+dnl  remove the newline and concatenate adjacent lines.
+dnl
+dnl  C and/or dnl are useful when an assembler doesn't support comments, or
+dnl  where different assemblers for a particular CPU need different styles.
+dnl  The intermediate ".s" files will end up with no comments, just code.
+dnl
+dnl  Using C is not intended to cause offence to anyone who doesn't like
+dnl  FORTRAN; but if that happens it's an unexpected bonus.
+dnl
+dnl  During development, if comments are wanted in the .s files to help see
+dnl  what's expanding where, C can be redefined with something like
+dnl
+dnl         define(`C',`#')
+
+dnl  C expands to a newline followed by "dnl", so the rest of the source
+dnl  line (the comment text) is discarded while the code before C keeps
+dnl  its line break.
+define(C, `
+dnl')
+
+
+dnl  Normally PIC is defined (or not) by libtool, but it doesn't set it on
+dnl  systems which are always PIC.  PIC_ALWAYS established in config.m4
+dnl  identifies these for us.
+
+ifelse(PIC_ALWAYS,`yes',`define(`PIC')')
+
+
+dnl  Various possible defines passed from the Makefile that are to be tested
+dnl  with ifdef() rather than be expanded.
+
+m4_not_for_expansion(`PIC')
+m4_not_for_expansion(`DLL_EXPORT')
+
+dnl  aors_n
+m4_not_for_expansion(`OPERATION_add_n')
+m4_not_for_expansion(`OPERATION_sub_n')
+
+dnl  aors_err1_n
+m4_not_for_expansion(`OPERATION_add_err1_n')
+m4_not_for_expansion(`OPERATION_sub_err1_n')
+
+dnl  aors_err2_n
+m4_not_for_expansion(`OPERATION_add_err2_n')
+m4_not_for_expansion(`OPERATION_sub_err2_n')
+
+dnl  aors_err3_n
+m4_not_for_expansion(`OPERATION_add_err3_n')
+m4_not_for_expansion(`OPERATION_sub_err3_n')
+
+dnl  aorsmul_1
+m4_not_for_expansion(`OPERATION_addmul_1')
+m4_not_for_expansion(`OPERATION_submul_1')
+
+dnl  logops_n
+m4_not_for_expansion(`OPERATION_and_n')
+m4_not_for_expansion(`OPERATION_andn_n')
+m4_not_for_expansion(`OPERATION_nand_n')
+m4_not_for_expansion(`OPERATION_ior_n')
+m4_not_for_expansion(`OPERATION_iorn_n')
+m4_not_for_expansion(`OPERATION_nior_n')
+m4_not_for_expansion(`OPERATION_xor_n')
+m4_not_for_expansion(`OPERATION_xnor_n')
+
+dnl  popham
+m4_not_for_expansion(`OPERATION_popcount')
+m4_not_for_expansion(`OPERATION_hamdist')
+
+dnl  lorrshift
+m4_not_for_expansion(`OPERATION_lshift')
+m4_not_for_expansion(`OPERATION_rshift')
+
+dnl  aorslsh1_n
+m4_not_for_expansion(`OPERATION_addlsh1_n')
+m4_not_for_expansion(`OPERATION_sublsh1_n')
+m4_not_for_expansion(`OPERATION_rsblsh1_n')
+
+dnl  aorslsh2_n
+m4_not_for_expansion(`OPERATION_addlsh2_n')
+m4_not_for_expansion(`OPERATION_sublsh2_n')
+m4_not_for_expansion(`OPERATION_rsblsh2_n')
+
+dnl  rsh1aors_n
+m4_not_for_expansion(`OPERATION_rsh1add_n')
+m4_not_for_expansion(`OPERATION_rsh1sub_n')
+
+
+dnl  Usage: m4_config_gmp_mparam(`symbol')
+dnl
+dnl  Check that `symbol' is defined.  If it isn't, issue an error and
+dnl  terminate immediately.  The error message explains that the symbol
+dnl  should be in config.m4, copied from gmp-mparam.h.
+dnl
+dnl  Termination is immediate since missing say SQR_TOOM2_THRESHOLD can
+dnl  lead to infinite loops and endless error messages.
+
+dnl  Fix: the last message line was indented with spaces while the others
+dnl  use a tab, so the emitted error text was misaligned.
+define(m4_config_gmp_mparam,
+m4_assert_numargs(1)
+`ifdef(`$1',,
+`m4_error(`$1 is not defined.
+	"configure" should have extracted this from gmp-mparam.h and put it
+	in config.m4 (or in <cpu>_<file>.asm for a fat binary), but somehow
+	this has failed.
+')m4exit(1)')')
+
+
+dnl  Usage: defreg(name,reg)
+dnl
+dnl  Give a name to a $ style register.  For example,
+dnl
+dnl         defreg(foo,$12)
+dnl
+dnl  defreg() inserts an extra pair of quotes after the $ so that it's not
+dnl  interpreted as an m4 macro parameter, ie. foo is actually $`'12.  m4
+dnl  strips those quotes when foo is expanded.
+dnl
+dnl  deflit() is used to make the new definition, so it will expand
+dnl  literally even if followed by parentheses ie. foo(99) will become
+dnl  $12(99).  (But there's nowhere that would be used is there?)
+dnl
+dnl  When making further definitions from existing defreg() macros, remember
+dnl  to use defreg() again to protect the $ in the new definitions too.  For
+dnl  example,
+dnl
+dnl         defreg(a0,$4)
+dnl         defreg(a1,$5)
+dnl         ...
+dnl
+dnl         defreg(PARAM_DST,a0)
+dnl
+dnl  This is only because a0 is expanding at the time the PARAM_DST
+dnl  definition is made, leaving a literal $4 that must be re-quoted.  On
+dnl  the other hand in something like the following ra is only expanded when
+dnl  ret is used and its $`'31 protection will have its desired effect at
+dnl  that time.
+dnl
+dnl         defreg(ra,$31)
+dnl         ...
+dnl         define(ret,`j ra')
+dnl
+dnl  Note that only $n forms are meant to be used here, and something like
+dnl  128($30) doesn't get protected and will come out wrong.
+
+dnl  substr splits "$2" into its leading "$" and the rest, with `' quotes
+dnl  inserted between, so the register number isn't read as an m4 macro
+dnl  parameter (see the comments above).
+define(defreg,
+m4_assert_numargs(2)
+`deflit(`$1',
+substr(`$2',0,1)``''substr(`$2',1))')
+
+
+dnl  Usage: m4_instruction_wrapper()
+dnl
+dnl  Put this, unquoted, on a line on its own, at the start of a macro
+dnl  that's a wrapper around an assembler instruction.  It adds code to give
+dnl  a descriptive error message if the macro is invoked without arguments.
+dnl
+dnl  For example, suppose jmp needs to be wrapped,
+dnl
+dnl         define(jmp,
+dnl         m4_instruction_wrapper()
+dnl         m4_assert_numargs(1)
+dnl                 `.byte 0x42
+dnl                 .long  $1
+dnl                 nop')
+dnl
+dnl  The point of m4_instruction_wrapper is to get a better error message
+dnl  than m4_assert_numargs would give if jmp is accidentally used as plain
+dnl  "jmp foo" instead of the intended "jmp( foo)".  "jmp()" with no
+dnl  argument also provokes the error message.
+dnl
+dnl  m4_instruction_wrapper should only be used with wrapped instructions
+dnl  that take arguments, since obviously something meant to be used as say
+dnl  plain "ret" doesn't want to give an error when used that way.
+
+define(m4_instruction_wrapper,
+m4_assert_numargs(0)
+``m4_instruction_wrapper_internal'(m4_doublequote($`'0),dnl
+ifdef(`__file__',`m4_doublequote(__file__)',``the m4 sources''),dnl
+$`#',m4_doublequote($`'1))`dnl'')
+
+dnl  Called: m4_instruction_wrapper_internal($0,`filename',$#,$1)
+dnl  Complain when the wrapped macro was used without arguments, ie. either
+dnl  bare or as an empty "name()".
+define(m4_instruction_wrapper_internal,
+`ifelse(eval($3<=1 && m4_length(`$4')==0),1,
+`m4_error(`$1 is a macro replacing that instruction and needs arguments, see $2 for details
+')')')
+
+
+dnl  Usage: m4_cpu_hex_constant(string)
+dnl
+dnl  Expand to the string prefixed by a suitable `0x' hex marker.  This
+dnl  should be redefined as necessary for CPUs with different conventions.
+
+dnl  Plain "0x" prefix by default; per the comments above, CPU specific m4
+dnl  files redefine this where the assembler wants different syntax.
+define(m4_cpu_hex_constant,
+m4_assert_numargs(1)
+`0x`$1'')
+
+
+dnl  Usage: UNROLL_LOG2, UNROLL_MASK, UNROLL_BYTES
+dnl         CHUNK_LOG2, CHUNK_MASK, CHUNK_BYTES
+dnl
+dnl  When code supports a variable amount of loop unrolling, the convention
+dnl  is to define UNROLL_COUNT to the number of limbs processed per loop.
+dnl  When testing code this can be varied to see how much the loop overhead
+dnl  is costing.  For example,
+dnl
+dnl         deflit(UNROLL_COUNT, 32)
+dnl
+dnl  If the forloop() generating the unrolled loop has a pattern processing
+dnl  more than one limb, the convention is to express this with CHUNK_COUNT.
+dnl  For example,
+dnl
+dnl         deflit(CHUNK_COUNT, 2)
+dnl
+dnl  The LOG2, MASK and BYTES definitions below are derived from these COUNT
+dnl  definitions.  If COUNT is redefined, the LOG2, MASK and BYTES follow
+dnl  the new definition automatically.
+dnl
+dnl  LOG2 is the log base 2 of COUNT.  MASK is COUNT-1, which can be used as
+dnl  a bit mask.  BYTES is GMP_LIMB_BYTES*COUNT, the number of bytes
+dnl  processed in each unrolled loop.
+dnl
+dnl  GMP_LIMB_BYTES is defined in a CPU specific m4 include file.  It
+dnl  exists only so the BYTES definitions here can be common to all CPUs.
+dnl  In the actual code for a given CPU, an explicit 4 or 8 may as well be
+dnl  used because the code is only for a particular CPU, it doesn't need to
+dnl  be general.
+dnl
+dnl  Note that none of these macros do anything except give conventional
+dnl  names to commonly used things.  You still have to write your own
+dnl  expressions for a forloop() and the resulting address displacements.
+dnl  Something like the following would be typical for 4 bytes per limb.
+dnl
+dnl         forloop(`i',0,UNROLL_COUNT-1,`
+dnl                 deflit(`disp',eval(i*4))
+dnl                 ...
+dnl         ')
+dnl
+dnl  Or when using CHUNK_COUNT,
+dnl
+dnl         forloop(`i',0,UNROLL_COUNT/CHUNK_COUNT-1,`
+dnl                 deflit(`disp0',eval(i*CHUNK_COUNT*4))
+dnl                 deflit(`disp1',eval(disp0+4))
+dnl                 ...
+dnl         ')
+dnl
+dnl  Clearly `i' can be run starting from 1, or from high to low or whatever
+dnl  best suits.
+
+dnl  Each of these is a deflit so the name still expands literally when
+dnl  directly followed by parentheses, eg. UNROLL_BYTES(%eax) (see deflit
+dnl  above).
+deflit(UNROLL_LOG2,
+m4_assert_defined(`UNROLL_COUNT')
+`m4_log2(UNROLL_COUNT)')
+
+deflit(UNROLL_MASK,
+m4_assert_defined(`UNROLL_COUNT')
+`eval(UNROLL_COUNT-1)')
+
+deflit(UNROLL_BYTES,
+m4_assert_defined(`UNROLL_COUNT')
+m4_assert_defined(`GMP_LIMB_BYTES')
+`eval(UNROLL_COUNT * GMP_LIMB_BYTES)')
+
+deflit(CHUNK_LOG2,
+m4_assert_defined(`CHUNK_COUNT')
+`m4_log2(CHUNK_COUNT)')
+
+deflit(CHUNK_MASK,
+m4_assert_defined(`CHUNK_COUNT')
+`eval(CHUNK_COUNT-1)')
+
+deflit(CHUNK_BYTES,
+m4_assert_defined(`CHUNK_COUNT')
+m4_assert_defined(`GMP_LIMB_BYTES')
+`eval(CHUNK_COUNT * GMP_LIMB_BYTES)')
+
+
+dnl  Usage: MPN(name)
+dnl
+dnl  Add MPN_PREFIX to a name.
+dnl  MPN_PREFIX defaults to "__gmpn_" if not defined.
+dnl
+dnl  m4_unquote is used in MPN so that when it expands to say __gmpn_foo,
+dnl  that identifier will be subject to further macro expansion.  This is
+dnl  used by some of the fat binary support for renaming symbols.
+
+ifdef(`MPN_PREFIX',,
+`define(`MPN_PREFIX',`__gmpn_')')
+
+dnl  m4_unquote strips a layer of quotes so the result is rescanned for
+dnl  further macro expansion, as described in the comments above (used by
+dnl  the fat binary symbol renaming).
+define(MPN,
+m4_assert_numargs(1)
+`m4_unquote(MPN_PREFIX`'$1)')
+
+
+dnl  Usage: mpn_add_n, etc
+dnl
+dnl  Convenience definitions using MPN(), like the #defines in gmp.h.  Each
+dnl  function that might be implemented in assembler is here.
+
+dnl  deflit (not define) is used so the mpn_xxx name expands even when
+dnl  directly followed by parentheses (see deflit above).
+define(define_mpn,
+m4_assert_numargs(1)
+`deflit(`mpn_$1',`MPN(`$1')')')
+
+define_mpn(add)
+define_mpn(add_1)
+define_mpn(add_err1_n)
+define_mpn(add_err2_n)
+define_mpn(add_err3_n)
+define_mpn(add_n)
+define_mpn(add_nc)
+define_mpn(addlsh1_n)
+define_mpn(addlsh1_nc)
+define_mpn(addlsh2_n)
+define_mpn(addlsh2_nc)
+define_mpn(addlsh_n)
+define_mpn(addlsh_nc)
+define_mpn(addlsh1_n_ip1)
+define_mpn(addlsh1_nc_ip1)
+define_mpn(addlsh2_n_ip1)
+define_mpn(addlsh2_nc_ip1)
+define_mpn(addlsh_n_ip1)
+define_mpn(addlsh_nc_ip1)
+define_mpn(addlsh1_n_ip2)
+define_mpn(addlsh1_nc_ip2)
+define_mpn(addlsh2_n_ip2)
+define_mpn(addlsh2_nc_ip2)
+define_mpn(addlsh_n_ip2)
+define_mpn(addlsh_nc_ip2)
+define_mpn(addmul_1)
+define_mpn(addmul_1c)
+define_mpn(addmul_2)
+define_mpn(addmul_3)
+define_mpn(addmul_4)
+define_mpn(addmul_5)
+define_mpn(addmul_6)
+define_mpn(addmul_7)
+define_mpn(addmul_8)
+define_mpn(addmul_2s)
+define_mpn(add_n_sub_n)
+define_mpn(add_n_sub_nc)
+define_mpn(addaddmul_1msb0)
+define_mpn(and_n)
+define_mpn(andn_n)
+define_mpn(bdiv_q_1)
+define_mpn(pi1_bdiv_q_1)
+define_mpn(bdiv_dbm1c)
+define_mpn(cmp)
+define_mpn(cnd_add_n)
+define_mpn(cnd_sub_n)
+define_mpn(com)
+define_mpn(copyd)
+define_mpn(copyi)
+define_mpn(count_leading_zeros)
+define_mpn(count_trailing_zeros)
+define_mpn(div_qr_1n_pi1)
+define_mpn(div_qr_2)
+define_mpn(div_qr_2n_pi1)
+define_mpn(div_qr_2u_pi1)
+define_mpn(div_qr_2n_pi2)
+define_mpn(div_qr_2u_pi2)
+define_mpn(divexact_1)
+define_mpn(divexact_by3c)
+define_mpn(divrem)
+define_mpn(divrem_1)
+define_mpn(divrem_1c)
+define_mpn(divrem_2)
+define_mpn(divrem_classic)
+define_mpn(divrem_newton)
+define_mpn(dump)
+define_mpn(gcd)
+define_mpn(gcd_1)
+define_mpn(gcd_11)
+define_mpn(gcd_22)
+define_mpn(gcdext)
+define_mpn(get_str)
+define_mpn(hamdist)
+define_mpn(invert_limb)
+define_mpn(invert_limb_table)
+define_mpn(ior_n)
+define_mpn(iorn_n)
+define_mpn(lshift)
+define_mpn(lshiftc)
+define_mpn(mod_1_1p)
+define_mpn(mod_1_1p_cps)
+define_mpn(mod_1s_2p)
+define_mpn(mod_1s_2p_cps)
+define_mpn(mod_1s_3p)
+define_mpn(mod_1s_3p_cps)
+define_mpn(mod_1s_4p)
+define_mpn(mod_1s_4p_cps)
+define_mpn(mod_1)
+define_mpn(mod_1c)
+define_mpn(mod_34lsub1)
+define_mpn(modexact_1_odd)
+define_mpn(modexact_1c_odd)
+define_mpn(mul)
+define_mpn(mul_1)
+define_mpn(mul_1c)
+define_mpn(mul_2)
+define_mpn(mul_3)
+define_mpn(mul_4)
+define_mpn(mul_5)
+define_mpn(mul_6)
+define_mpn(mul_basecase)
+define_mpn(mul_n)
+define_mpn(mullo_basecase)
+define_mpn(mulmid_basecase)
+define_mpn(perfect_square_p)
+define_mpn(popcount)
+define_mpn(preinv_divrem_1)
+define_mpn(preinv_mod_1)
+define_mpn(nand_n)
+define_mpn(neg)
+define_mpn(nior_n)
+define_mpn(powm)
+define_mpn(powlo)
+define_mpn(random)
+define_mpn(random2)
+define_mpn(redc_1)
+define_mpn(redc_2)
+define_mpn(rsblsh1_n)
+define_mpn(rsblsh1_nc)
+define_mpn(rsblsh2_n)
+define_mpn(rsblsh2_nc)
+define_mpn(rsblsh_n)
+define_mpn(rsblsh_nc)
+define_mpn(rsh1add_n)
+define_mpn(rsh1add_nc)
+define_mpn(rsh1sub_n)
+define_mpn(rsh1sub_nc)
+define_mpn(rshift)
+define_mpn(rshiftc)
+define_mpn(sbpi1_bdiv_q)
+define_mpn(sbpi1_bdiv_qr)
+define_mpn(sbpi1_bdiv_r)
+define_mpn(scan0)
+define_mpn(scan1)
+define_mpn(set_str)
+define_mpn(sqr_basecase)
+define_mpn(sqr_diagonal)
+define_mpn(sqr_diag_addlsh1)
+define_mpn(sub_n)
+define_mpn(sublsh1_n)
+define_mpn(sublsh1_nc)
+define_mpn(sublsh1_n_ip1)
+define_mpn(sublsh1_nc_ip1)
+define_mpn(sublsh2_n)
+define_mpn(sublsh2_nc)
+define_mpn(sublsh2_n_ip1)
+define_mpn(sublsh2_nc_ip1)
+define_mpn(sublsh_n)
+define_mpn(sublsh_nc)
+define_mpn(sublsh_n_ip1)
+define_mpn(sublsh_nc_ip1)
+define_mpn(sqrtrem)
+define_mpn(sub)
+define_mpn(sub_1)
+define_mpn(sub_err1_n)
+define_mpn(sub_err2_n)
+define_mpn(sub_err3_n)
+dnl  (sub_n is already defined earlier in this list; a second, identical
+dnl  definition here was redundant and has been removed.)
+define_mpn(sub_nc)
+define_mpn(submul_1)
+define_mpn(submul_1c)
+define_mpn(sec_tabselect)
+define_mpn(umul_ppmm)
+define_mpn(umul_ppmm_r)
+define_mpn(udiv_qrnnd)
+define_mpn(udiv_qrnnd_r)
+define_mpn(xnor_n)
+define_mpn(xor_n)
+
+
+dnl  Defines for C global arrays and variables, with names matching what's
+dnl  used in the C code.
+dnl
+dnl  Notice that GSYM_PREFIX is included, unlike with the function defines
+dnl  above.  Also, "deflit" is used so that something like __clz_tab(%ebx)
+dnl  comes out as __gmpn_clz_tab(%ebx), for the benefit of CPUs with that
+dnl  style assembler syntax.
+
+deflit(__clz_tab,
+m4_assert_defined(`GSYM_PREFIX')
+`GSYM_PREFIX`'MPN(`clz_tab')')
+
+dnl  Note this one uses a literal __gmp_ prefix, not MPN()'s __gmpn_.
+deflit(binvert_limb_table,
+m4_assert_defined(`GSYM_PREFIX')
+`GSYM_PREFIX`'__gmp_binvert_limb_table')
+
+
+dnl  Usage: ASM_START()
+dnl
+dnl  Emit any directives needed once at the start of an assembler file, like
+dnl  ".set noreorder" or whatever.  The default for this is nothing, but
+dnl  it's redefined by CPU specific m4 files.
+
+dnl  Default: no body, expands to nothing.
+define(ASM_START)
+
+
+dnl  Usage: ASM_END()
+dnl
+dnl  Emit any directives needed once at the end of an assembler file.  The
+dnl  default for this is nothing, but it's redefined by CPU specific m4 files.
+
+dnl  Default: no body, expands to nothing.
+define(ASM_END)
+
+
+dnl  Usage: PROLOGUE(foo[,param])
+dnl         EPILOGUE(foo)
+dnl
+dnl  Emit directives to start or end a function.  GSYM_PREFIX is added by
+dnl  these macros if necessary, so the given "foo" is what the function will
+dnl  be called in C.
+dnl
+dnl  The second parameter to PROLOGUE is used only for some CPUs and should
+dnl  be omitted if not required.
+dnl
+dnl  Nested or overlapping PROLOGUE/EPILOGUE pairs are allowed, if that
+dnl  makes sense for the system.  The name given to EPILOGUE must be a
+dnl  currently open PROLOGUE.
+dnl
+dnl  If only one PROLOGUE is open then the name can be omitted from
+dnl  EPILOGUE.  This is encouraged, since it means the name only has to
+dnl  appear in one place, not two.
+dnl
+dnl  The given name "foo" is not fully quoted here, it will be macro
+dnl  expanded more than once.  This is the way the m4_list macros work, and
+dnl  it also helps the tune/many.pl program do a renaming like
+dnl  -D__gmpn_add_n=mpn_add_n_foo when GSYM_PREFIX is not empty.
+
+dnl  Record the function name on PROLOGUE_list, then emit the CPU-specific
+dnl  directives, passing the optional second parameter only when given.
+define(PROLOGUE,
+m4_assert_numargs_range(1,2)
+`m4_file_seen()dnl
+define(`PROLOGUE_list',m4_list_quote($1,PROLOGUE_list))dnl
+ifelse(`$2',,
+`PROLOGUE_cpu(GSYM_PREFIX`'$1)',
+`PROLOGUE_cpu(GSYM_PREFIX`'$1,`$2')')')
+
+dnl  With no argument, EPILOGUE applies to the sole open PROLOGUE; it's an
+dnl  error if there are none or more than one open.
+define(EPILOGUE,
+m4_assert_numargs_range(0,1)
+`ifelse(`$1',,
+`ifelse(m4_list_count(PROLOGUE_list),0,
+`m4_error(`no open functions for EPILOGUE
+')',
+`ifelse(m4_list_count(PROLOGUE_list),1,
+`EPILOGUE_internal(PROLOGUE_current_function)',
+`m4_error(`more than one open function for EPILOGUE
+')')')',
+`EPILOGUE_internal(`$1')')')
+
+dnl  Called: EPILOGUE_internal(name)
+dnl  Check the name is a currently open PROLOGUE, remove it from the list,
+dnl  then emit the CPU-specific directives.
+define(EPILOGUE_internal,
+m4_assert_numargs(1)
+m4_assert_defined(`EPILOGUE_cpu')
+`ifelse(m4_list_find($1,PROLOGUE_list),0,
+`m4_error(`EPILOGUE without PROLOGUE: $1
+')')dnl
+define(`PROLOGUE_list',m4_list_quote(m4_list_remove($1,PROLOGUE_list)))dnl
+EPILOGUE_cpu(GSYM_PREFIX`$1')')
+
+dnl  Currently open PROLOGUEs, as a comma-separated list.
+define(PROLOGUE_list)
+
+
+dnl  Called: PROLOGUE_check(list,...)
+dnl  Check there's no remaining open PROLOGUEs at the end of input.
+define(PROLOGUE_check,
+`ifelse($1,,,
+`m4_error(`no EPILOGUE for: $1
+')dnl
+PROLOGUE_check(shift($@))')')
+
+dnl  Run the check from the m4wrap text, ie. at end of input.
+m4wrap_prepend(`PROLOGUE_check(PROLOGUE_list)')
+
+
+dnl  Usage: PROLOGUE_current_function
+dnl
+dnl  This macro expands to the current PROLOGUE/EPILOGUE function, or the
+dnl  most recent PROLOGUE if such pairs are nested or overlapped.
+
+dnl  The most recent PROLOGUE is the head of PROLOGUE_list, since PROLOGUE
+dnl  above prepends new names.
+define(PROLOGUE_current_function,
+m4_assert_numargs(-1)
+`m4_list_first(PROLOGUE_list)')
+
+
+dnl  Usage: PROLOGUE_cpu(GSYM_PREFIX`'foo[,param])
+dnl         EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  These macros hold the CPU-specific parts of PROLOGUE and EPILOGUE.
+dnl  Both are called with the function name, with GSYM_PREFIX already
+dnl  prepended.
+dnl
+dnl  The definitions here are something typical and sensible, but CPU or
+dnl  system specific m4 files should redefine them as necessary.  The
+dnl  optional extra parameter to PROLOGUE_cpu is not expected and not
+dnl  accepted here.
+
+dnl  Default: text section, 8-byte alignment, global symbol with function
+dnl  type, then the label itself.
+define(PROLOGUE_cpu,
+m4_assert_numargs(1)
+`	TEXT
+	ALIGN(8)
+	GLOBL	`$1' GLOBL_ATTR
+	TYPE(`$1',`function')
+`$1'LABEL_SUFFIX')
+
+dnl  Default: a size directive covering from the label to the current point.
+define(EPILOGUE_cpu,
+`	SIZE(`$1',.-`$1')')
+
+
+dnl  Usage: L(name)
+dnl
+dnl  Generate a local label with the given name.  This is simply a
+dnl  convenient way to add LSYM_PREFIX.
+dnl
+dnl  LSYM_PREFIX might be L$, so defn() must be used to quote it or the L
+dnl  will expand again as the L macro, making an infinite recursion.
+
+dnl  defn() gives the prefix without re-expanding it, for the reason in the
+dnl  comments above; note the quoted `L' in this define for the same reason.
+define(`L',
+m4_assert_numargs(1)
+`defn(`LSYM_PREFIX')$1')
+
+
+dnl  Usage: LDEF(name)
+dnl
+dnl  Generate a directive to define a local label.
+dnl
+dnl  On systems with a fixed syntax for defining labels there's no need to
+dnl  use this macro, it's only meant for systems where the syntax varies,
+dnl  like hppa which is "L(foo):" with gas, but just "L(foo)" in column 0
+dnl  with the system `as'.
+dnl
+dnl  The extra `' after LABEL_SUFFIX avoids any chance of a following
+dnl  "(...)"  being interpreted as an argument list.  Not that it'd be
+dnl  sensible to write anything like that after an LDEF(), but just in case.
+
+dnl  Expansion is L(name) followed by LABEL_SUFFIX; see above for why the
+dnl  trailing `' is there.
+define(LDEF,
+m4_assert_numargs(1)
+m4_assert_defined(`LABEL_SUFFIX')
+`L(`$1')`'LABEL_SUFFIX`'')
+
+
+dnl  Usage: INT32(label,value)
+dnl         INT64(label,first,second)
+
+dnl  Both emit an aligned local label then the value(s) with W32; INT64
+dnl  emits its two 32-bit halves in the order given by the caller.
+define(`INT32',
+m4_assert_defined(`W32')
+`	ALIGN(4)
+LDEF(`$1')
+	W32	$2')
+
+define(`INT64',
+m4_assert_defined(`W32')
+`	ALIGN(8)
+LDEF(`$1')
+	W32	$2
+	W32	$3')
+
+
+dnl  Usage: ALIGN(bytes)
+dnl
+dnl  Emit a ".align" directive.  The alignment is specified in bytes, and
+dnl  will normally need to be a power of 2.  The actual ".align" generated
+dnl  is either bytes or logarithmic according to what ./configure finds the
+dnl  assembler needs.
+dnl
+dnl  If ALIGN_FILL_0x90 is defined and equal to "yes", then ", 0x90" is
+dnl  appended.  This is for x86, see mpn/x86/README.
+
+dnl  eval() normalizes a byte-count expression; m4_log2 converts it to the
+dnl  logarithmic form when the assembler wants that.
+define(ALIGN,
+m4_assert_numargs(1)
+m4_assert_defined(`ALIGN_LOGARITHMIC')
+`.align	ifelse(ALIGN_LOGARITHMIC,yes,`m4_log2($1)',`eval($1)')dnl
+ifelse(ALIGN_FILL_0x90,yes,`, 0x90')')
+
+
+dnl  Usage: MULFUNC_PROLOGUE(function function...)
+dnl
+dnl  A dummy macro which is grepped for by ./configure to know what
+dnl  functions a multi-function file is providing.  Use this if there aren't
+dnl  explicit PROLOGUE()s for each possible function.
+dnl
+dnl  Multiple MULFUNC_PROLOGUEs can be used, or just one with the function
+dnl  names separated by spaces.
+
+dnl  Expands to nothing; it exists only to be grepped by ./configure.
+define(`MULFUNC_PROLOGUE',
+m4_assert_numargs(1)
+)
+
+
+dnl  Usage: NAILS_SUPPORT(spec spec ...)
+dnl
+dnl  A dummy macro which is grepped for by ./configure to know what nails
+dnl  are supported in an asm file.
+dnl
+dnl  Ranges can be given, or just individual values.  Multiple values or
+dnl  ranges can be given, separated by spaces.  Multiple NAILS_SUPPORT
+dnl  declarations work too.  Some examples,
+dnl
+dnl         NAILS_SUPPORT(1-20)
+dnl         NAILS_SUPPORT(1 6 9-12)
+dnl         NAILS_SUPPORT(1-10 16-20)
+
+dnl  Expands to nothing; it exists only to be grepped by ./configure.
+define(NAILS_SUPPORT,
+m4_assert_numargs(1)
+)
+
+
+dnl  Usage: ABI_SUPPORT(abi)
+dnl
+dnl  A dummy macro which is grepped for by ./configure to know what ABIs
+dnl  are supported in an asm file.
+dnl
+dnl  If multiple non-standard ABIs are supported, several ABI_SUPPORT
+dnl  declarations should be used:
+dnl
+dnl         ABI_SUPPORT(FOOABI)
+dnl         ABI_SUPPORT(BARABI)
+
+dnl  Expands to nothing; it exists only to be grepped by ./configure.
+define(ABI_SUPPORT,
+m4_assert_numargs(1)
+)
+
+
+dnl  Usage: GMP_NUMB_MASK
+dnl
+dnl  A bit mask for the number part of a limb.  Eg. with 6 bit nails in a
+dnl  32 bit limb, GMP_NUMB_MASK would be 0x3ffffff.
+
+dnl  NOTE(review): the -1 presumably marks a macro to be used without an
+dnl  argument list -- confirm against m4_assert_numargs (defined elsewhere).
+define(GMP_NUMB_MASK,
+m4_assert_numargs(-1)
+m4_assert_defined(`GMP_NUMB_BITS')
+`m4_hex_lowmask(GMP_NUMB_BITS)')
+
+
+dnl  Usage: m4append(`variable',`value-to-append')
+
+dnl  Note the quoted body runs to the newline before the closing paren, so
+dnl  each m4append call also emits a line break into the output.
+define(`m4append',
+`define(`$1',  defn(`$1')`$2')
+'
+)
+
+divert`'dnl
diff --git a/mpn/generic/add.c b/mpn/generic/add.c
new file mode 100644
index 0000000..4a6e3ba
--- /dev/null
+++ b/mpn/generic/add.c
@@ -0,0 +1,33 @@
+/* mpn_add - add mpn to mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_add 1
+
+#include "gmp-impl.h"
diff --git a/mpn/generic/add_1.c b/mpn/generic/add_1.c
new file mode 100644
index 0000000..1745aed
--- /dev/null
+++ b/mpn/generic/add_1.c
@@ -0,0 +1,33 @@
+/* mpn_add_1 - add limb to mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_add_1 1
+
+#include "gmp-impl.h"
diff --git a/mpn/generic/add_err1_n.c b/mpn/generic/add_err1_n.c
new file mode 100644
index 0000000..b247f19
--- /dev/null
+++ b/mpn/generic/add_err1_n.c
@@ -0,0 +1,100 @@
+/* mpn_add_err1_n -- add_n with one error term
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		mp_ptr ep, mp_srcptr yp,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+  /* yp is scanned high to low: yp[n-1] pairs with the carry out of the
+     first (least significant) limb addition, per the formula above.  */
+  yp += n - 1;
+  el = eh = 0;
+
+  do
+    {
+      yl = *yp--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n: two chained adds, either may carry (never both) */
+      ADDC_LIMB (cy1, sl, ul, vl);
+      ADDC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh:el): cy is 0 or 1, so -cy is 0 or all ones, and zl is
+	 yl when this limb carried, 0 otherwise */
+      zl = (-cy) & yl;
+      el += zl;
+      eh += el < zl;	/* propagate carry out of el into eh */
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* fold el's bits above GMP_NUMB_BITS into eh, keep only the numb part */
+  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
+  el &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el;
+  ep[1] = eh;
+
+  return cy;
+}
diff --git a/mpn/generic/add_err2_n.c b/mpn/generic/add_err2_n.c
new file mode 100644
index 0000000..d584d6d
--- /dev/null
+++ b/mpn/generic/add_err2_n.c
@@ -0,0 +1,116 @@
+/* mpn_add_err2_n -- add_n with two error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+  stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+  /* Scan both y operands from the top limb down, pairing yp[n-1-i] with
+     the carry out of limb i of the addition.  */
+  yp1 += n - 1;
+  yp2 += n - 1;
+  el1 = eh1 = 0;	/* (eh1:el1), (eh2:el2) are two-limb accumulators */
+  el2 = eh2 = 0;
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n */
+      ADDC_LIMB (cy1, sl, ul, vl);
+      ADDC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1); (-cy) & y is cy * y since cy is 0 or 1 */
+      zl1 = (-cy) & yl1;
+      el1 += zl1;
+      eh1 += el1 < zl1;
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* Renormalize so each low accumulator limb fits in one numb.  */
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;
+  ep[1] = eh1;
+  ep[2] = el2;
+  ep[3] = eh2;
+
+  return cy;
+}
diff --git a/mpn/generic/add_err3_n.c b/mpn/generic/add_err3_n.c
new file mode 100644
index 0000000..a6ed4dc
--- /dev/null
+++ b/mpn/generic/add_err3_n.c
@@ -0,0 +1,131 @@
+/* mpn_add_err3_n -- add_n with three error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+           c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+  stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+  /* Scan the three y operands from the top limb down, pairing yp[n-1-i]
+     with the carry out of limb i of the addition.  */
+  yp1 += n - 1;
+  yp2 += n - 1;
+  yp3 += n - 1;
+  el1 = eh1 = 0;	/* three two-limb error accumulators (ehK:elK) */
+  el2 = eh2 = 0;
+  el3 = eh3 = 0;
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      yl3 = *yp3--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n */
+      ADDC_LIMB (cy1, sl, ul, vl);
+      ADDC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1); (-cy) & y is cy * y since cy is 0 or 1 */
+      zl1 = (-cy) & yl1;
+      el1 += zl1;
+      eh1 += el1 < zl1;
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+
+      /* update (eh3:el3) */
+      zl3 = (-cy) & yl3;
+      el3 += zl3;
+      eh3 += el3 < zl3;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* Renormalize so each low accumulator limb fits in one numb.  */
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+  eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
+  el3 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;
+  ep[1] = eh1;
+  ep[2] = el2;
+  ep[3] = eh2;
+  ep[4] = el3;
+  ep[5] = eh3;
+
+  return cy;
+}
diff --git a/mpn/generic/add_n.c b/mpn/generic/add_n.c
new file mode 100644
index 0000000..f62ac87
--- /dev/null
+++ b/mpn/generic/add_n.c
@@ -0,0 +1,89 @@
+/* mpn_add_n -- Add equal length limb vectors.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+/* Add {up,n} and {vp,n}, store the low n limbs at rp, return the carry
+   out of the top limb (0 or 1).  Full-limb variant (no nail bits).  */
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t a, b, partial, total, overflow1, overflow2, carry;
+  mp_size_t i;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  carry = 0;
+  for (i = 0; i < n; i++)
+    {
+      a = up[i];
+      b = vp[i];
+      partial = a + b;			/* may wrap around */
+      overflow1 = partial < a;		/* carry out of a + b */
+      total = partial + carry;
+      overflow2 = total < partial;	/* carry out of adding old carry */
+      rp[i] = total;
+      carry = overflow1 | overflow2;	/* at most one can be set */
+    }
+
+  return carry;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+/* Add {up,n} and {vp,n}, store the low n limbs at rp, return the carry
+   out of the top limb.  Nail variant: each limb holds GMP_NUMB_BITS of
+   payload, so a sum plus carry never overflows the full limb and the
+   carry is simply the bits above the numb.  */
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t acc, carry;
+  mp_size_t i;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  carry = 0;
+  for (i = 0; i < n; i++)
+    {
+      acc = up[i] + vp[i] + carry;
+      carry = acc >> GMP_NUMB_BITS;
+      rp[i] = acc & GMP_NUMB_MASK;
+    }
+
+  return carry;
+}
+
+#endif
diff --git a/mpn/generic/add_n_sub_n.c b/mpn/generic/add_n_sub_n.c
new file mode 100644
index 0000000..1e72b5d
--- /dev/null
+++ b/mpn/generic/add_n_sub_n.c
@@ -0,0 +1,172 @@
+/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1999-2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192	/* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / GMP_LIMB_BYTES / 6)
+
+
+/* mpn_add_n_sub_n.
+   r1[] = s1[] + s2[]
+   r2[] = s1[] - s2[]
+   All operands have n limbs.
+   In-place operations allowed.  */
+mp_limb_t
+mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+{
+  mp_limb_t acyn, acyo;		/* carry for add */
+  mp_limb_t scyn, scyo;		/* carry (borrow) for subtract */
+  mp_size_t off;		/* offset in operands */
+  mp_size_t this_n;		/* size of current chunk */
+
+  /* We alternatingly add and subtract in chunks that fit into the (L1)
+     cache.  Since the chunks are several hundred limbs, the function call
+     overhead is insignificant, but we get much better locality.  */
+
+  /* We have three variants of the inner loop, the proper loop is chosen
+     depending on whether r1 or r2 are the same operand as s1 or s2.  */
+
+  if (r1p != s1p && r1p != s2p)
+    {
+      /* r1 is not identical to either input operand.  We can therefore write
+	 to r1 directly, without using temporary storage.  Add first so the
+	 inputs a chunk reads have not yet been clobbered.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+	  /* no carry-in entry point: add, then propagate the carry-in */
+	  acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+	}
+    }
+  else if (r2p != s1p && r2p != s2p)
+    {
+      /* r2 is not identical to either input operand.  We can therefore write
+	 to r2 directly, without using temporary storage.  Subtract first,
+	 for the symmetric reason to the branch above.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+	  acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+	}
+    }
+  else
+    {
+      /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2==s2 or vice versa)
+	 Need temporary storage: the sum chunk is built in tp and copied out
+	 only after the subtraction has consumed the original inputs.  */
+      mp_limb_t tp[PART_SIZE];
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
+#else
+	  acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+	  MPN_COPY (r1p + off, tp, this_n);
+	}
+    }
+
+  /* pack both carries: bit 1 = add carry, bit 0 = subtract borrow */
+  return 2 * acyo + scyo;
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+/* Benchmark driver (built only with -DMAIN): compares separate add/sub
+   against the combined routine, for each overlap configuration.
+   Usage: prog n, where n is the operand size in limbs.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr r1p, r2p, s1p, s2p;
+  double t;
+  mp_size_t n;
+
+  if (argc < 2)
+    {
+      fprintf (stderr, "usage: %s n\n", argv[0]);
+      return 1;
+    }
+  n = strtol (argv[1], 0, 0);
+
+  r1p = malloc (n * GMP_LIMB_BYTES);
+  r2p = malloc (n * GMP_LIMB_BYTES);
+  s1p = malloc (n * GMP_LIMB_BYTES);
+  s2p = malloc (n * GMP_LIMB_BYTES);
+  if (r1p == NULL || r2p == NULL || s1p == NULL || s2p == NULL)
+    {
+      fprintf (stderr, "out of memory\n");
+      return 1;
+    }
+  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
+  printf ("              separate add and sub: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+  printf ("combined addsub separate variables: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+  printf ("        combined addsub r1 overlap: %.3f\n", t);
+  /* This used to duplicate the r1-overlap call; make r2 alias an input
+     operand so the r2-overlap code path is actually measured.  */
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r2p,s2p,n));
+  printf ("        combined addsub r2 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
+  printf ("          combined addsub in-place: %.3f\n", t);
+
+  free (r1p);
+  free (r2p);
+  free (s1p);
+  free (s2p);
+  return 0;
+}
+#endif
diff --git a/mpn/generic/addmul_1.c b/mpn/generic/addmul_1.c
new file mode 100644
index 0000000..6140e8e
--- /dev/null
+++ b/mpn/generic/addmul_1.c
@@ -0,0 +1,145 @@
+/* mpn_addmul_1 -- multiply the N long limb vector pointed to by UP by VL,
+   add the N least significant limbs of the product to the limb vector
+   pointed to by RP.  Return the most significant limb of the product,
+   adjusted for carry-out from the addition.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2004, 2016 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t u0, crec, c, p1, p0, r0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+
+  crec = 0;		/* carry limb propagated between iterations */
+  do
+    {
+      u0 = *up++;
+      umul_ppmm (p1, p0, u0, v0);	/* (p1:p0) = u0 * v0 */
+
+      r0 = *rp;
+
+      /* add the low product limb into r0; wraparound means carry */
+      p0 = r0 + p0;
+      c = r0 > p0;
+
+      /* p1 + c cannot overflow: the high product limb is at most B-2 */
+      p1 = p1 + c;
+
+      r0 = p0 + crec;		/* cycle 0, 3, ... */
+      c = p0 > r0;		/* cycle 1, 4, ... */
+
+      crec = p1 + c;		/* cycle 2, 5, ... */
+
+      *rp++ = r0;
+    }
+  while (--n != 0);
+
+  return crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS == 1
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, crec, xl, c1, c2, c3;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (v0);
+
+  /* pre-shift v0 so the product splits on the numb boundary */
+  shifted_v0 = v0 << GMP_NAIL_BITS;
+  crec = 0;
+  prev_p1 = 0;		/* high product limb deferred to the next iteration */
+  do
+    {
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      /* sum the four numb-sized terms, collecting the three carries */
+      ADDC_LIMB (c1, xl, prev_p1, p0);
+      ADDC_LIMB (c2, xl, xl, r0);
+      ADDC_LIMB (c3, xl, xl, crec);
+      crec = c1 + c2 + c3;
+      *rp++ = xl;
+      prev_p1 = p1;
+    }
+  while (--n != 0);
+
+  return prev_p1 + crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 2
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, xw, crec, xl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (v0);
+
+  /* pre-shift v0 so the product splits on the numb boundary */
+  shifted_v0 = v0 << GMP_NAIL_BITS;
+  crec = 0;
+  prev_p1 = 0;		/* high product limb deferred to the next iteration */
+  do
+    {
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      /* with >= 2 nail bits, the sum of four numb-sized terms fits in a
+	 limb, so a plain add suffices and the carry is the high bits */
+      xw = prev_p1 + p0 + r0 + crec;
+      crec = xw >> GMP_NUMB_BITS;
+      xl = xw & GMP_NUMB_MASK;
+      *rp++ = xl;
+      prev_p1 = p1;
+    }
+  while (--n != 0);
+
+  return prev_p1 + crec;
+}
+
+#endif
diff --git a/mpn/generic/bdiv_dbm1c.c b/mpn/generic/bdiv_dbm1c.c
new file mode 100644
index 0000000..543bb6e
--- /dev/null
+++ b/mpn/generic/bdiv_dbm1c.c
@@ -0,0 +1,58 @@
+/* mpn_bdiv_dbm1c -- divide an mpn number by a divisor of B-1, where B is the
+   limb base.  The dbm1c moniker means "Divisor of B Minus 1 with Carry".
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+mp_limb_t
+mpn_bdiv_dbm1c (mp_ptr qp, mp_srcptr ap, mp_size_t n, mp_limb_t bd, mp_limb_t h)
+{
+  mp_limb_t a, p0, p1, cy;
+  mp_size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      a = ap[i];
+      /* (p1:p0) = a * bd, pre-shifted so p0 holds a full numb */
+      umul_ppmm (p1, p0, a, bd << GMP_NAIL_BITS);
+      p0 >>= GMP_NAIL_BITS;
+      /* h -= p0 with borrow cy; the running h is the quotient limb */
+      cy = h < p0;
+      h = (h - p0) & GMP_NUMB_MASK;
+      qp[i] = h;
+      /* fold the high product limb and the borrow into the next h */
+      h = h - p1 - cy;
+    }
+
+  return h;
+}
diff --git a/mpn/generic/bdiv_q.c b/mpn/generic/bdiv_q.c
new file mode 100644
index 0000000..52aa473
--- /dev/null
+++ b/mpn/generic/bdiv_q.c
@@ -0,0 +1,76 @@
+/* mpn_bdiv_q -- Hensel division with precomputed inverse, returning quotient.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n. */
+
+/* Compute Q = N / D mod B^n, dispatching on divisor size to the
+   schoolbook, divide-and-conquer, or Newton (mu) bdiv variant.
+   tp must provide mpn_bdiv_q_itch (nn, dn) scratch limbs.  */
+void
+mpn_bdiv_q (mp_ptr qp,
+	    mp_srcptr np, mp_size_t nn,
+	    mp_srcptr dp, mp_size_t dn,
+	    mp_ptr tp)
+{
+  mp_limb_t di;
+
+  if (ABOVE_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+    {
+      /* Large divisor: mu variant reads np directly, tp is scratch.  */
+      mpn_mu_bdiv_q (qp, np, nn, dp, dn, tp);
+      return;
+    }
+
+  /* The sb/dc variants divide in place on a copy of N and need the
+     negated inverse of d0 mod B.  */
+  MPN_COPY (tp, np, nn);
+  binvert_limb (di, dp[0]);
+  di = -di;
+  if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
+    mpn_sbpi1_bdiv_q (qp, tp, nn, dp, dn, di);
+  else
+    mpn_dcpi1_bdiv_q (qp, tp, nn, dp, dn, di);
+}
+
+/* Scratch requirement for mpn_bdiv_q: nn limbs for the in-place sb/dc
+   variants, otherwise whatever the mu variant needs.  */
+mp_size_t
+mpn_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
+{
+  return BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD)
+    ? nn
+    : mpn_mu_bdiv_q_itch (nn, dn);
+}
diff --git a/mpn/generic/bdiv_q_1.c b/mpn/generic/bdiv_q_1.c
new file mode 100644
index 0000000..6beb9a0
--- /dev/null
+++ b/mpn/generic/bdiv_q_1.c
@@ -0,0 +1,121 @@
+/* mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by 1-limb
+   divisor, returning quotient only.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003, 2005, 2009, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Hensel division of {up,n} by d = (odd part) << shift, given di, the
+   inverse of the odd part mod B.  Quotient to {rp,n}, returns the final
+   borrow/high term.  */
+mp_limb_t
+mpn_pi1_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d,
+		  mp_limb_t di, int shift)
+{
+  mp_size_t  i;
+  mp_limb_t  c, h, l, u, u_next, dummy;
+
+  ASSERT (n >= 1);
+  ASSERT (d != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (d);
+
+  d <<= GMP_NAIL_BITS;
+
+  if (shift != 0)
+    {
+      c = 0;
+
+      u = up[0];
+      /* pre-decrement rp so the loop's rp[i] stores quotient limb i-1;
+	 the final limb goes to rp[n] (i.e. original rp[n-1]) */
+      rp--;
+      for (i = 1; i < n; i++)
+	{
+	  u_next = up[i];
+	  /* shift the dividend on the fly: next numb of U >> shift */
+	  u = ((u >> shift) | (u_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
+
+	  SUBC_LIMB (c, l, u, c);
+
+	  /* Hensel step: quotient limb is (u - c) * d^{-1} mod B */
+	  l = (l * di) & GMP_NUMB_MASK;
+	  rp[i] = l;
+
+	  /* the high limb of l * d is subtracted from the next limb */
+	  umul_ppmm (h, dummy, l, d);
+	  c += h;
+	  u = u_next;
+	}
+
+      u = u >> shift;
+      SUBC_LIMB (c, l, u, c);
+
+      l = (l * di) & GMP_NUMB_MASK;
+      rp[n] = l;
+    }
+  else
+    {
+      u = up[0];
+      l = (u * di) & GMP_NUMB_MASK;
+      rp[0] = l;
+      c = 0;
+
+      for (i = 1; i < n; i++)
+	{
+	  /* subtract the high limb of the previous quotient limb times d */
+	  umul_ppmm (h, dummy, l, d);
+	  c += h;
+
+	  u = up[i];
+	  SUBC_LIMB (c, l, u, c);
+
+	  l = (l * di) & GMP_NUMB_MASK;
+	  rp[i] = l;
+	}
+    }
+
+  return c;
+}
+
+/* Hensel division of {up,n} by an arbitrary nonzero d: strip the factors
+   of two, invert the odd part mod B, and delegate to the
+   precomputed-inverse routine.  */
+mp_limb_t
+mpn_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d)
+{
+  mp_limb_t dinv;
+  int twos;
+
+  ASSERT (n >= 1);
+  ASSERT (d != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (d);
+
+  count_trailing_zeros (twos, d);
+  d >>= twos;
+  binvert_limb (dinv, d);
+
+  return mpn_pi1_bdiv_q_1 (rp, up, n, d, dinv, twos);
+}
diff --git a/mpn/generic/bdiv_qr.c b/mpn/generic/bdiv_qr.c
new file mode 100644
index 0000000..a4f0f39
--- /dev/null
+++ b/mpn/generic/bdiv_qr.c
@@ -0,0 +1,84 @@
+/* mpn_bdiv_qr -- Hensel division with precomputed inverse, returning quotient
+   and remainder.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n,
+	    R = N - QD.  */
+
+/* Compute Q = N / D mod B^n and R = N - QD, dispatching on operand sizes
+   to the schoolbook, divide-and-conquer, or Newton (mu) bdiv variant.
+   tp must provide mpn_bdiv_qr_itch (nn, dn) scratch limbs.  */
+mp_limb_t
+mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
+	     mp_srcptr np, mp_size_t nn,
+	     mp_srcptr dp, mp_size_t dn,
+	     mp_ptr tp)
+{
+  mp_limb_t di;
+  mp_limb_t rh;
+
+  ASSERT (nn > dn);
+
+  if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD)
+      || BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD)
+      || BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
+    {
+      /* sb/dc variants divide in place on a copy of N; the remainder
+	 ends up in the top dn limbs of the copy.  */
+      int small = BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD)
+	|| BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD);
+      MPN_COPY (tp, np, nn);
+      binvert_limb (di, dp[0]);
+      di = -di;
+      rh = small ? mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, di)
+		 : mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
+      MPN_COPY (rp, tp + nn - dn, dn);
+    }
+  else
+    rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, tp);
+
+  return rh;
+}
+
+/* Scratch requirement for mpn_bdiv_qr: nn limbs for the in-place sb/dc
+   variants, otherwise whatever the mu variant needs.  */
+mp_size_t
+mpn_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
+{
+  return BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD)
+    ? nn
+    : mpn_mu_bdiv_qr_itch (nn, dn);
+}
diff --git a/mpn/generic/binvert.c b/mpn/generic/binvert.c
new file mode 100644
index 0000000..a170e66
--- /dev/null
+++ b/mpn/generic/binvert.c
@@ -0,0 +1,106 @@
+/* Compute {up,n}^(-1) mod B^n.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2004-2007, 2009, 2012, 2017, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/*
+  r[k+1] = r[k] - r[k] * (u*r[k] - 1)
+  r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
+#else
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD))
+#endif
+
+/* Scratch requirement for mpn_binvert: room for one wrap-around product
+   of mpn_mulmod_bnm1 plus that routine's own scratch.  */
+mp_size_t
+mpn_binvert_itch (mp_size_t n)
+{
+  mp_size_t mod_size = mpn_mulmod_bnm1_next_size (n);
+  return mod_size + mpn_mulmod_bnm1_itch (mod_size, n, (n + 1) >> 1);
+}
+
+/* Compute {rp,n} := {up,n}^(-1) mod B^n by Newton iteration over
+   precisions n, ceil(n/2), ... down to a schoolbook base case.
+   up[0] must be odd (required by binvert_limb).  scratch must hold
+   mpn_binvert_itch (n) limbs.  */
+void
+mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
+{
+  mp_ptr xp;
+  mp_size_t rn, newrn;
+  mp_size_t sizes[NPOWS], *sizp;
+  mp_limb_t di;
+
+  /* Compute the computation precisions from highest to lowest, leaving the
+     base case size in 'rn'.  */
+  sizp = sizes;
+  for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
+    *sizp++ = rn;
+
+  xp = scratch;
+
+  /* Compute a base value of rn limbs.  */
+  /* Divide 1 by {up,rn} with a quadratic bdiv using the negated limb
+     inverse -di; the following mpn_neg fixes the sign.  */
+  MPN_ZERO (xp, rn);
+  xp[0] = 1;
+  binvert_limb (di, up[0]);
+  if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
+    mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+  else
+    mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+
+  mpn_neg (rp, rp, rn);
+
+  /* Use Newton iterations to get the desired precision, doubling the
+     number of correct limbs each round (rn -> newrn).  */
+  for (; rn < n; rn = newrn)
+    {
+      mp_size_t m;
+      newrn = *--sizp;
+
+      /* X <- UR, computed mod B^m - 1 (wrap-around trick). */
+      m = mpn_mulmod_bnm1_next_size (newrn);
+      mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
+      /* Only the values in the range xp + rn .. xp + newrn - 1 are
+	 used by the _mullo_n below.
+	 Since m >= newrn, we do not need the following. */
+      /* mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1); */
+
+      /* R = R(X/B^rn): extend the inverse by the next newrn - rn limbs */
+      mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn);
+      mpn_neg (rp + rn, rp + rn, newrn - rn);
+    }
+}
diff --git a/mpn/generic/broot.c b/mpn/generic/broot.c
new file mode 100644
index 0000000..02fe75a
--- /dev/null
+++ b/mpn/generic/broot.c
@@ -0,0 +1,195 @@
+/* mpn_broot -- Compute hensel sqrt
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+   typical use will have e small. */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+  /* Plain right-to-left binary exponentiation modulo the limb size.
+     The previous version initialized r and s both at declaration and in
+     the for-init clause; the redundant declarator initializers are gone.  */
+  mp_limb_t r;
+  mp_limb_t s;
+
+  /* Invariant: final result = r * s^e, with s squared each round.  */
+  for (r = 1, s = a; e > 0; e >>= 1, s *= s)
+    if (e & 1)
+      r *= s;
+
+  return r;
+}
+
+/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
+
+   Iterates
+
+     r' <-- r - r * (a^{k-1} r^k - 1) / n
+
+   If
+
+     a^{k-1} r^k = 1 (mod 2^m),
+
+   then
+
+     a^{k-1} r'^k = 1 (mod 2^{2m}),
+
+   Compute the update term as
+
+     r' = r - (a^{k-1} r^{k+1} - r) / k
+
+   where we still have cancellation of low limbs.
+
+ */
+void
+mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_size_t sizes[GMP_LIMB_BITS * 2];
+  mp_ptr akm1, tp, rnp, ep;
+  mp_limb_t a0, r0, km1, kp1h, kinv;
+  mp_size_t rn;
+  unsigned i;
+
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+  ASSERT (k >= 3);
+
+  TMP_MARK;
+
+  /* Layout: akm1 holds the n-limb a^{k-1}; tp is the remaining 3n limbs
+     of scratch for mpn_powlo and mpn_sqr.  */
+  akm1 = TMP_ALLOC_LIMBS (4*n);
+  tp = akm1 + n;
+
+  km1 = k-1;
+  /* FIXME: Could arrange the iteration so we don't need to compute
+     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
+     that we can use wraparound also for a*r, since the low half is
+     unchanged from the previous iteration. Or possibly mulmid. Also,
+     a r = a^{1/k}, so we get that value too, for free? */
+  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
+
+  a0 = ap[0];
+  /* kinv = k^{-1} mod B, for the exact divisions by k below.  */
+  binvert_limb (kinv, k);
+
+  /* 4 bits: a^{1/k - 1} (mod 16):
+
+	a % 8
+	1 3 5 7
+   k%4 +-------
+     1 |1 1 1 1
+     3 |1 9 9 1
+  */
+  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
+  /* Each step doubles the number of correct low bits of r0, the
+     single-limb value of a^{1/k - 1}.  */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
+#if GMP_NUMB_BITS > 32
+  {
+    unsigned prec = 32;
+    do
+      {
+	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
+	prec *= 2;
+      }
+    while (prec < GMP_NUMB_BITS);
+  }
+#endif
+
+  rp[0] = r0;
+  if (n == 1)
+    {
+      TMP_FREE;
+      return;
+    }
+
+  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
+  kp1h = k/2 + 1;
+
+  /* FIXME: Special case for two limb iteration. */
+  rnp = TMP_ALLOC_LIMBS (2*n + 1);
+  ep = rnp + n;
+
+  /* Record the limb precisions top-down, halving (rounded up) each step.  */
+  /* FIXME: Possible to do this on the fly with some bit fiddling. */
+  for (i = 0; n > 1; n = (n + 1)/2)
+    sizes[i++] = n;
+
+  rn = 1;
+
+  while (i-- > 0)
+    {
+      /* Compute x^{k+1}. */
+      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
+			       final iteration. */
+      /* rnp <- (r^2)^{(k+1)/2} = r^{k+1} (mod B^sizes[i]).  */
+      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
+
+      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
+
+      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
+      ASSERT (mpn_cmp (ep, rp, rn) == 0);
+
+      ASSERT (sizes[i] <= 2*rn);
+      /* Update r' = r - (a^{k-1} r^{k+1} - r) / k: the low rn limbs of ep
+	 equal r, so only the high part needs the exact division by k and
+	 a negation.  */
+      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
+      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
+      rn = sizes[i];
+    }
+  TMP_FREE;
+}
+
+/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
+void
+mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+
+  /* a^{1/1} = a: a plain copy suffices.  */
+  if (k == 1)
+    {
+      MPN_COPY (rp, ap, n);
+      return;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n);
+
+  /* r = a^{1/k - 1} * a = a^{1/k} (mod B^n).  */
+  mpn_broot_invm1 (tp, ap, n, k);
+  mpn_mullo_n (rp, tp, ap, n);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/brootinv.c b/mpn/generic/brootinv.c
new file mode 100644
index 0000000..e91b597
--- /dev/null
+++ b/mpn/generic/brootinv.c
@@ -0,0 +1,159 @@
+/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
+
+   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Computes a^2e (mod B). Uses right-to-left binary algorithm, since
+   typical use will have e small. */
+static mp_limb_t
+powsquaredlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t r;
+
+  r = 1;
+  /* if (LIKELY (e != 0)) */
+  /* Square-first order: each set bit of e contributes a factor of
+     (a^2)^{2^i}, so the result is a^{2e}.  The do-while is safe also for
+     e == 0: a is squared once but r is never multiplied, leaving 1.  */
+  do {
+    a *= a;
+    if (e & 1)
+      r *= a;
+    e >>= 1;
+  } while (e != 0);
+
+  return r;
+}
+
+/* Compute r such that r^k * y = 1 (mod B^n).
+
+   Iterates
+     r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
+   using Hensel lifting, each time doubling the number of known bits in r.
+
+   Works just for odd k.  Else the Hensel lifting degenerates.
+
+   FIXME:
+
+     (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
+
+     (2) Rewrite iteration as
+	   r' <-- r - k^{-1} r (r^k y - 1)
+	 and take advantage of the zero low part of r^k y - 1.
+
+     (3) Use wrap-around trick.
+
+     (4) Use a small table to get starting value.
+
+   Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo
+   Currently mpn_powlo requires 3*bn
+   so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS).
+*/
+
+void
+mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
+{
+  mp_ptr tp2, tp3;
+  mp_limb_t kinv, k2, r0, y0;
+  mp_size_t order[GMP_LIMB_BITS + 1];
+  int d;
+
+  ASSERT (bn > 0);
+  ASSERT ((k & 1) != 0);
+
+  /* Scratch layout: tp gets bn limbs, tp2 gets (bn + 3) >> 1 limbs, and
+     tp3 is the remaining scratch for mpn_powlo.  */
+  tp2 = tp + bn;
+  tp3 = tp + bn + ((bn + 3) >> 1);
+  k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */
+
+  /* kinv = k^{-1} mod B, for the exact division by k below.  */
+  binvert_limb (kinv, k);
+
+  /* 4-bit initial approximation:
+
+   y%16 | 1  3  5  7  9 11 13 15,
+    k%4 +-------------------------+k2%2
+     1  | 1 11 13  7  9  3  5 15  |  1
+     3  | 1  3  5  7  9 11 13 15  |  0
+
+  */
+  y0 = yp[0];
+
+  /* Each step doubles the number of correct low bits of r0.  */
+  r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8);			/* 4 bits */
+  r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f));	/* 8 bits */
+  r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff));	/* 16 bits */
+#if GMP_NUMB_BITS > 16
+  {
+    unsigned prec = 16;
+    do
+      {
+	r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2));
+	prec *= 2;
+      }
+    while (prec < GMP_NUMB_BITS);
+  }
+#endif
+
+  rp[0] = r0;
+  if (bn == 1)
+    return;
+
+  /* Record the limb precisions top-down, halving (rounded up) each step
+     until the 2-limb base case.  */
+  d = 0;
+  for (; bn != 2; bn = (bn + 1) >> 1)
+    order[d++] = bn;
+
+  order[d] = 2;
+  bn = 1;
+
+  do
+    {
+      mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */
+      /* tp2 <- (k + 1) * r; note k2 << 1 == k + 1 for odd k.  */
+      tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1);
+
+      bn = order[d];
+
+      /* rp <- (r^2)^{(k+1)/2} = r^{k+1} (mod B^bn).  */
+      mpn_powlo (rp, tp, &k2, 1, bn, tp3);
+      /* tp <- y * r^{k+1} (mod B^bn).  */
+      mpn_mullo_n (tp, yp, rp, bn);
+
+      /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */
+      /* mpn_sub cannot be used as above: it requires the first operand to
+	 have at least as many limbs, but ((bn + 1) >> 1) + 1 <= bn here.
+	 Instead subtract the pbn-limb tp2 and propagate the borrow into
+	 the high limbs by hand.  */
+      {
+	mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */
+	int borrow;
+	borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0;
+	if (bn > pbn) /* 3 < bn */
+	  {
+	    /* High limbs of the minuend tp2 are zero: negate the high part
+	       of tp, complementing instead when a borrow comes in.  */
+	    if (borrow)
+	      mpn_com (tp + pbn, tp + pbn, bn - pbn);
+	    else
+	      mpn_neg (tp + pbn, tp + pbn, bn - pbn);
+	  }
+      }
+      /* r' <- k^{-1} ((k+1) r - y r^{k+1}), an exact bdiv by k.  */
+      mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0);
+    }
+  while (--d >= 0);
+}
diff --git a/mpn/generic/bsqrt.c b/mpn/generic/bsqrt.c
new file mode 100644
index 0000000..27184f0
--- /dev/null
+++ b/mpn/generic/bsqrt.c
@@ -0,0 +1,47 @@
+/* mpn_bsqrt, a^{1/2} (mod 2^n).
+
+Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+void
+mpn_bsqrt (mp_ptr rp, mp_srcptr ap, mp_bitcnt_t nb, mp_ptr tp)
+{
+  mp_ptr sp;
+  mp_size_t n;
+
+  ASSERT (nb > 0);
+
+  /* NOTE(review): this truncates; presumably callers pass nb as a
+     multiple of GMP_NUMB_BITS -- confirm against call sites.  */
+  n = nb / GMP_NUMB_BITS;
+  sp = tp + n;
+
+  /* tp <- a^{-1/2}, then r = a^{-1/2} * a = a^{1/2} (mod B^n).
+     NOTE(review): mpn_bsqrtinv's existence flag is ignored here, so the
+     caller must ensure a square root exists.  */
+  mpn_bsqrtinv (tp, ap, nb, sp);
+  mpn_mullo_n (rp, tp, ap, n);
+}
diff --git a/mpn/generic/bsqrtinv.c b/mpn/generic/bsqrtinv.c
new file mode 100644
index 0000000..c286773
--- /dev/null
+++ b/mpn/generic/bsqrtinv.c
@@ -0,0 +1,103 @@
+/* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
+
+   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
+   Return non-zero if such an integer r exists.
+
+   Iterates
+     r' <-- (3r - r^3 y) / 2
+   using Hensel lifting.  Since we divide by two, the Hensel lifting is
+   somewhat degenerate.  Therefore, we lift from 2^b to 2^{b+1}-1.
+
+   FIXME:
+     (1) Simplify to do precision book-keeping in limbs rather than bits.
+
+     (2) Rewrite iteration as
+	   r' <-- r - r (r^2 y - 1) / 2
+	 and take advantage of zero low part of r^2 y - 1.
+
+     (3) Use wrap-around trick.
+
+     (4) Use a small table to get starting value.
+*/
+int
+mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
+{
+  mp_ptr tp2;
+  mp_size_t bn, order[GMP_LIMB_BITS + 1];
+  int i, d;
+
+  ASSERT (bnb > 0);
+
+  /* Limbs needed to hold bnb + 1 bits.  */
+  bn = 1 + bnb / GMP_LIMB_BITS;
+
+  tp2 = tp + bn;
+
+  /* r = 1 is correct to the initial 2-bit precision.  */
+  rp[0] = 1;
+  if (bnb == 1)
+    {
+      /* r^2 y = 1 (mod 4) is solvable only for y = 1 (mod 4).  */
+      if ((yp[0] & 3) != 1)
+	return 0;
+    }
+  else
+    {
+      /* For moduli 2^m with m >= 3, an odd y has a square root only
+	 when y = 1 (mod 8).  */
+      if ((yp[0] & 7) != 1)
+	return 0;
+
+      /* Record the bit precisions top-down, roughly halving each step
+	 down to the 2-bit base case.  */
+      d = 0;
+      for (; bnb != 2; bnb = (bnb + 2) >> 1)
+	order[d++] = bnb;
+
+      for (i = d - 1; i >= 0; i--)
+	{
+	  bnb = order[i];
+	  bn = 1 + bnb / GMP_LIMB_BITS;
+
+	  mpn_sqrlo (tp, rp, bn); /* tp <- r^2 */
+	  mpn_mullo_n (tp2, rp, tp, bn); /* tp2 <- rp ^ 3 */
+
+	  mpn_mul_1 (tp, rp, bn, 3); /* tp <- 3 r */
+
+	  mpn_mullo_n (rp, yp, tp2, bn); /* rp <- y r^3 */
+
+	  /* r' <- (3 r - y r^3) / 2; the difference is even, so the
+	     one-bit shift is exact.  */
+#if HAVE_NATIVE_mpn_rsh1sub_n
+	  mpn_rsh1sub_n (rp, tp, rp, bn);
+#else
+	  mpn_sub_n (tp2, tp, rp, bn);
+	  mpn_rshift (rp, tp2, bn, 1);
+#endif
+	}
+    }
+  return 1;
+}
diff --git a/mpn/generic/cmp.c b/mpn/generic/cmp.c
new file mode 100644
index 0000000..940314b
--- /dev/null
+++ b/mpn/generic/cmp.c
@@ -0,0 +1,33 @@
+/* mpn_cmp -- Compare two low-level natural-number integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_cmp 1
+
+#include "gmp-impl.h"
diff --git a/mpn/generic/cnd_add_n.c b/mpn/generic/cnd_add_n.c
new file mode 100644
index 0000000..e6b1373
--- /dev/null
+++ b/mpn/generic/cnd_add_n.c
@@ -0,0 +1,69 @@
+/* mpn_cnd_add_n -- Compute R = U + V if CND != 0 or R = U if CND == 0.
+   Both cases should take the same time and perform the exact same memory
+   accesses, since this function is intended to be used where side-channel
+   attack resilience is relevant.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Returns the carry out (0 or 1); always 0 when cnd is 0.  */
+mp_limb_t
+mpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  /* mask is all ones when cnd is nonzero, all zeros otherwise.  ANDing
+     it into each vp limb selects between adding V and adding zero while
+     keeping the instruction and memory-access pattern identical.  */
+  mask = -(mp_limb_t) (cnd != 0);
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;
+#if GMP_NAIL_BITS == 0
+      /* Branch-free carry propagation: cy1 flags wraparound in ul + vl,
+	 cy2 flags wraparound when adding the incoming carry; at most one
+	 of them can be set.  */
+      sl = ul + vl;
+      cy1 = sl < ul;
+      rl = sl + cy;
+      cy2 = rl < sl;
+      cy = cy1 | cy2;
+      *rp++ = rl;
+#else
+      /* Nail builds: numbers use only GMP_NUMB_BITS of each limb, so
+	 the carry simply lands in the nail bits.  */
+      rl = ul + vl;
+      rl += cy;
+      cy = rl >> GMP_NUMB_BITS;
+      *rp++ = rl & GMP_NUMB_MASK;
+#endif
+    }
+  while (--n != 0);
+
+  return cy;
+}
diff --git a/mpn/generic/cnd_sub_n.c b/mpn/generic/cnd_sub_n.c
new file mode 100644
index 0000000..d04ad8a
--- /dev/null
+++ b/mpn/generic/cnd_sub_n.c
@@ -0,0 +1,69 @@
+/* mpn_cnd_sub_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
+   Both cases should take the same time and perform the exact same memory
+   accesses, since this function is intended to be used where side-channel
+   attack resilience is relevant.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Returns the borrow out (0 or 1); always 0 when cnd is 0.  */
+mp_limb_t
+mpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  /* mask is all ones when cnd is nonzero, all zeros otherwise.  ANDing
+     it into each vp limb selects between subtracting V and subtracting
+     zero with an identical instruction and memory-access pattern.  */
+  mask = -(mp_limb_t) (cnd != 0);
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;
+#if GMP_NAIL_BITS == 0
+      /* Branch-free borrow propagation: cy1 flags underflow in ul - vl,
+	 cy2 flags underflow when subtracting the incoming borrow; at
+	 most one of them can be set.  */
+      sl = ul - vl;
+      cy1 = sl > ul;
+      rl = sl - cy;
+      cy2 = rl > sl;
+      cy = cy1 | cy2;
+      *rp++ = rl;
+#else
+      /* Nail builds: a borrow shows up in the top bit of the limb.  */
+      rl = ul - vl;
+      rl -= cy;
+      cy = rl >> (GMP_LIMB_BITS - 1);
+      *rp++ = rl & GMP_NUMB_MASK;
+#endif
+    }
+  while (--n != 0);
+
+  return cy;
+}
diff --git a/mpn/generic/cnd_swap.c b/mpn/generic/cnd_swap.c
new file mode 100644
index 0000000..83d856d
--- /dev/null
+++ b/mpn/generic/cnd_swap.c
@@ -0,0 +1,50 @@
+/* mpn_cnd_swap
+
+   Contributed to the GNU project by Niels Möller
+
+Copyright 2013, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Swap {ap,n} and {bp,n} if cnd is nonzero, else leave both unchanged,
+   performing the same reads and writes in either case so the condition
+   is not observable through memory access patterns.  */
+void
+mpn_cnd_swap (mp_limb_t cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp,
+	      mp_size_t n)
+{
+  /* mask is all ones when cnd is nonzero, all zeros otherwise; the
+     volatile qualifiers discourage the compiler from turning the
+     masked xor-swap back into a branch.  */
+  volatile mp_limb_t mask = - (mp_limb_t) (cnd != 0);
+  mp_size_t i;
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t a, b, t;
+      /* t is (a ^ b) or 0; xoring t into both words either swaps them
+	 or leaves them unchanged.  */
+      a = ap[i];
+      b = bp[i];
+      t = (a ^ b) & mask;
+      ap[i] = a ^ t;
+      bp[i] = b ^ t;
+    }
+}
diff --git a/mpn/generic/com.c b/mpn/generic/com.c
new file mode 100644
index 0000000..4de5824
--- /dev/null
+++ b/mpn/generic/com.c
@@ -0,0 +1,44 @@
+/* mpn_com - complement an mpn.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#undef mpn_com
+#define mpn_com __MPN(com)
+
+/* Store the bitwise complement of {up,n} at {rp,n}, masking each result
+   limb to GMP_NUMB_BITS (a no-op in nail-free builds).  Requires n >= 1.  */
+void
+mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i = 0;
+  do
+    rp[i] = ~up[i] & GMP_NUMB_MASK;
+  while (++i < n);
+}
diff --git a/mpn/generic/comb_tables.c b/mpn/generic/comb_tables.c
new file mode 100644
index 0000000..dedb77b
--- /dev/null
+++ b/mpn/generic/comb_tables.c
@@ -0,0 +1,47 @@
+/* Const tables shared among combinatoric functions.
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis
+   is an odd integer. */
+const mp_limb_t __gmp_oddfac_table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE };
+
+/* Entry i contains ((2i+1)!!/2^t) where t is chosen such that the parenthesis
+   is an odd integer. */
+const mp_limb_t __gmp_odd2fac_table[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
+
+/* Entry i contains 2i-popc(2i). */
+const unsigned char __gmp_fac2cnt_table[] = { TABLE_2N_MINUS_POPC_2N };
+
+/* Truncated limb-sized roots; presumably entry i is the i-th root of the
+   largest limb value -- TODO confirm against the table generator.  */
+const mp_limb_t __gmp_limbroots_table[] = { NTH_ROOT_NUMB_MASK_TABLE };
diff --git a/mpn/generic/compute_powtab.c b/mpn/generic/compute_powtab.c
new file mode 100644
index 0000000..f4fbc64
--- /dev/null
+++ b/mpn/generic/compute_powtab.c
@@ -0,0 +1,373 @@
+/* mpn_compute_powtab.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/*
+  CAVEATS:
+  * The exptab and powtab vectors are in opposite orders.  Probably OK.
+  * Consider getting rid of exptab, doing bit ops on the un argument instead.
+  * Consider rounding greatest power slightly upwards to save adjustments.
+  * In powtab_decide, consider computing cost from just the 2-3 largest
+    operands, since smaller operand contribute little.  This makes most sense
+    if exptab is suppressed.
+*/
+
+#include "gmp-impl.h"
+
+#ifndef DIV_1_VS_MUL_1_PERCENT
+#define DIV_1_VS_MUL_1_PERCENT 150
+#endif
+
+#define SET_powers_t(dest, ptr, size, dib, b, sh)	\
+  do {							\
+    dest.p = ptr;					\
+    dest.n = size;					\
+    dest.digits_in_base = dib;				\
+    dest.base = b;					\
+    dest.shift = sh;					\
+  } while (0)
+
+#if DIV_1_VS_MUL_1_PERCENT > 120
+#define HAVE_mpn_compute_powtab_mul 1
+/* Build the power table bottom-up by repeated squaring, multiplying an
+   entry by big_base whenever its digit count falls short of the target
+   recorded in exptab.  Used when mul_1 is cheap relative to division
+   (DIV_1_VS_MUL_1_PERCENT).  Entries are normalized by stripping low
+   zero limbs, the count being recorded in the shift field.
+   Fix: dropped a stray second semicolon after "n += cy != 0".  */
+static void
+mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
+			int base, const size_t *exptab, size_t n_pows)
+{
+  mp_size_t n;
+  mp_ptr p, t;
+  mp_limb_t cy;
+  long start_idx;
+  int c;
+
+  mp_limb_t big_base = mp_bases[base].big_base;
+  int chars_per_limb = mp_bases[base].chars_per_limb;
+
+  mp_ptr powtab_mem_ptr = powtab_mem;
+
+  size_t digits_in_base = chars_per_limb;
+
+  powers_t *pt = powtab;
+
+  /* Level 0: big_base itself, i.e. base^chars_per_limb.  */
+  p = powtab_mem_ptr;
+  powtab_mem_ptr += 1;
+  p[0] = big_base;
+
+  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
+  pt++;
+
+  /* Level 1: big_base^2.  */
+  t = powtab_mem_ptr;
+  powtab_mem_ptr += 2;
+  t[1] = mpn_mul_1 (t, p, 1, big_base);
+  n = 2;
+
+  digits_in_base *= 2;
+
+  /* Strip a low zero limb, if any, accounting for it in 'shift'.  */
+  c = t[0] == 0;
+  t += c;
+  n -= c;
+  mp_size_t shift = c;
+
+  SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+  p = t;
+  pt++;
+
+  if (exptab[0] == ((size_t) chars_per_limb << n_pows))
+    {
+      start_idx = n_pows - 2;
+    }
+  else
+    {
+      if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
+	{
+	  /* 3, sometimes adjusted to 4.  */
+	  t = powtab_mem_ptr;
+	  powtab_mem_ptr += 4;
+	  t[n] = cy = mpn_mul_1 (t, p, n, big_base);
+	  n += cy != 0;
+
+	  digits_in_base += chars_per_limb;
+
+	  c  = t[0] == 0;
+	  t += c;
+	  n -= c;
+	  shift += c;
+	}
+      else
+	{
+	  /* 2 copy, will always become 3 with back-multiplication.  */
+	  t = powtab_mem_ptr;
+	  powtab_mem_ptr += 3;
+	  t[0] = p[0];
+	  t[1] = p[1];
+	}
+
+      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+      p = t;
+      pt++;
+      start_idx = n_pows - 3;
+    }
+
+  /* Main squaring loop: each pass doubles digits_in_base.  */
+  for (long pi = start_idx; pi >= 0; pi--)
+    {
+      t = powtab_mem_ptr;
+      powtab_mem_ptr += 2 * n + 2;
+
+      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
+
+      mpn_sqr (t, p, n);
+
+      digits_in_base *= 2;
+      n *= 2;
+      n -= t[n - 1] == 0;
+      shift *= 2;
+
+      c = t[0] == 0;
+      t += c;
+      n -= c;
+      shift += c;
+
+      /* Adjust new value if it is too small as input to the next squaring.  */
+      if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
+	{
+	  t[n] = cy = mpn_mul_1 (t, t, n, big_base);
+	  n += cy != 0;
+
+	  digits_in_base += chars_per_limb;
+
+	  c  = t[0] == 0;
+	  t += c;
+	  n -= c;
+	  shift += c;
+	}
+
+      SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
+
+      /* Adjust previous value if it is not at its target power.  */
+      if (pt[-1].digits_in_base < exptab[pi + 1])
+	{
+	  mp_size_t n = pt[-1].n;
+	  mp_ptr p = pt[-1].p;
+	  p[n] = cy = mpn_mul_1 (p, p, n, big_base);
+	  n += cy != 0;
+
+	  ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
+	  pt[-1].digits_in_base = exptab[pi + 1];
+
+	  c = p[0] == 0;
+	  pt[-1].p = p + c;
+	  pt[-1].n = n - c;
+	  pt[-1].shift += c;
+	}
+
+      p = t;
+      pt++;
+    }
+}
+#endif
+
+#if DIV_1_VS_MUL_1_PERCENT < 275
+#define HAVE_mpn_compute_powtab_div 1
+/* Build the power table by repeated squaring, dividing out one big_base
+   factor (exactly) whenever the doubled digit count overshoots the
+   target in exptab.  Used when division by big_base is cheap relative
+   to multiplication (DIV_1_VS_MUL_1_PERCENT).  */
+static void
+mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
+			int base, const size_t *exptab, size_t n_pows)
+{
+  mp_ptr p, t;
+
+  mp_limb_t big_base = mp_bases[base].big_base;
+  int chars_per_limb = mp_bases[base].chars_per_limb;
+
+  mp_ptr powtab_mem_ptr = powtab_mem;
+
+  size_t digits_in_base = chars_per_limb;
+
+  powers_t *pt = powtab;
+
+  /* Level 0: big_base itself, i.e. base^chars_per_limb.  */
+  p = powtab_mem_ptr;
+  powtab_mem_ptr += 1;
+  p[0] = big_base;
+
+  SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
+  pt++;
+
+  mp_size_t n = 1;
+  mp_size_t shift = 0;
+  for (long pi = n_pows - 1; pi >= 0; pi--)
+    {
+      t = powtab_mem_ptr;
+      powtab_mem_ptr += 2 * n;
+
+      ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
+
+      mpn_sqr (t, p, n);
+      /* The square has 2n or 2n - 1 limbs; normalize.  */
+      n = 2 * n - 1; n += t[n] != 0;
+      digits_in_base *= 2;
+
+      if (digits_in_base != exptab[pi])	/* if ((((un - 1) >> pi) & 2) == 0) */
+	{
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
+	  /* Exact division by big_base; base 10 uses precomputed
+	     inverse/shift constants.  */
+	  if (__GMP_LIKELY (base == 10))
+	    mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
+			      MP_BASES_BIG_BASE_BINVERTED_10,
+			      MP_BASES_BIG_BASE_CTZ_10);
+	  else
+#endif
+	    /* FIXME: We could use _pi1 here if we add big_base_binverted and
+	       big_base_ctz fields to struct bases.  That would add about 2 KiB
+	       to mp_bases.c.
+	       FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
+	       mpn_bdiv_q_1 for more machines. */
+	    mpn_divexact_1 (t, t, n, big_base);
+
+	  n -= t[n - 1] == 0;
+	  digits_in_base -= chars_per_limb;
+	}
+
+      shift *= 2;
+      /* Strip low zero limbs, but be careful to keep the result divisible by
+	 big_base.  */
+      while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
+	{
+	  t++;
+	  n--;
+	  shift++;
+	}
+      p = t;
+
+      SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
+      pt++;
+    }
+
+  /* Strip any remaining low zero limbs.  */
+  pt -= n_pows + 1;
+  for (long pi = n_pows; pi >= 0; pi--)
+    {
+      mp_ptr t = pt[pi].p;
+      mp_size_t shift = pt[pi].shift;
+      mp_size_t n = pt[pi].n;
+      int c;
+      c = t[0] == 0;
+      t += c;
+      n -= c;
+      shift += c;
+      pt[pi].p = t;
+      pt[pi].shift = shift;
+      pt[pi].n = n;
+    }
+}
+#endif
+
+/* Fill exptab[] with the digit-count target for each power-table level
+   (highest first) and return the number of levels.  The sign of the
+   result selects the builder: positive for the multiplication-based
+   one, negative for the division-based one, chosen by the rough cost
+   model below when both are compiled in.  */
+static long
+powtab_decide (size_t *exptab, size_t un, int base)
+{
+  int chars_per_limb = mp_bases[base].chars_per_limb;
+  long n_pows = 0;
+  for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
+    {
+      exptab[n_pows] = pn * chars_per_limb;
+      n_pows++;
+    }
+  exptab[n_pows] = chars_per_limb;
+
+#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
+  /* Estimate the limb-operation counts of the two strategies: mcost
+     accumulates back-multiplication work, dcost exact-division work,
+     the latter weighted by DIV_1_VS_MUL_1_PERCENT.  */
+  size_t pn = un - 1;
+  size_t xn = (un + 1) >> 1;
+  unsigned mcost = 1;
+  unsigned dcost = 1;
+  for (long i = n_pows - 2; i >= 0; i--)
+    {
+      size_t pow = (pn >> (i + 1)) + 1;
+
+      if (pow & 1)
+	dcost += pow;
+
+      if (xn != (pow << i))
+	{
+	  if (pow > 2 && (pow & 1) == 0)
+	    mcost += 2 * pow;
+	  else
+	    mcost += pow;
+	}
+      else
+	{
+	  if (pow & 1)
+	    mcost += pow;
+	}
+    }
+
+  dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
+
+  if (mcost <= dcost)
+    return n_pows;
+  else
+    return -n_pows;
+#elif HAVE_mpn_compute_powtab_mul
+  return n_pows;
+#elif HAVE_mpn_compute_powtab_div
+  return -n_pows;
+#else
+#error "no powtab function available"
+#endif
+}
+
+/* Build the table of powers of 'base' needed to convert a un-limb number
+   to that base, storing descriptors in powtab[] and limb data in
+   powtab_mem (which NOTE(review): presumably must hold
+   mpn_str_powtab_alloc (un) limbs, per the ASSERTs in the builders).
+   Returns the number of table levels computed.  */
+size_t
+mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
+{
+  size_t exptab[GMP_LIMB_BITS];
+
+  /* Negative means the division-based builder was judged cheaper.  */
+  long n_pows = powtab_decide (exptab, un, base);
+
+#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
+  if (n_pows >= 0)
+    {
+      mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
+      return n_pows;
+    }
+  else
+    {
+      mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
+      return -n_pows;
+    }
+#elif HAVE_mpn_compute_powtab_mul
+  ASSERT (n_pows > 0);
+  mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
+  return n_pows;
+#elif HAVE_mpn_compute_powtab_div
+  ASSERT (n_pows < 0);
+  mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
+  return -n_pows;
+#else
+#error "no powtab function available"
+#endif
+}
diff --git a/mpn/generic/copyd.c b/mpn/generic/copyd.c
new file mode 100644
index 0000000..7def007
--- /dev/null
+++ b/mpn/generic/copyd.c
@@ -0,0 +1,40 @@
+/* mpn_copyd
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Copy the n-limb operand {up,n} to {rp,n}, visiting limbs from the most
+   significant end downwards (decreasing addresses).  */
+void
+mpn_copyd (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  while (--n >= 0)
+    rp[n] = up[n];
+}
diff --git a/mpn/generic/copyi.c b/mpn/generic/copyi.c
new file mode 100644
index 0000000..736e0b5
--- /dev/null
+++ b/mpn/generic/copyi.c
@@ -0,0 +1,42 @@
+/* mpn_copyi
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Copy the n-limb operand {up,n} to {rp,n}, visiting limbs from the least
+   significant end upwards (increasing addresses), exactly as the
+   negative-index formulation does.  */
+void
+mpn_copyi (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+
+  for (i = 0; i < n; i++)
+    rp[i] = up[i];
+}
diff --git a/mpn/generic/dcpi1_bdiv_q.c b/mpn/generic/dcpi1_bdiv_q.c
new file mode 100644
index 0000000..3c21818
--- /dev/null
+++ b/mpn/generic/dcpi1_bdiv_q.c
@@ -0,0 +1,161 @@
+/* mpn_dcpi1_bdiv_q -- divide-and-conquer Hensel division with precomputed
+   inverse, returning quotient.
+
+   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009-2011, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#if 0				/* unused, so leave out for now */
+/* Scratch limbs needed by mpn_dcpi1_bdiv_q_n; kept (disabled) so the
+   scratch requirement stays documented next to the code it describes.  */
+static mp_size_t
+mpn_dcpi1_bdiv_q_n_itch (mp_size_t n)
+{
+  /* NOTE: Depends on mullo_n and mpn_dcpi1_bdiv_qr_n interface */
+  return n;
+}
+#endif
+
+/* Computes Q = - N / D mod B^n, destroys N.
+
+   N = {np,n}
+   D = {dp,n}
+*/
+
+static void
+mpn_dcpi1_bdiv_q_n (mp_ptr qp,
+		    mp_ptr np, mp_srcptr dp, mp_size_t n,
+		    mp_limb_t dinv, mp_ptr tp)
+{
+  /* Repeatedly peel off the low floor(n/2) quotient limbs with the QR
+     variant, fold the resulting correction back into N, and iterate on
+     the remaining high part until it fits the schoolbook base case.  */
+  while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
+    {
+      mp_size_t lo, hi;
+      mp_limb_t cy;
+
+      lo = n >> 1;			/* floor(n/2) */
+      hi = n - lo;			/* ceil(n/2) */
+
+      /* Low lo quotient limbs; cy is the carry out of the N + Q*D update.  */
+      cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+
+      /* Add in the low-quotient times high-divisor cross product; only the
+	 low lo limbs of the product matter, hence mullo.  */
+      mpn_mullo_n (tp, qp, dp + hi, lo);
+      mpn_add_n (np + hi, np + hi, tp, lo);
+
+      if (lo < hi)
+	{
+	  /* Odd n: one extra divisor limb dp[lo] participates; fold the
+	     accumulated carry into the top limb of the window.  */
+	  cy += mpn_addmul_1 (np + lo, qp, lo, dp[lo]);
+	  np[n - 1] += cy;
+	}
+      qp += lo;
+      np += lo;
+      n -= lo;
+    }
+  /* Base case: schoolbook Hensel division for the remaining n limbs.  */
+  mpn_sbpi1_bdiv_q (qp, np, n, dp, n, dinv);
+}
+
+/* Computes Q = - N / D mod B^nn, destroys N.
+
+   N = {np,nn}
+   D = {dp,dn}
+*/
+
+void
+mpn_dcpi1_bdiv_q (mp_ptr qp,
+		  mp_ptr np, mp_size_t nn,
+		  mp_srcptr dp, mp_size_t dn,
+		  mp_limb_t dinv)
+{
+  mp_size_t qn;
+  mp_limb_t cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 2);
+  ASSERT (nn - dn >= 0);
+  ASSERT (dp[0] & 1);		/* Hensel division needs an odd divisor.  */
+
+  tp = TMP_SALLOC_LIMBS (dn);
+
+  qn = nn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn in a super-efficient manner.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+      else
+	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+      if (qn != dn)
+	{
+	  /* Fold in the cross product of this quotient block with the
+	     unused high divisor limbs; the size test orders the mpn_mul
+	     operands with the larger one first.  */
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+	  else
+	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+	  mpn_incr_u (tp + qn, cy);
+
+	  mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+	  cy = 0;
+	}
+
+      np += qn;
+      qp += qn;
+
+      /* Develop the rest of the quotient in dn-limb blocks; the last block
+	 uses the quotient-only helper since no remainder is wanted.  */
+      qn = nn - qn;
+      while (qn > dn)
+	{
+	  mpn_add_1 (np + dn, np + dn, qn - dn, cy);
+	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+	  qp += dn;
+	  np += dn;
+	  qn -= dn;
+	}
+      mpn_dcpi1_bdiv_q_n (qp, np, dp, dn, dinv, tp);
+    }
+  else
+    {
+      /* Quotient no longer than the divisor: a single direct call.  */
+      if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))
+	mpn_sbpi1_bdiv_q (qp, np, qn, dp, qn, dinv);
+      else
+	mpn_dcpi1_bdiv_q_n (qp, np, dp, qn, dinv, tp);
+    }
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/dcpi1_bdiv_qr.c b/mpn/generic/dcpi1_bdiv_qr.c
new file mode 100644
index 0000000..11da44f
--- /dev/null
+++ b/mpn/generic/dcpi1_bdiv_qr.c
@@ -0,0 +1,176 @@
+/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
+   inverse, returning quotient and remainder.
+
+   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes Hensel binary division of {np, 2*n} by {dp, n}.
+
+   Output:
+
+      q = -n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
+
+      r = (n + q * d) * 2^{-qn * GMP_NUMB_BITS}
+
+   Stores q at qp. Stores the n least significant limbs of r at the high half
+   of np, and returns the carry from the addition n + q*d.
+
+   d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
+
+/* Return the number of scratch limbs (the tp argument) that
+   mpn_dcpi1_bdiv_qr_n needs for n-limb operands.  */
+mp_size_t
+mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
+{
+  return n;
+}
+
+mp_limb_t
+mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		     mp_limb_t dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy;
+  mp_limb_t rh;
+
+  lo = n >> 1;			/* floor(n/2) */
+  hi = n - lo;			/* ceil(n/2) */
+
+  /* First half: develop the low lo quotient limbs from the low 2*lo limbs
+     of N against the low lo limbs of D.  */
+  if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
+    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+
+  /* Cross product: low quotient times the high hi limbs of D.  */
+  mpn_mul (tp, dp + lo, hi, qp, lo);
+
+  mpn_incr_u (tp + lo, cy);	/* fold in the carry from the first half */
+  rh = mpn_add (np + lo, np + lo, n + hi, tp, n);
+
+  /* Second half: the remaining hi quotient limbs, same scheme.  */
+  if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
+    cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
+
+  /* Cross product: high quotient times the low lo limbs of D.  */
+  mpn_mul (tp, qp + lo, hi, dp + hi, lo);
+
+  mpn_incr_u (tp + hi, cy);
+  rh += mpn_add_n (np + n, np + n, tp, n);
+
+  return rh;			/* carry out of N + Q*D, per header comment */
+}
+
+/* General-size Hensel binary division of {np,nn} by {dp,dn}: develops
+   nn-dn quotient limbs at qp in blocks, leaving a partial remainder in the
+   high part of np.  Returns the final carry from the N + Q*D additions.
+   D must be odd; dinv as described for mpn_dcpi1_bdiv_qr_n above.  */
+mp_limb_t
+mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
+		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+  mp_size_t qn;
+  mp_limb_t rr, cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 2);		/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (nn - dn >= 1);	/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (dp[0] & 1);
+
+  tp = TMP_SALLOC_LIMBS (dn);
+
+  qn = nn - dn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+      else
+	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+      rr = 0;
+      if (qn != dn)
+	{
+	  /* Cross product of this quotient block with the unused high
+	     divisor limbs; larger mpn_mul operand goes first.  */
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+	  else
+	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+	  mpn_incr_u (tp + qn, cy);
+
+	  rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+	  cy = 0;
+	}
+
+      np += qn;
+      qp += qn;
+
+      /* Remaining quotient, developed in full dn-limb blocks; the pending
+	 carry cy from each block is propagated before the next one.  */
+      qn = nn - dn - qn;
+      do
+	{
+	  rr += mpn_add_1 (np + dn, np + dn, qn, cy);
+	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+	  qp += dn;
+	  np += dn;
+	  qn -= dn;
+	}
+      while (qn > 0);
+      TMP_FREE;
+      return rr + cy;
+    }
+
+  /* qn <= dn: one block suffices.  */
+  if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+  rr = 0;
+  if (qn != dn)
+    {
+      if (qn > dn - qn)
+	mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+      else
+	mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+      mpn_incr_u (tp + qn, cy);
+
+      rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
+      cy = 0;
+    }
+
+  TMP_FREE;
+  return rr + cy;
+}
diff --git a/mpn/generic/dcpi1_div_q.c b/mpn/generic/dcpi1_div_q.c
new file mode 100644
index 0000000..1905c98
--- /dev/null
+++ b/mpn/generic/dcpi1_div_q.c
@@ -0,0 +1,86 @@
+/* mpn_dc_div_q -- divide-and-conquer division, returning exact quotient
+   only.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Quotient {qp, nn-dn} of {np,nn} / {dp,dn} for a normalized divisor
+   (high bit set); returns the high quotient limb.  Works by computing an
+   approximate quotient with one extra low "fraction" limb, then
+   correcting: the approximation is exact or one too large.  */
+mp_limb_t
+mpn_dcpi1_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
+		 mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
+{
+  mp_ptr tp, wp;
+  mp_limb_t qh;
+  mp_size_t qn;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);
+  ASSERT (nn - dn >= 3);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  /* Divide a copy of N shifted up one limb (tp[0] = 0) so that divappr
+     develops an extra low guard limb in wp[0].  */
+  tp = TMP_ALLOC_LIMBS (nn + 1);
+  MPN_COPY (tp + 1, np, nn);
+  tp[0] = 0;
+
+  qn = nn - dn;
+  wp = TMP_ALLOC_LIMBS (qn + 1);
+
+  qh = mpn_dcpi1_divappr_q (wp, tp, nn + 1, dp, dn, dinv);
+
+  if (wp[0] == 0)
+    {
+      /* Guard limb is zero, so the real quotient limbs wp[1..] may be one
+	 too large; verify by comparing Q*D against N.  */
+      mp_limb_t cy;
+
+      if (qn > dn)
+	mpn_mul (tp, wp + 1, qn, dp, dn);
+      else
+	mpn_mul (tp, dp, dn, wp + 1, qn);
+
+      /* Account for the high quotient limb qh in the product.  */
+      cy = (qh != 0) ? mpn_add_n (tp + qn, tp + qn, dp, dn) : 0;
+
+      if (cy || mpn_cmp (tp, np, nn) > 0) /* At most is wrong by one, no cycle. */
+	qh -= mpn_sub_1 (qp, wp + 1, qn, 1);
+      else /* Same as below */
+	MPN_COPY (qp, wp + 1, qn);
+    }
+  else
+    MPN_COPY (qp, wp + 1, qn);
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/mpn/generic/dcpi1_div_qr.c b/mpn/generic/dcpi1_div_qr.c
new file mode 100644
index 0000000..d7a65f8
--- /dev/null
+++ b/mpn/generic/dcpi1_div_qr.c
@@ -0,0 +1,248 @@
+/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
+   size operands.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Divide-and-conquer 2n/n division: quotient of {np,2n} by {dp,n}, with
+   n quotient limbs stored at qp and the n-limb remainder left in np.
+   Returns the high quotient limb.  dinv->inv32 is the precomputed
+   3/2 inverse used by the schoolbook base case.  tp is n limbs of
+   scratch.  */
+mp_limb_t
+mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		    gmp_pi1_t *dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy, qh, ql;
+
+  lo = n >> 1;			/* floor(n/2) */
+  hi = n - lo;			/* ceil(n/2) */
+
+  /* High quotient half: divide the high 2*hi limbs of N by the high hi
+     limbs of D.  */
+  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
+  else
+    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
+
+  /* Subtract the high-quotient times low-divisor cross product.  */
+  mpn_mul (tp, qp + lo, hi, dp, lo);
+
+  cy = mpn_sub_n (np + lo, np + lo, tp, n);
+  if (qh != 0)
+    cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+  /* The estimated quotient may be too large; add D back (decrementing Q)
+     until the partial remainder is non-negative.  */
+  while (cy != 0)
+    {
+      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+      cy -= mpn_add_n (np + lo, np + lo, dp, n);
+    }
+
+  /* Low quotient half, by the same scheme.  */
+  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
+    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
+  else
+    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
+
+  mpn_mul (tp, dp, hi, qp, lo);
+
+  cy = mpn_sub_n (np, np, tp, n);
+  if (ql != 0)
+    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
+
+  while (cy != 0)
+    {
+      mpn_sub_1 (qp, qp, lo, 1);
+      cy -= mpn_add_n (np, np, dp, n);
+    }
+
+  return qh;
+}
+
+/* General-size division {np,nn} / {dp,dn} for a normalized divisor,
+   producing nn-dn quotient limbs at qp and leaving the remainder in the
+   low dn limbs of np.  Returns the high quotient limb.  Quotient limbs
+   are developed high-to-low, one "odd-sized" block first, then full
+   dn-limb blocks via mpn_dcpi1_div_qr_n.  */
+mp_limb_t
+mpn_dcpi1_div_qr (mp_ptr qp,
+		  mp_ptr np, mp_size_t nn,
+		  mp_srcptr dp, mp_size_t dn,
+		  gmp_pi1_t *dinv)
+{
+  mp_size_t qn;
+  mp_limb_t qh, cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);		/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  tp = TMP_ALLOC_LIMBS (dn);
+
+  /* Work with pointers just past the high end of each operand, stepping
+     downwards as quotient blocks are produced.  */
+  qn = nn - dn;
+  qp += qn;
+  np += nn;
+  dp += dn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      /* Perform the typically smaller block first.  */
+      if (qn == 1)
+	{
+	  mp_limb_t q, n2, n1, n0, d1, d0;
+
+	  /* Handle qh up front, for simplicity. */
+	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+	  if (qh)
+	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+	  /* A single iteration of schoolbook: One 3/2 division,
+	     followed by the bignum update and adjustment. */
+	  n2 = np[0];
+	  n1 = np[-1];
+	  n0 = np[-2];
+	  d1 = dp[-1];
+	  d0 = dp[-2];
+
+	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+	  if (UNLIKELY (n2 == d1) && n1 == d0)
+	    {
+	      /* Degenerate case where udiv_qr_3by2 cannot be used: the
+		 quotient limb is the maximum possible value.  */
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+	      ASSERT (cy == n2);
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+	      if (dn > 2)
+		{
+		  mp_limb_t cy, cy1;
+		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+		  /* Propagate the borrow from the submul into the two
+		     limbs held in registers.  */
+		  cy1 = n0 < cy;
+		  n0 = (n0 - cy) & GMP_NUMB_MASK;
+		  cy = n1 < cy1;
+		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
+		  np[-2] = n0;
+
+		  if (UNLIKELY (cy != 0))
+		    {
+		      /* q was one too large; add D back and decrement q.  */
+		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+		      qh -= (q == 0);
+		      q = (q - 1) & GMP_NUMB_MASK;
+		    }
+		}
+	      else
+		np[-2] = n0;
+
+	      np[-1] = n1;
+	    }
+	  qp[0] = q;
+	}
+      else
+	{
+	  /* Do a 2qn / qn division */
+	  if (qn == 2)
+	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
+	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+	  else
+	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+	  if (qn != dn)
+	    {
+	      /* Subtract the cross product of this quotient block with
+		 the divisor limbs not used in the block division.  */
+	      if (qn > dn - qn)
+		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	      else
+		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	      if (qh != 0)
+		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+	      /* Adjust an overestimated quotient block.  */
+	      while (cy != 0)
+		{
+		  qh -= mpn_sub_1 (qp, qp, qn, 1);
+		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+		}
+	    }
+	}
+
+      /* The rest of the quotient, in full dn-limb blocks.  */
+      qn = nn - dn - qn;
+      do
+	{
+	  qp -= dn;
+	  np -= dn;
+	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
+	  qn -= dn;
+	}
+      while (qn > 0);
+    }
+  else
+    {
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+      else
+	qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+      if (qn != dn)
+	{
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	  else
+	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	  if (qh != 0)
+	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+	  while (cy != 0)
+	    {
+	      qh -= mpn_sub_1 (qp, qp, qn, 1);
+	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+	    }
+	}
+    }
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/mpn/generic/dcpi1_divappr_q.c b/mpn/generic/dcpi1_divappr_q.c
new file mode 100644
index 0000000..0abe04e
--- /dev/null
+++ b/mpn/generic/dcpi1_divappr_q.c
@@ -0,0 +1,256 @@
+/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
+   quotient.  The quotient returned is either correct, or one too large.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Recursive step for the approximate quotient: the high half is computed
+   exactly (div_qr), the low half only approximately (divappr).  If the
+   low-half approximation overflows (ql != 0), the low limbs are saturated
+   to all ones, keeping the overall result "exact or one too large".  */
+static mp_limb_t
+mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		       gmp_pi1_t *dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy, qh, ql;
+
+  lo = n >> 1;			/* floor(n/2) */
+  hi = n - lo;			/* ceil(n/2) */
+
+  /* Exact high quotient half.  */
+  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
+  else
+    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
+
+  /* Subtract the high-quotient times low-divisor cross product.  */
+  mpn_mul (tp, qp + lo, hi, dp, lo);
+
+  cy = mpn_sub_n (np + lo, np + lo, tp, n);
+  if (qh != 0)
+    cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+  /* Adjust an overestimated high half.  */
+  while (cy != 0)
+    {
+      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+      cy -= mpn_add_n (np + lo, np + lo, dp, n);
+    }
+
+  /* Approximate low quotient half.  */
+  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
+    ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
+  else
+    ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
+
+  if (UNLIKELY (ql != 0))
+    {
+      /* Low-half overflow: saturate the low quotient limbs.  */
+      mp_size_t i;
+      for (i = 0; i < lo; i++)
+	qp[i] = GMP_NUMB_MASK;
+    }
+
+  return qh;
+}
+
+/* Approximate quotient of {np,nn} / {dp,dn} for a normalized divisor:
+   nn-dn quotient limbs at qp, high limb returned.  Result is exact or one
+   too large (see the file header comment).  np is destroyed.  */
+mp_limb_t
+mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
+		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
+{
+  mp_size_t qn;
+  mp_limb_t qh, cy, qsave;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);
+  ASSERT (nn > dn);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  /* Work with pointers just past the high end of each operand, stepping
+     downwards as quotient blocks are produced.  */
+  qn = nn - dn;
+  qp += qn;
+  np += nn;
+  dp += dn;
+
+  if (qn >= dn)
+    {
+      qn++;			/* pretend we'll need an extra limb */
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      tp = TMP_SALLOC_LIMBS (dn);
+
+      /* Perform the typically smaller block first.  */
+      if (qn == 1)
+	{
+	  mp_limb_t q, n2, n1, n0, d1, d0;
+
+	  /* Handle qh up front, for simplicity. */
+	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+	  if (qh)
+	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+	  /* A single iteration of schoolbook: One 3/2 division,
+	     followed by the bignum update and adjustment. */
+	  n2 = np[0];
+	  n1 = np[-1];
+	  n0 = np[-2];
+	  d1 = dp[-1];
+	  d0 = dp[-2];
+
+	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+	  if (UNLIKELY (n2 == d1) && n1 == d0)
+	    {
+	      /* Degenerate case where udiv_qr_3by2 cannot be used: the
+		 quotient limb is the maximum possible value.  */
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+	      ASSERT (cy == n2);
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+	      if (dn > 2)
+		{
+		  mp_limb_t cy, cy1;
+		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+		  /* Propagate the borrow from the submul into the two
+		     limbs held in registers.  */
+		  cy1 = n0 < cy;
+		  n0 = (n0 - cy) & GMP_NUMB_MASK;
+		  cy = n1 < cy1;
+		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
+		  np[-2] = n0;
+
+		  if (UNLIKELY (cy != 0))
+		    {
+		      /* q was one too large; add D back and decrement q.  */
+		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+		      qh -= (q == 0);
+		      q = (q - 1) & GMP_NUMB_MASK;
+		    }
+		}
+	      else
+		np[-2] = n0;
+
+	      np[-1] = n1;
+	    }
+	  qp[0] = q;
+	}
+      else
+	{
+	  if (qn == 2)
+	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
+	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+	  else
+	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+	  if (qn != dn)
+	    {
+	      /* Subtract the cross product of this quotient block with
+		 the divisor limbs not used in the block division.  */
+	      if (qn > dn - qn)
+		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	      else
+		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	      if (qh != 0)
+		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+	      /* Adjust an overestimated quotient block.  */
+	      while (cy != 0)
+		{
+		  qh -= mpn_sub_1 (qp, qp, qn, 1);
+		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+		}
+	    }
+	}
+      qn = nn - dn - qn + 1;
+      while (qn > dn)
+	{
+	  qp -= dn;
+	  np -= dn;
+	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
+	  qn -= dn;
+	}
+
+      /* Since we pretended we'd need an extra quotient limb before, we now
+	 have made sure the code above left just dn-1=qn quotient limbs to
+	 develop.  Develop that plus a guard limb. */
+      qn--;
+      qp -= qn;
+      np -= dn;
+      qsave = qp[qn];
+      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
+      MPN_COPY_INCR (qp, qp + 1, qn);	/* drop the guard limb */
+      qp[qn] = qsave;
+    }
+  else    /* (qn < dn) */
+    {
+      mp_ptr q2p;
+#if 0				/* not possible since we demand nn > dn */
+      if (qn == 0)
+	{
+	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
+	  if (qh)
+	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
+	  TMP_FREE;
+	  return qh;
+	}
+#endif
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      /* Develop qn+1 quotient limbs (one extra guard limb) from the top
+	 qn+1 divisor limbs, then drop the guard limb.  */
+      q2p = TMP_SALLOC_LIMBS (qn + 1);
+      /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
+	 callers not to be silly?  */
+      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
+	{
+	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
+				    dp - (qn + 1), qn + 1, dinv->inv32);
+	}
+      else
+	{
+	  /* It is tempting to use qp for recursive scratch and put quotient in
+	     tp, but the recursive scratch needs one limb too many.  */
+	  tp = TMP_SALLOC_LIMBS (qn + 1);
+	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
+	}
+      MPN_COPY (qp, q2p + 1, qn);
+    }
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/mpn/generic/div_q.c b/mpn/generic/div_q.c
new file mode 100644
index 0000000..18c4ecf
--- /dev/null
+++ b/mpn/generic/div_q.c
@@ -0,0 +1,313 @@
+/* mpn_div_q -- division for arbitrary size operands.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Compute Q = N/D with truncation.
+     N = {np,nn}
+     D = {dp,dn}
+     Q = {qp,nn-dn+1}
+     T = {scratch,nn+1} is scratch space
+   N and D are both untouched by the computation.
+   N and T may overlap; pass the same space if N is irrelevant after the call,
+   but note that tp needs an extra limb.
+
+   Operand requirements:
+     N >= D > 0
+     dp[dn-1] != 0
+     No overlap between the N, D, and Q areas.
+
+   This division function does not clobber its input operands, since it is
+   intended to support average-O(qn) division, and for that to be effective, it
+   cannot put requirements on callers to copy a O(nn) operand.
+
+   If a caller does not care about the value of {np,nn+1} after calling this
+   function, it should pass np also for the scratch argument.  This function
+   will then save some time and space by avoiding allocation and copying.
+   (FIXME: Is this a good design?  We only really save any copying for
+   already-normalised divisors, which should be rare.  It also prevents us from
+   reasonably asking for all scratch space we need.)
+
+   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
+   the most significant quotient limb?  Look at the 4 main code blocks below
+   (consisting of an outer if-else where each arm contains an if-else). It is
+   tricky for the first code block, since the mpn_*_div_q calls will typically
+   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
+   we generate the most significant quotient limb here, before calling
+   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
+   critical division case (the SB sub-case in particular) copying is not a good
+   idea.
+
+   It might make sense to split the if-else parts of the (qn + FUDGE
+   >= dn) blocks into separate functions, since we could promise quite
+   different things to callers in these two cases.  The 'then' case
+   benefits from np=scratch, and it could perhaps even tolerate qp=np,
+   saving some headache for many callers.
+
+   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
+   operands, we do not reuse the huge scratch for adjustments.  This can be a
+   serious waste of memory for the largest operands.
+*/
+
+/* FUDGE determines when to try getting an approximate quotient from the upper
+   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
+   for the code to be correct.  */
+#define FUDGE 5			/* FIXME: tune this */
+
+#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
+#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
+#ifndef MUPI_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
+#endif
+
+void
+mpn_div_q (mp_ptr qp,
+	   mp_srcptr np, mp_size_t nn,
+	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
+{
+  mp_ptr new_dp, new_np, tp, rp;
+  mp_limb_t cy, dh, qh;
+  mp_size_t new_nn, qn;
+  gmp_pi1_t dinv;
+  int cnt;
+  TMP_DECL;
+  TMP_MARK;
+
+  ASSERT (nn >= dn);
+  ASSERT (dn > 0);
+  ASSERT (dp[dn - 1] != 0);
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
+
+  ASSERT_ALWAYS (FUDGE >= 2);
+
+  dh = dp[dn - 1];
+  if (dn == 1)
+    {
+      /* Single-limb divisor: delegate and discard the remainder.  */
+      mpn_divrem_1 (qp, 0L, np, nn, dh);
+      return;
+    }
+
+  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */
+
+  if (qn + FUDGE >= dn)
+    {
+      /* Quotient is not much shorter than the divisor: do one full
+	 division over the entire operands.  */
+      /* |________________________|
+                          |_______|  */
+      new_np = scratch;
+
+      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
+	{
+	  /* Normalize both operands by a common left shift so the
+	     divisor's high bit is set, as required by the sbpi1/dcpi1
+	     functions.  */
+	  count_leading_zeros (cnt, dh);
+
+	  cy = mpn_lshift (new_np, np, nn, cnt);
+	  new_np[nn] = cy;
+	  new_nn = nn + (cy != 0);
+
+	  new_dp = TMP_ALLOC_LIMBS (dn);
+	  mpn_lshift (new_dp, dp, dn, cnt);
+
+	  if (dn == 2)
+	    {
+	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
+	    }
+	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
+		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
+	    {
+	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
+	    }
+	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
+		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
+	    {
+	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
+	    }
+	  else
+	    {
+	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
+	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
+	    }
+	  /* If the pre-shift pushed a carry limb out (cy != 0), the calls
+	     above already developed all qn quotient limbs and qh is
+	     necessarily zero.  */
+	  if (cy == 0)
+	    qp[qn - 1] = qh;
+	  else
+	    ASSERT (qh == 0);
+	}
+      else  /* divisor is already normalised */
+	{
+	  if (new_np != np)
+	    MPN_COPY (new_np, np, nn);
+
+	  if (dn == 2)
+	    {
+	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
+	    }
+	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
+		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
+	    {
+	      invert_pi1 (dinv, dh, dp[dn - 2]);
+	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
+	    }
+	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
+		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
+	    {
+	      invert_pi1 (dinv, dh, dp[dn - 2]);
+	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
+	    }
+	  else
+	    {
+	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
+	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
+	    }
+	  qp[nn - dn] = qh;
+	}
+    }
+  else
+    {
+      /* Quotient is much shorter than the divisor: compute an
+	 approximate quotient from the high 2*qn+1 limbs of N and the
+	 high qn+1 limbs of D (with one guard limb), then correct it
+	 with a multiply-back check against the full operands.  */
+      /* |________________________|
+                |_________________|  */
+      tp = TMP_ALLOC_LIMBS (qn + 1);
+
+      new_np = scratch;
+      new_nn = 2 * qn + 1;
+      if (new_np == np)
+	/* We need {np,nn} to remain untouched until the final adjustment, so
+	   we need to allocate separate space for new_np.  */
+	new_np = TMP_ALLOC_LIMBS (new_nn + 1);
+
+
+      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
+	{
+	  count_leading_zeros (cnt, dh);
+
+	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
+	  new_np[new_nn] = cy;
+
+	  new_nn += (cy != 0);
+
+	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
+	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
+	  /* Bring in the bits shifted up from the next lower divisor limb,
+	     so the truncated divisor is as large as possible.  */
+	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
+
+	  if (qn + 1 == 2)
+	    {
+	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+	    }
+	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+	    {
+	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+	    }
+	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+	    {
+	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+	    }
+	  else
+	    {
+	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+	    }
+	  if (cy == 0)
+	    tp[qn] = qh;
+	  else if (UNLIKELY (qh != 0))
+	    {
+	      /* This happens only when the quotient is close to B^n and
+		 mpn_*_divappr_q returned B^n.  Clamp the developed limbs
+		 to the maximum representable value.  */
+	      mp_size_t i, n;
+	      n = new_nn - (qn + 1);
+	      for (i = 0; i < n; i++)
+		tp[i] = GMP_NUMB_MAX;
+	      qh = 0;		/* currently ignored */
+	    }
+	}
+      else  /* divisor is already normalised */
+	{
+	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
+
+	  new_dp = (mp_ptr) dp + dn - (qn + 1);
+
+	  if (qn == 2 - 1)
+	    {
+	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+	    }
+	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+	    {
+	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
+	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+	    }
+	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+	    {
+	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
+	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+	    }
+	  else
+	    {
+	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+	    }
+	  tp[qn] = qh;
+	}
+
+      MPN_COPY (qp, tp + 1, qn);
+      /* The approximate quotient (developed with one guard limb, tp[0])
+	 may be too large.  Only when the guard limb is very small can
+	 dropping it leave qp one too big; check by multiplying back and
+	 comparing against the dividend.
+	 NOTE(review): the bound 4 presumably comes from the error bound of
+	 the mpn_*_divappr_q functions -- confirm against their docs.  */
+      if (tp[0] <= 4)
+        {
+	  mp_size_t rn;
+
+          rp = TMP_ALLOC_LIMBS (dn + qn);
+          mpn_mul (rp, dp, dn, tp + 1, qn);
+	  rn = dn + qn;
+	  rn -= rp[rn - 1] == 0;
+
+          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
+            MPN_DECR_U (qp, qn, 1);
+        }
+    }
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/div_qr_1.c b/mpn/generic/div_qr_1.c
new file mode 100644
index 0000000..8f80d37
--- /dev/null
+++ b/mpn/generic/div_qr_1.c
@@ -0,0 +1,125 @@
+/* mpn_div_qr_1 -- mpn by limb division.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund
+
+Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef DIV_QR_1_NORM_THRESHOLD
+#define DIV_QR_1_NORM_THRESHOLD 3
+#endif
+#ifndef DIV_QR_1_UNNORM_THRESHOLD
+#define DIV_QR_1_UNNORM_THRESHOLD 3
+#endif
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+/* Divides {up, n} by d. Writes the n-1 low quotient limbs at {qp,
+ * n-1}, and the high quotient limb at *qh. Returns remainder.
+ * Dispatches on whether d is already normalized (high bit set) and on
+ * the operand size thresholds, falling back to a plain udiv loop for
+ * small n. */
+mp_limb_t
+mpn_div_qr_1 (mp_ptr qp, mp_limb_t *qh, mp_srcptr up, mp_size_t n,
+	      mp_limb_t d)
+{
+  unsigned cnt;
+  mp_limb_t uh;
+
+  ASSERT (n > 0);
+  ASSERT (d > 0);
+
+  if (d & GMP_NUMB_HIGHBIT)
+    {
+      /* Normalized case */
+      mp_limb_t dinv, q;
+
+      uh = up[--n];
+
+      /* Branch-free high quotient limb: it is 1 exactly when the top
+	 dividend limb is >= d, in which case d is subtracted once.  */
+      q = (uh >= d);
+      *qh = q;
+      uh -= (-q) & d;
+
+      if (BELOW_THRESHOLD (n, DIV_QR_1_NORM_THRESHOLD))
+	{
+	  cnt = 0;
+	  /* Plain schoolbook loop, shared (via goto) with the
+	     unnormalized paths; cnt holds the normalization shift to
+	     undo on the remainder.  */
+	plain:
+	  while (n > 0)
+	    {
+	      mp_limb_t ul = up[--n];
+	      udiv_qrnnd (qp[n], uh, uh, ul, d);
+	    }
+	  return uh >> cnt;
+	}
+      invert_limb (dinv, d);
+      return mpn_div_qr_1n_pi1 (qp, up, n, uh, d, dinv);
+    }
+  else
+    {
+      /* Unnormalized case */
+      mp_limb_t dinv, ul;
+
+      if (! UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+	{
+	  uh = up[--n];
+	  udiv_qrnnd (*qh, uh, CNST_LIMB(0), uh, d);
+	  cnt = 0;
+	  goto plain;
+	}
+
+      count_leading_zeros (cnt, d);
+      d <<= cnt;
+
+#if HAVE_NATIVE_mpn_div_qr_1u_pi1
+      /* FIXME: Call loop doing on-the-fly normalization */
+#endif
+
+      /* Shift up front, use qp area for shifted copy. A bit messy,
+	 since we have only n-1 limbs available, and shift the high
+	 limb manually. */
+      uh = up[--n];
+      ul = (uh << cnt) | mpn_lshift (qp, up, n, cnt);
+      uh >>= (GMP_LIMB_BITS - cnt);
+
+      if (UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+	{
+	  udiv_qrnnd (*qh, uh, uh, ul, d);
+	  /* Continue the plain loop on the shifted copy held in qp.  */
+	  up = qp;
+	  goto plain;
+	}
+      invert_limb (dinv, d);
+
+      udiv_qrnnd_preinv (*qh, uh, uh, ul, d, dinv);
+      /* The remainder of the shifted division must be shifted back.  */
+      return mpn_div_qr_1n_pi1 (qp, qp, n, uh, d, dinv) >> cnt;
+    }
+}
diff --git a/mpn/generic/div_qr_1n_pi1.c b/mpn/generic/div_qr_1n_pi1.c
new file mode 100644
index 0000000..4977131
--- /dev/null
+++ b/mpn/generic/div_qr_1n_pi1.c
@@ -0,0 +1,505 @@
+/* mpn_div_qr_1n_pi1
+
+   Contributed to the GNU project by Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+#ifndef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 2
+#endif
+
+/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
+
+#if defined (__GNUC__) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %k2\n\t"					\
+	     "adc	%4, %k1\n\t"					\
+	     "sbb	%k0, %k0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %q2\n\t"					\
+	     "adc	%4, %q1\n\t"					\
+	     "sbb	%q0, %q0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxcc	%r3, %4, %1\n\t"				\
+	     "subx	%%g0, %%g0, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addccc	%r7, %8, %%g0\n\t"				\
+	     "addccc	%r3, %4, %1\n\t"				\
+	     "clr	%0\n\t"						\
+	     "movcs	%%xcc, -1, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
+	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
+	 __CLOBBER_CC)
+#if __VIS__ >= 0x300
+#undef add_mssaaaa
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxccc	%r3, %4, %1\n\t"				\
+	     "clr	%0\n\t"						\
+	     "movcs	%%xcc, -1, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
+	     "adde	%1, %3, %4\n\t"					\
+	     "subfe	%0, %0, %0\n\t"					\
+	     "nor	%0, %0, %0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)			\
+	     __CLOBBER_CC)
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "algr	%2, %6\n\t"					\
+	     "alcgr	%1, %4\n\t"					\
+	     "lghi	%0, 0\n\t"					\
+	     "alcgr	%0, %0\n\t"					\
+	     "lcgr	%0, %0"						\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
+	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
+#endif
+
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "adds	%2, %5, %6\n\t"					\
+	     "adcs	%1, %3, %4\n\t"					\
+	     "movcc	%0, #0\n\t"					\
+	     "movcs	%0, #-1"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "adds	%2, %x5, %6\n\t"				\
+	     "adcs	%1, %x3, %x4\n\t"				\
+	     "csinv	%0, xzr, xzr, cc\n\t"				\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rZ" (ah), "rZ" (bh), "%rZ" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_mssaaaa
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (m) = - (__c1 + (__s1 < __c0));					\
+  } while (0)
+#endif
+
+#if DIV_QR_1N_METHOD == 1
+
+/* Reference implementation: plain schoolbook division, one
+   udiv_qrnnd_preinv per limb, high to low.
+
+   Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}
+   and returning the remainder.  Requires that uh < d and that d is
+   normalized (high bit set).  qp and up may be the same area. */
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
+		   mp_limb_t d, mp_limb_t dinv)
+{
+  ASSERT (n > 0);
+  ASSERT (uh < d);
+  ASSERT (d & GMP_NUMB_HIGHBIT);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));
+
+  do
+    {
+      mp_limb_t q, ul;
+
+      ul = up[--n];
+      udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv);
+      qp[n] = q;
+    }
+  while (n > 0);
+
+  return uh;
+}
+
+#elif DIV_QR_1N_METHOD == 2
+
+/* The main idea of this algorithm is to write B^2 = d (B + dinv) +
+   B2, where 1 <= B2 < d. Similarly to mpn_mod_1_1p, each iteration
+   can then replace
+
+     u1 B^2 = u1 B2 (mod d)
+
+   which gives a very short critical path for computing the remainder
+   (with some tricks to handle the carry when the next two lower limbs
+   are added in). To also get the quotient, include the corresponding
+   multiple of d in the expression,
+
+     u1 B^2 = u1 B2 + (u1 dinv + u1 B) d
+
+   We get the quotient by accumulating the (u1 dinv + u1 B) terms. The
+   two multiplies, u1 * B2 and u1 * dinv, are independent, and can be
+   executed in parallel.
+ */
+/* Quotient at {qp, n}, remainder returned.  Requires u1 < d and d
+   normalized.  The early read of up[n-1] allows qp == up.  */
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
+		   mp_limb_t d, mp_limb_t dinv)
+{
+  mp_limb_t B2;
+  mp_limb_t u0, u2;
+  mp_limb_t q0, q1;
+  mp_limb_t p0, p1;
+  mp_limb_t t;
+  mp_size_t j;
+
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+  ASSERT (n > 0);
+  ASSERT (u1 < d);
+
+  if (n == 1)
+    {
+      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
+      return u1;
+    }
+
+  /* B2 = B^2 mod d (for normalized d); see the header comment above.
+     FIXME: Could be precomputed */
+  B2 = -d*dinv;
+
+  umul_ppmm (q1, q0, dinv, u1);
+  umul_ppmm (p1, p0, B2, u1);
+  q1 += u1;
+  ASSERT (q1 >= u1);
+  u0 = up[n-1];	/* Early read, to allow qp == up. */
+  qp[n-1] = q1;
+
+  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
+
+  /* FIXME: Keep q1 in a variable between iterations, to reduce number
+     of memory accesses. */
+  for (j = n-2; j-- > 0; )
+    {
+      mp_limb_t q2, cy;
+
+      /* Additions for the q update:
+       *	+-------+
+       *        |u1 * v |
+       *        +---+---+
+       *        | u1|
+       *    +---+---+
+       *    | 1 | v |  (conditional on u2)
+       *    +---+---+
+       *        | 1 |  (conditional on u0 + u2 B2 carry)
+       *        +---+
+       * +      | q0|
+       *   -+---+---+---+
+       *    | q2| q1| q0|
+       *    +---+---+---+
+      */
+      umul_ppmm (p1, t, u1, dinv);
+      /* u2 is either 0 or an all-ones mask (set by add_mssaaaa), so
+	 u2 & B2 adds B2 conditionally; a carry means one more d must
+	 come off.  */
+      ADDC_LIMB (cy, u0, u0, u2 & B2);
+      u0 -= (-cy) & d;
+      add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
+      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
+      q0 = t;
+
+      /* Note that p1 + cy cannot overflow */
+      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1 + cy);
+
+      umul_ppmm (p1, p0, u1, B2);
+
+      qp[j+1] = q1;
+      MPN_INCR_U (qp+j+2, n-j-2, q2);
+
+      add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
+    }
+
+  /* Final reduction: u2 is nonzero (all ones) iff the last add carried;
+     fold up to two conditional subtractions of d into the remainder,
+     counting them in the quotient.  */
+  q1 = (u2 > 0);
+  u1 -= (-q1) & d;
+
+  t = (u1 >= d);
+  q1 += t;
+  u1 -= (-t) & d;
+
+  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
+  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
+
+  MPN_INCR_U (qp+1, n-1, q1);
+
+  qp[0] = q0;
+  return u0;
+}
+
+#elif DIV_QR_1N_METHOD == 3
+
+/* This variant handles carry from the u update earlier. This gives a
+   longer critical path, but reduces the work needed for the
+   quotients.
+
+   Quotient at {qp, n}, remainder returned.  Requires u1 < d and d
+   normalized (see ASSERTs).  The early read of up[n-1] allows
+   qp == up. */
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
+		   mp_limb_t d, mp_limb_t dinv)
+{
+  mp_limb_t B2;
+  mp_limb_t cy, u0;
+  mp_limb_t q0, q1;
+  mp_limb_t p0, p1;
+  mp_limb_t t;
+  mp_size_t j;
+
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+  ASSERT (n > 0);
+  ASSERT (u1 < d);
+
+  if (n == 1)
+    {
+      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
+      return u1;
+    }
+
+  /* FIXME: Could be precomputed */
+  B2 = -d*dinv;
+
+  umul_ppmm (q1, q0, dinv, u1);
+  umul_ppmm (p1, p0, B2, u1);
+  q1 += u1;
+  ASSERT (q1 >= u1);
+  u0 = up[n-1];	/* Early read, to allow qp == up. */
+
+  /* cy is 0 or an all-ones mask; on carry out of the two-limb sum,
+     subtract d from the new high limb and bump the quotient (q1 -= -1).  */
+  add_mssaaaa (cy, u1, u0, u0, up[n-2], p1, p0);
+  u1 -= cy & d;
+  q1 -= cy;
+  qp[n-1] = q1;
+
+  /* FIXME: Keep q1 in a variable between iterations, to reduce number
+     of memory accesses. */
+  for (j = n-2; j-- > 0; )
+    {
+      mp_limb_t q2, cy;
+      mp_limb_t t1, t0;
+
+      /* Additions for the q update:
+       *	+-------+
+       *        |u1 * v |
+       *        +---+---+
+       *        | u1|
+       *        +---+
+       *        | 1 |  (conditional on {u1, u0} carry)
+       *        +---+
+       * +      | q0|
+       *   -+---+---+---+
+       *    | q2| q1| q0|
+       *    +---+---+---+
+       *
+       * Additions for the u update:
+       *        +-------+
+       *        |u1 * B2|
+       *        +---+---+
+       *      + |u0 |u-1|
+       *        +---+---+
+       *      - | d |     (conditional on carry)
+       *     ---+---+---+
+       *        |u1 | u0|
+       *        +---+---+
+       *
+      */
+      umul_ppmm (p1, p0, u1, B2);
+      ADDC_LIMB (q2, q1, u1, q0);
+      umul_ppmm (t1, t0, u1, dinv);
+      add_mssaaaa (cy, u1, u0, u0, up[j], p1, p0);
+      u1 -= cy & d;
+
+      /* t1 <= B-2, so cy can be added in without overflow. */
+      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), t1 - cy);
+      q0 = t0;
+
+      /* Final q update */
+      qp[j+1] = q1;
+      MPN_INCR_U (qp+j+2, n-j-2, q2);
+    }
+
+  /* At most one final conditional subtraction of d is needed here.  */
+  q1 = (u1 >= d);
+  u1 -= (-q1) & d;
+
+  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
+  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
+
+  MPN_INCR_U (qp+1, n-1, q1);
+
+  qp[0] = q0;
+  return u0;
+}
+
+#elif DIV_QR_1N_METHOD == 4
+
+/* Variant of method 3 that multiplies with the *unadjusted* u1 to
+   shorten the critical path; the conditional subtraction of d is
+   compensated for via the precomputed product B2*(B-d).
+
+   Quotient at {qp, n}, remainder returned.  Requires u1 < d and d
+   normalized (see ASSERTs).  */
+mp_limb_t
+mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
+		   mp_limb_t d, mp_limb_t dinv)
+{
+  mp_limb_t B2;
+  mp_limb_t u2, u0;
+  mp_limb_t q0, q1;
+  mp_limb_t p0, p1;
+  mp_limb_t B2d0, B2d1;
+  mp_limb_t t;
+  mp_size_t j;
+
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+  ASSERT (n > 0);
+  ASSERT (u1 < d);
+
+  if (n == 1)
+    {
+      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
+      return u1;
+    }
+
+  /* FIXME: Could be precomputed */
+  B2 = -d*dinv;
+  /* B2 * (B-d) */
+  umul_ppmm (B2d1, B2d0, B2, -d);
+
+  umul_ppmm (q1, q0, dinv, u1);
+  umul_ppmm (p1, p0, B2, u1);
+  q1 += u1;
+  ASSERT (q1 >= u1);
+
+  add_mssaaaa (u2, u1, u0, up[n-1], up[n-2], p1, p0);
+
+  /* After read of up[n-1], to allow qp == up. */
+  qp[n-1] = q1 - u2;
+
+  /* FIXME: Keep q1 in a variable between iterations, to reduce number
+     of memory accesses. */
+  for (j = n-2; j-- > 0; )
+    {
+      mp_limb_t q2, cy;
+      mp_limb_t t1, t0;
+
+      /* Additions for the q update. *After* u1 -= u2 & d adjustment.
+       *	+-------+
+       *        |u1 * v |
+       *        +---+---+
+       *        | u1|
+       *        +---+
+       *        | 1 |  (conditional on {u1, u0} carry)
+       *        +---+
+       * +      | q0|
+       *   -+---+---+---+
+       *    | q2| q1| q0|
+       *    +---+---+---+
+       *
+       * Additions for the u update. *Before* u1 -= u2 & d adjustment.
+       *        +-------+
+       *        |u1 * B2|
+       *        +---+---+
+       *        |u0 |u-1|
+       *        +---+---+
+       * +      |B2(B-d)| (conditional on u2)
+       *   -+---+---+---+
+       *    |u2 |u1 | u0|
+       *    +---+---+---+
+       *
+      */
+       /* Multiply with unadjusted u1, to shorten critical path. */
+      umul_ppmm (p1, p0, u1, B2);
+      u1 -= (d & u2);
+      ADDC_LIMB (q2, q1, u1, q0);
+      umul_ppmm (t1, t0, u1, dinv);
+
+      add_mssaaaa (cy, u1, u0, u0, up[j], u2 & B2d1, u2 & B2d0);
+      add_mssaaaa (u2, u1, u0, u1, u0, p1, p0);
+      u2 += cy;
+      ASSERT(-u2 <= 1);
+
+      /* t1 <= B-2, so u2 can be added in without overflow. */
+      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), t1 - u2);
+      q0 = t0;
+
+      /* Final q update */
+      qp[j+1] = q1;
+      MPN_INCR_U (qp+j+2, n-j-2, q2);
+    }
+  u1 -= u2 & d;
+
+  q1 = (u1 >= d);
+  u1 -= (-q1) & d;
+
+  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
+  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
+
+  MPN_INCR_U (qp+1, n-1, q1);
+
+  qp[0] = q0;
+  return u0;
+}
+#else
+#error Unknown DIV_QR_1N_METHOD
+#endif
diff --git a/mpn/generic/div_qr_1n_pi2.c b/mpn/generic/div_qr_1n_pi2.c
new file mode 100644
index 0000000..daae68f
--- /dev/null
+++ b/mpn/generic/div_qr_1n_pi2.c
@@ -0,0 +1,203 @@
+/* mpn_div_qr_1n_pi2.
+
+   THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2013, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* ISSUES:
+
+   * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
+
+   * Are there any problems with generating n quotient limbs in the q area?  It
+     surely simplifies things.
+
+   * Not yet adequately tested.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Define some longlong.h-style macros, but for wider operations.
+   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+     an additional sum operand.
+*/
+#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((USItype)(s2)),					\
+	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((UDItype)(s2)),					\
+	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %x3, xzr"\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3"	\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (s2) += __c1 + (__s1 < __c0);					\
+  } while (0)
+#endif
+
+struct precomp_div_1_pi2
+{
+  mp_limb_t dip[2];
+  mp_limb_t d;
+  int norm_cnt;
+};
+
+mp_limb_t
+mpn_div_qr_1n_pi2 (mp_ptr qp,
+		   mp_srcptr up, mp_size_t un,
+		   struct precomp_div_1_pi2 *pd)
+{
+  mp_limb_t most_significant_q_limb;
+  mp_size_t i;
+  mp_limb_t r, u2, u1, u0;
+  mp_limb_t d0, di1, di0;
+  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
+  mp_limb_t cnd;
+
+  ASSERT (un >= 2);
+  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
+  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
+  ASSERT_MPN (up, un);
+
+#define q3 q3a
+#define q2 q2b
+#define q1 q1b
+
+  up += un - 3;
+  r = up[2];
+  d0 = pd->d;
+
+  most_significant_q_limb = (r >= d0);
+  r -= d0 & -most_significant_q_limb;
+
+  qp += un - 3;
+  qp[2] = most_significant_q_limb;
+
+  di1 = pd->dip[1];
+  di0 = pd->dip[0];
+
+  for (i = un - 3; i >= 0; i -= 2)
+    {
+      u2 = r;
+      u1 = up[1];
+      u0 = up[0];
+
+      /* Dividend in {r,u1,u0} */
+
+      umul_ppmm (q1d,q0d, u1, di0);
+      umul_ppmm (q2b,q1b, u1, di1);
+      q2b++;				/* cannot spill */
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+      umul_ppmm (q2c,q1c, u2,  di0);
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+      umul_ppmm (q3a,q2a, u2, di1);
+
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+      q3 += r;
+
+      r = u0 - q2 * d0;
+
+      cnd = (r >= q1);
+      r += d0 & -cnd;
+      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
+
+      if (UNLIKELY (r >= d0))
+	{
+	  r -= d0;
+	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
+	}
+
+      qp[0] = q2;
+      qp[1] = q3;
+
+      up -= 2;
+      qp -= 2;
+    }
+
+  if ((un & 1) == 0)
+    {
+      u2 = r;
+      u1 = up[1];
+
+      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
+      qp[1] = q3;
+    }
+
+  return r;
+
+#undef q3
+#undef q2
+#undef q1
+}
diff --git a/mpn/generic/div_qr_1u_pi2.c b/mpn/generic/div_qr_1u_pi2.c
new file mode 100644
index 0000000..ea38e3c
--- /dev/null
+++ b/mpn/generic/div_qr_1u_pi2.c
@@ -0,0 +1,236 @@
+/* mpn_div_qr_1u_pi2.
+
+   THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2013, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* ISSUES:
+
+   * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
+
+   * Are there any problems with generating n quotient limbs in the q area?  It
+     surely simplifies things.
+
+   * Not yet adequately tested.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Define some longlong.h-style macros, but for wider operations.
+   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+     an additional sum operand.
+*/
+#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((USItype)(s2)),					\
+	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((UDItype)(s2)),					\
+	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %x3, xzr"\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3"	\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (s2) += __c1 + (__s1 < __c0);					\
+  } while (0)
+#endif
+
+struct precomp_div_1_pi2
+{
+  mp_limb_t dip[2];
+  mp_limb_t d;
+  int norm_cnt;
+};
+
+mp_limb_t
+mpn_div_qr_1u_pi2 (mp_ptr qp,
+		   mp_srcptr up, mp_size_t un,
+		   struct precomp_div_1_pi2 *pd)
+{
+  mp_size_t i;
+  mp_limb_t r, u2, u1, u0;
+  mp_limb_t d0, di1, di0;
+  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
+  mp_limb_t cnd;
+  int cnt;
+
+  ASSERT (un >= 2);
+  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) == 0);
+  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
+  ASSERT_MPN (up, un);
+
+#define q3 q3a
+#define q2 q2b
+#define q1 q1b
+
+  up += un - 3;
+  cnt = pd->norm_cnt;
+  r = up[2] >> (GMP_NUMB_BITS - cnt);
+  d0 = pd->d << cnt;
+
+  qp += un - 2;
+
+  di1 = pd->dip[1];
+  di0 = pd->dip[0];
+
+  for (i = un - 3; i >= 0; i -= 2)
+    {
+      u2 = r;
+      u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
+      u0 = (up[1] << cnt) | (up[0] >> (GMP_NUMB_BITS - cnt));
+
+      /* Dividend in {r,u1,u0} */
+
+      umul_ppmm (q1d,q0d, u1, di0);
+      umul_ppmm (q2b,q1b, u1, di1);
+      q2b++;				/* cannot spill */
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+      umul_ppmm (q2c,q1c, u2,  di0);
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+      umul_ppmm (q3a,q2a, u2, di1);
+
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+      q3 += r;
+
+      r = u0 - q2 * d0;
+
+      cnd = (r >= q1);
+      r += d0 & -cnd;
+      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
+
+      if (UNLIKELY (r >= d0))
+	{
+	  r -= d0;
+	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
+	}
+
+      qp[0] = q2;
+      qp[1] = q3;
+
+      up -= 2;
+      qp -= 2;
+    }
+
+  if ((un & 1) != 0)
+    {
+      u2 = r;
+      u1 = (up[2] << cnt);
+
+      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
+      qp[1] = q3;
+    }
+  else
+    {
+      u2 = r;
+      u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
+      u0 = (up[1] << cnt);
+
+      /* Dividend in {r,u1,u0} */
+
+      umul_ppmm (q1d,q0d, u1, di0);
+      umul_ppmm (q2b,q1b, u1, di1);
+      q2b++;				/* cannot spill */
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+      umul_ppmm (q2c,q1c, u2,  di0);
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+      umul_ppmm (q3a,q2a, u2, di1);
+
+      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+      q3 += r;
+
+      r = u0 - q2 * d0;
+
+      cnd = (r >= q1);
+      r += d0 & -cnd;
+      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
+
+      if (UNLIKELY (r >= d0))
+	{
+	  r -= d0;
+	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
+	}
+
+      qp[0] = q2;
+      qp[1] = q3;
+    }
+
+  return r >> cnt;
+
+#undef q3
+#undef q2
+#undef q1
+}
diff --git a/mpn/generic/div_qr_2.c b/mpn/generic/div_qr_2.c
new file mode 100644
index 0000000..c3c8f57
--- /dev/null
+++ b/mpn/generic/div_qr_2.c
@@ -0,0 +1,314 @@
+/* mpn_div_qr_2 -- Divide natural numbers, producing both remainder and
+   quotient.  The divisor is two limbs.
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002, 2011, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef DIV_QR_2_PI2_THRESHOLD
+/* Disabled unless explicitly tuned. */
+#define DIV_QR_2_PI2_THRESHOLD MP_LIMB_T_MAX
+#endif
+
+#ifndef SANITY_CHECK
+#define SANITY_CHECK 0
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+     an additional sum operand.
+   * add_csaac accepts two addends and a carry in, and generates a sum and a
+     carry out.  A little like a "full adder".
+*/
+#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((USItype)(s2)),					\
+	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "0"  ((UDItype)(s2)),					\
+	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %x3, xzr"\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3"	\
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
+	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)	\
+	     __CLOBBER_CC)
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (s2) += __c1 + (__s1 < __c0);					\
+  } while (0)
+#endif
+
+/* Typically used with r1, r0 same as n3, n2. Other types of overlap
+   between inputs and outputs are not supported. */
+#define udiv_qr_4by2(q1,q0, r1,r0, n3,n2,n1,n0, d1,d0, di1,di0)		\
+  do {									\
+    mp_limb_t _q3, _q2a, _q2, _q1, _q2c, _q1c, _q1d, _q0;		\
+    mp_limb_t _t1, _t0;							\
+    mp_limb_t _mask;							\
+									\
+    /* [q3,q2,q1,q0] = [n3,n2]*[di1,di0] + [n3,n2,n1,n0] + [0,1,0,0] */	\
+    umul_ppmm (_q2,_q1, n2, di1);					\
+    umul_ppmm (_q3,_q2a, n3, di1);					\
+    ++_q2;	/* _q2 cannot overflow */				\
+    add_ssaaaa (_q3,_q2, _q3,_q2, n3,_q2a);				\
+    umul_ppmm (_q2c,_q1c, n3, di0);					\
+    add_sssaaaa (_q3,_q2,_q1, _q2,_q1, n2,_q1c);			\
+    umul_ppmm (_q1d,_q0, n2, di0);					\
+    add_sssaaaa (_q2c,_q1,_q0, _q1,_q0, n1,n0); /* _q2c cannot overflow */ \
+    add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2c,_q1d);			\
+									\
+    umul_ppmm (_t1,_t0, _q2, d0);					\
+    _t1 += _q2 * d1 + _q3 * d0;						\
+									\
+    sub_ddmmss (r1, r0, n1, n0, _t1, _t0);				\
+									\
+    _mask = -(mp_limb_t) ((r1 >= _q1) & ((r1 > _q1) | (r0 >= _q0)));  /* (r1,r0) >= (q1,q0) */  \
+    add_ssaaaa (r1, r0, r1, r0, d1 & _mask, d0 & _mask);		\
+    sub_ddmmss (_q3, _q2, _q3, _q2, CNST_LIMB(0), -_mask);		\
+									\
+    if (UNLIKELY (r1 >= d1))						\
+      {									\
+	if (r1 > d1 || r0 >= d0)					\
+	  {								\
+	    sub_ddmmss (r1, r0, r1, r0, d1, d0);			\
+	    add_ssaaaa (_q3, _q2, _q3, _q2, CNST_LIMB(0), CNST_LIMB(1));\
+	  }								\
+      }									\
+    (q1) = _q3;								\
+    (q0) = _q2;								\
+  } while (0)
+
+static void
+invert_4by2 (mp_ptr di, mp_limb_t d1, mp_limb_t d0)
+{
+  mp_limb_t v1, v0, p1, t1, t0, p0, mask;
+  invert_limb (v1, d1);
+  p1 = d1 * v1;
+  /* <1, v1> * d1 = <B-1, p1> */
+  p1 += d0;
+  if (p1 < d0)
+    {
+      v1--;
+      mask = -(mp_limb_t) (p1 >= d1);
+      p1 -= d1;
+      v1 += mask;
+      p1 -= mask & d1;
+    }
+  /* <1, v1> * d1 + d0 = <B-1, p1> */
+  umul_ppmm (t1, p0, d0, v1);
+  p1 += t1;
+  if (p1 < t1)
+    {
+      if (UNLIKELY (p1 >= d1))
+	{
+	  if (p1 > d1 || p0 >= d0)
+	    {
+	      sub_ddmmss (p1, p0, p1, p0, d1, d0);
+	      v1--;
+	    }
+	}
+      sub_ddmmss (p1, p0, p1, p0, d1, d0);
+      v1--;
+    }
+  /* Now v1 is the 3/2 inverse, <1, v1> * <d1, d0> = <B-1, p1, p0>,
+   * with <p1, p0> + <d1, d0> >= B^2.
+   *
+   * The 4/2 inverse is (B^4 - 1) / <d1, d0> = <1, v1, v0>. The
+   * partial remainder after <1, v1> is
+   *
+   * B^4 - 1 - B <1, v1> <d1, d0> = <B-1, B-1, B-1, B-1> - <B-1, p1, p0, 0>
+   *                              = <~p1, ~p0, B-1>
+   */
+  udiv_qr_3by2 (v0, t1, t0, ~p1, ~p0, MP_LIMB_T_MAX, d1, d0, v1);
+  di[0] = v0;
+  di[1] = v1;
+
+#if SANITY_CHECK
+  {
+    mp_limb_t tp[4];
+    mp_limb_t dp[2];
+    dp[0] = d0;
+    dp[1] = d1;
+    mpn_mul_n (tp, dp, di, 2);
+    ASSERT_ALWAYS (mpn_add_n (tp+2, tp+2, dp, 2) == 0);
+    ASSERT_ALWAYS (tp[2] == MP_LIMB_T_MAX);
+    ASSERT_ALWAYS (tp[3] == MP_LIMB_T_MAX);
+    ASSERT_ALWAYS (mpn_add_n (tp, tp, dp, 2) == 1);
+  }
+#endif
+}
+
+static mp_limb_t
+mpn_div_qr_2n_pi2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, mp_limb_t di1, mp_limb_t di0)
+{
+  mp_limb_t qh;
+  mp_size_t i;
+  mp_limb_t r1, r0;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+  r1 = np[nn-1];
+  r0 = np[nn-2];
+
+  qh = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+      r0 &= GMP_NUMB_MASK;
+#endif
+      qh = 1;
+    }
+
+  for (i = nn - 2; i >= 2; i -= 2)
+    {
+      mp_limb_t n1, n0, q1, q0;
+      n1 = np[i-1];
+      n0 = np[i-2];
+      udiv_qr_4by2 (q1, q0, r1, r0, r1, r0, n1, n0, d1, d0, di1, di0);
+      qp[i-1] = q1;
+      qp[i-2] = q0;
+    }
+
+  if (i > 0)
+    {
+      mp_limb_t q;
+      udiv_qr_3by2 (q, r1, r0, r1, r0, np[0], d1, d0, di1);
+      qp[0] = q;
+    }
+  rp[1] = r1;
+  rp[0] = r0;
+
+  return qh;
+}
+
+
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least
+   significant quotient limbs at qp and the 2 long remainder at np.
+   Return the most significant limb of the quotient.
+
+   Preconditions:
+   1. qp must either not overlap with the other operands at all, or
+      qp >= np + 2 must hold true.  (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.)
+   2. nn >= 2.  */
+
+mp_limb_t
+mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp)
+{
+  mp_limb_t d1;
+  mp_limb_t d0;
+  gmp_pi1_t dinv;
+
+  ASSERT (nn >= 2);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2, np, nn) || qp >= np + 2);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, 2);
+
+  d1 = dp[1]; d0 = dp[0];
+
+  ASSERT (d1 > 0);
+
+  if (UNLIKELY (d1 & GMP_NUMB_HIGHBIT))
+    {
+      if (BELOW_THRESHOLD (nn, DIV_QR_2_PI2_THRESHOLD))
+	{
+	  gmp_pi1_t dinv;
+	  invert_pi1 (dinv, d1, d0);
+	  return mpn_div_qr_2n_pi1 (qp, rp, np, nn, d1, d0, dinv.inv32);
+	}
+      else
+	{
+	  mp_limb_t di[2];
+	  invert_4by2 (di, d1, d0);
+	  return mpn_div_qr_2n_pi2 (qp, rp, np, nn, d1, d0, di[1], di[0]);
+	}
+    }
+  else
+    {
+      int shift;
+      count_leading_zeros (shift, d1);
+      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
+      d0 <<= shift;
+      invert_pi1 (dinv, d1, d0);
+      return mpn_div_qr_2u_pi1 (qp, rp, np, nn, d1, d0, shift, dinv.inv32);
+    }
+}
diff --git a/mpn/generic/div_qr_2n_pi1.c b/mpn/generic/div_qr_2n_pi1.c
new file mode 100644
index 0000000..131a811
--- /dev/null
+++ b/mpn/generic/div_qr_2n_pi1.c
@@ -0,0 +1,84 @@
+/* mpn_div_qr_2n_pi1
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for normalized divisor */
+mp_limb_t
+mpn_div_qr_2n_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, mp_limb_t di)
+{
+  mp_limb_t qh;
+  mp_size_t i;
+  mp_limb_t r1, r0;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+  np += nn - 2;
+  r1 = np[1];
+  r0 = np[0];
+
+  qh = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+      r0 &= GMP_NUMB_MASK;
+#endif
+      qh = 1;
+    }
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t n0, q;
+      n0 = np[-1];
+      udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
+      np--;
+      qp[i] = q;
+    }
+
+  rp[1] = r1;
+  rp[0] = r0;
+
+  return qh;
+}
diff --git a/mpn/generic/div_qr_2u_pi1.c b/mpn/generic/div_qr_2u_pi1.c
new file mode 100644
index 0000000..70e617b
--- /dev/null
+++ b/mpn/generic/div_qr_2u_pi1.c
@@ -0,0 +1,76 @@
+/* mpn_div_qr_2u_pi1
+
+   Contributed to the GNU project by Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for unnormalized divisor. Caller must pass shifted d1 and
+   d0, while {np,nn} is shifted on the fly. */
+mp_limb_t
+mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
+{
+  mp_limb_t qh;
+  mp_limb_t r2, r1, r0;
+  mp_size_t i;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+  ASSERT (shift > 0);
+
+  r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
+  r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
+  r0 = np[nn-2] << shift;
+
+  udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t q;
+      r0 = np[i];
+      r1 |= r0 >> (GMP_LIMB_BITS - shift);
+      r0 <<= shift;
+      udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
+      qp[i] = q;
+    }
+
+  rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
+  rp[1] = r2 >> shift;
+
+  return qh;
+}
diff --git a/mpn/generic/dive_1.c b/mpn/generic/dive_1.c
new file mode 100644
index 0000000..056f5b9
--- /dev/null
+++ b/mpn/generic/dive_1.c
@@ -0,0 +1,146 @@
+/* mpn_divexact_1 -- mpn by limb exact division.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003, 2005, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+
+/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
+   q will only be correct if d divides a exactly.
+
+   A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
+   give zero on all CPUs (for instance it doesn't on the x86s).  This
+   separate loop might run faster too, helping odd divisors.
+
+   Possibilities:
+
+   mpn_divexact_1c could be created, accepting and returning c.  This would
+   let a long calculation be done piece by piece.  Currently there's no
+   particular need for that, and not returning c means that a final umul can
+   be skipped.
+
+   Another use for returning c would be letting the caller know whether the
+   division was in fact exact.  It would work just to return the carry bit
+   "c=(l>s)" and let the caller do a final umul if interested.
+
+   When the divisor is even, the factors of two could be handled with a
+   separate mpn_rshift, instead of shifting on the fly.  That might be
+   faster on some CPUs and would mean just the shift==0 style loop would be
+   needed.
+
+   If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
+   shift==0 loop is unnecessary, and could be eliminated if there's no great
+   speed difference.
+
+   It's not clear whether "/" is the best way to handle size==1.  Alpha gcc
+   2.95 for instance has a poor "/" and might prefer the modular method.
+   Perhaps a tuned parameter should control this.
+
+   If src[size-1] < divisor then dst[size-1] will be zero, and one divide
+   step could be skipped.  A test at last step for s<divisor (or ls in the
+   even case) might be a good way to do that.  But if this code is often
+   used with small divisors then it might not be worth bothering  */
+
+void
+mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+  mp_size_t  i;
+  mp_limb_t  c, h, l, ls, s, s_next, inverse, dummy;
+  unsigned   shift;
+
+  ASSERT (size >= 1);
+  ASSERT (divisor != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (divisor);
+
+  if ((divisor & 1) == 0)
+    {
+      count_trailing_zeros (shift, divisor);
+      divisor >>= shift;
+    }
+  else
+    shift = 0;
+
+  binvert_limb (inverse, divisor);
+  divisor <<= GMP_NAIL_BITS;
+
+  if (shift != 0)
+    {
+      c = 0;
+
+      s = src[0];
+
+      for (i = 1; i < size; i++)
+	{
+	  s_next = src[i];
+	  ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
+	  s = s_next;
+
+	  SUBC_LIMB (c, l, ls, c);
+
+	  l = (l * inverse) & GMP_NUMB_MASK;
+	  dst[i - 1] = l;
+
+	  umul_ppmm (h, dummy, l, divisor);
+	  c += h;
+	}
+
+      ls = s >> shift;
+      l = ls - c;
+      l = (l * inverse) & GMP_NUMB_MASK;
+      dst[size - 1] = l;
+    }
+  else
+    {
+      s = src[0];
+
+      l = (s * inverse) & GMP_NUMB_MASK;
+      dst[0] = l;
+      c = 0;
+
+      for (i = 1; i < size; i++)
+	{
+	  umul_ppmm (h, dummy, l, divisor);
+	  c += h;
+
+	  s = src[i];
+	  SUBC_LIMB (c, l, s, c);
+
+	  l = (l * inverse) & GMP_NUMB_MASK;
+	  dst[i] = l;
+	}
+    }
+}
diff --git a/mpn/generic/diveby3.c b/mpn/generic/diveby3.c
new file mode 100644
index 0000000..7dee0bc
--- /dev/null
+++ b/mpn/generic/diveby3.c
@@ -0,0 +1,173 @@
+/* mpn_divexact_by3c -- mpn exact division by 3.
+
+Copyright 2000-2003, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if DIVEXACT_BY3_METHOD == 0
+
+/* Exact division of {up,un} by 3 with incoming carry c, quotient to
+   {rp,un}; returns the outgoing carry (0, 1 or 2).  Implemented on top
+   of mpn_bdiv_dbm1c with the magic constant (B-1)/3. */
+mp_limb_t
+mpn_divexact_by3c (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_limb_t c)
+{
+  const mp_limb_t third = GMP_NUMB_MASK / 3;
+
+  /* bdiv_dbm1 can only return C * (GMP_NUMB_MASK / 3) for 0 <= C < 3.
+     Reducing mod 4 recovers C, because (2^(2k)-1)/3 == 1 (mod 4) and so
+     is its own inverse there. */
+  return mpn_bdiv_dbm1c (rp, up, un, third, third * c) & 3;
+}
+
+#endif
+
+#if DIVEXACT_BY3_METHOD == 1
+
+/* The algorithm here is basically the same as mpn_divexact_1, as described
+   in the manual.  Namely at each step q = (src[i]-c)*inverse, and new c =
+   borrow(src[i]-c) + high(divisor*q).  But because the divisor is just 3,
+   high(divisor*q) can be determined with two comparisons instead of a
+   multiply.
+
+   The "c += ..."s add the high limb of 3*l to c.  That high limb will be 0,
+   1 or 2.  Doing two separate "+="s seems to give better code on gcc (as of
+   2.95.2 at least).
+
+   It will be noted that the new c is formed by adding three values each 0
+   or 1.  But the total is only 0, 1 or 2.  When the subtraction src[i]-c
+   causes a borrow, that leaves a limb value of either 0xFF...FF or
+   0xFF...FE.  The multiply by MODLIMB_INVERSE_3 gives 0x55...55 or
+   0xAA...AA respectively, and in those cases high(3*q) is only 0 or 1
+   respectively, hence a total of no more than 2.
+
+   Alternatives:
+
+   This implementation has each multiply on the dependent chain, due to
+   "l=s-c".  See below for alternative code which avoids that.  */
+
+/* Exact division of {up,un} by 3 with incoming carry c; quotient to
+   {rp,un}, outgoing carry (0, 1 or 2) returned.  Algorithm described in
+   the comment block above. */
+mp_limb_t
+mpn_divexact_by3c (mp_ptr restrict rp, mp_srcptr restrict up, mp_size_t un, mp_limb_t c)
+{
+  mp_limb_t  l, q, s;
+  mp_size_t  i;
+
+  ASSERT (un >= 1);
+  ASSERT (c == 0 || c == 1 || c == 2);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));
+
+  i = 0;
+  do
+    {
+      /* l = dividend limb minus incoming carry; c receives the borrow. */
+      s = up[i];
+      SUBC_LIMB (c, l, s, c);
+
+      /* q = l / 3 mod B, by multiplying with the modular inverse of 3. */
+      q = (l * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;
+      rp[i] = q;
+
+      /* Add high(3*q), which is 0, 1 or 2, computed with two compares
+         instead of a multiply (see comment above this function). */
+      c += (q >= GMP_NUMB_CEIL_MAX_DIV3);
+      c += (q >= GMP_NUMB_CEIL_2MAX_DIV3);
+    }
+  while (++i < un);
+
+  ASSERT (c == 0 || c == 1 || c == 2);
+  return c;
+}
+
+
+#endif
+
+#if DIVEXACT_BY3_METHOD == 2
+
+/* The following alternative code re-arranges the quotient calculation from
+   (src[i]-c)*inverse to instead
+
+       q = src[i]*inverse - c*inverse
+
+   thereby allowing src[i]*inverse to be scheduled back as far as desired,
+   making full use of multiplier throughput and leaving just some carry
+   handing on the dependent chain.
+
+   The carry handling consists of determining the c for the next iteration.
+   This is the same as described above, namely look for any borrow from
+   src[i]-c, and at the high of 3*q.
+
+   high(3*q) is done with two comparisons as above (in c2 and c3).  The
+   borrow from src[i]-c is incorporated into those by noting that if there's
+   a carry then we have src[i]-c == 0xFF..FF or 0xFF..FE, in turn
+   giving q = 0x55..55 or 0xAA..AA.  Adding 1 to either of those q values is
+   enough to make high(3*q) come out 1 bigger, as required.
+
+   l = -c*inverse is calculated at the same time as c, since for most chips
+   it can be more conveniently derived from separate c1/c2/c3 values than
+   from a combined c equal to 0, 1 or 2.
+
+   The net effect is that with good pipelining this loop should be able to
+   run at perhaps 4 cycles/limb, depending on available execute resources
+   etc.
+
+   Usage:
+
+   This code is not used by default, since we really can't rely on the
+   compiler generating a good software pipeline, nor on such an approach
+   even being worthwhile on all CPUs.
+
+   Itanium is one chip where this algorithm helps though, see
+   mpn/ia64/diveby3.asm.  */
+
+/* Exact division of {up,un} by 3, software-pipelined variant described
+   in the comment block above: the multiply is kept off the carry's
+   dependent chain by folding the carry in additively as q = s*inv + cl. */
+mp_limb_t
+mpn_divexact_by3c (mp_ptr restrict rp, mp_srcptr restrict up, mp_size_t un, mp_limb_t cy)
+{
+  mp_limb_t  s, sm, cl, q, qx, c2, c3;
+  mp_size_t  i;
+
+  ASSERT (un >= 1);
+  ASSERT (cy == 0 || cy == 1 || cy == 2);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));
+
+  /* cl = -cy * inverse(3) mod B, maintained alongside cy so the carry
+     subtraction becomes an addition into the quotient. */
+  cl = cy == 0 ? 0 : cy == 1 ? -MODLIMB_INVERSE_3 : -2*MODLIMB_INVERSE_3;
+
+  for (i = 0; i < un; i++)
+    {
+      s = up[i];
+      /* sm = s * inverse(3) mod B; independent of the carry chain, so it
+         can be scheduled arbitrarily early. */
+      sm = (s * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;
+
+      q = (cl + sm) & GMP_NUMB_MASK;
+      rp[i] = q;
+      /* Fold the borrow from s - cy into q before the compares; +1 is
+         enough to make high(3*q) come out one bigger (see above). */
+      qx = q + (s < cy);
+
+      /* c2 + c3 = high(3*qx), i.e. 0, 1 or 2. */
+      c2 = qx >= GMP_NUMB_CEIL_MAX_DIV3;
+      c3 = qx >= GMP_NUMB_CEIL_2MAX_DIV3 ;
+
+      cy = c2 + c3;
+      /* Next -cy*inverse, assembled directly from c2/c3. */
+      cl = (-c2 & -MODLIMB_INVERSE_3) + (-c3 & -MODLIMB_INVERSE_3);
+    }
+
+  return cy;
+}
+
+#endif
diff --git a/mpn/generic/divexact.c b/mpn/generic/divexact.c
new file mode 100644
index 0000000..ec417df
--- /dev/null
+++ b/mpn/generic/divexact.c
@@ -0,0 +1,296 @@
+/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
+   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
+   between Q and N; all other overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if 1
+/* Q = {qp, nn-dn+1} = {np,nn} / {dp,dn}, assuming the division is exact.
+   Strips zero limbs common to N and D, then divides 2-adically: bdiv_q
+   computes -N/D mod B^qn, which is negated at the end. */
+void
+mpn_divexact (mp_ptr qp,
+	      mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp, mp_size_t dn)
+{
+  unsigned shift;
+  mp_size_t qn;
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (dn > 0);
+  ASSERT (nn >= dn);
+  ASSERT (dp[dn-1] > 0);
+
+  /* Exactness means each low zero limb of D is matched by a zero limb
+     of N; strip them from both operands. */
+  while (dp[0] == 0)
+    {
+      ASSERT (np[0] == 0);
+      dp++;
+      np++;
+      dn--;
+      nn--;
+    }
+
+  if (dn == 1)
+    {
+      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
+      return;
+    }
+
+  TMP_MARK;
+
+  qn = nn + 1 - dn;
+  count_trailing_zeros (shift, dp[0]);
+
+  if (shift > 0)
+    {
+      mp_ptr wp;
+      mp_size_t ss;
+      /* Only the limbs actually consumed by the bdiv need shifting. */
+      ss = (dn > qn) ? qn + 1 : dn;
+
+      tp = TMP_ALLOC_LIMBS (ss);
+      mpn_rshift (tp, dp, ss, shift);
+      dp = tp;
+
+      /* Since we have excluded dn == 1, we have nn > qn, and we need
+	 to shift one limb beyond qn. */
+      wp = TMP_ALLOC_LIMBS (qn + 1);
+      mpn_rshift (wp, np, qn + 1, shift);
+      np = wp;
+    }
+
+  /* Only the low qn limbs of D can affect the quotient mod B^qn. */
+  if (dn > qn)
+    dn = qn;
+
+  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
+  mpn_bdiv_q (qp, np, qn, dp, dn, tp);
+  TMP_FREE;
+
+  /* Since bdiv_q computes -N/D (mod B^{qn}), we must negate now. */
+  mpn_neg (qp, qp, qn);
+}
+
+#else
+
+/* We use the Jebelean's bidirectional exact division algorithm.  This is
+   somewhat naively implemented, with equal quotient parts done by 2-adic
+   division and truncating division.  Since 2-adic division is faster, it
+   should be used for a larger chunk.
+
+   This code is horrendously ugly, in all sorts of ways.
+
+   * It was hacked without much care or thought, but with a testing program.
+   * It handles scratch space frivolously, and furthermore the itch function
+     is broken.
+   * Doesn't provide any measures to deal with mu_divappr_q's +3 error.  We
+     have yet to provoke an error due to this, though.
+   * Algorithm selection leaves a lot to be desired.  In particular, the choice
+     between DC and MU isn't a point, but we treat it like one.
+   * It makes the msb part 1 or 2 limbs larger than the lsb part, in spite of
+     that the latter is faster.  We should at least reverse this, but perhaps
+     we should make the lsb part considerably larger.  (How do we tune this?)
+*/
+
+/* Scratch limbs needed by the mpn_divexact variant below. */
+mp_size_t
+mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
+{
+  /* FIXME this is not right */
+  mp_size_t itch = nn + dn;
+  return itch;
+}
+
+/* Jebelean bidirectional exact division (disabled variant; see caveats in
+   the comment block above).  The low ~half of the quotient is computed by
+   2-adic (bdiv) division of the low dividend limbs, the high part by
+   truncating (divappr) division of the high limbs; the two parts overlap
+   in one limb, which is used at the end to correct the approximate high
+   part. */
+void
+mpn_divexact (mp_ptr qp,
+	      mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp, mp_size_t dn,
+	      mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_size_t nn0, qn0;
+  mp_size_t nn1, qn1;
+  mp_ptr tp;
+  mp_limb_t qml;
+  mp_limb_t qh;
+  int cnt;
+  mp_ptr xdp;
+  mp_limb_t di;
+  mp_limb_t cy;
+  gmp_pi1_t dinv;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  qn = nn - dn + 1;
+
+  /* For small divisors, and small quotients, don't use Jebelean's algorithm. */
+  if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
+    {
+      tp = scratch;
+      MPN_COPY (tp, np, qn);
+      binvert_limb (di, dp[0]);  di = -di;
+      dn = MIN (dn, qn);
+      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+      TMP_FREE;
+      return;
+    }
+
+  qn0 = ((nn - dn) >> 1) + 1;	/* low quotient size */
+
+  /* If quotient is much larger than the divisor, the bidirectional algorithm
+     does not work as currently implemented.  Fall back to plain bdiv.  */
+  if (qn0 > dn)
+    {
+      if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
+	{
+	  tp = scratch;
+	  MPN_COPY (tp, np, qn);
+	  binvert_limb (di, dp[0]);  di = -di;
+	  dn = MIN (dn, qn);
+	  mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+	}
+      else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+	{
+	  tp = scratch;
+	  MPN_COPY (tp, np, qn);
+	  binvert_limb (di, dp[0]);  di = -di;
+	  mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+	}
+      else
+	{
+	  mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
+	}
+      TMP_FREE;
+      return;
+    }
+
+  /* Dividend limbs feeding the low (2-adic) quotient part. */
+  nn0 = qn0 + qn0;
+
+  /* Sizes for the high (truncating) part; nn1/qn1 are grown below to get
+     the required one-limb overlap with the low part. */
+  nn1 = nn0 - 1 + ((nn-dn) & 1);
+  qn1 = qn0;
+  if (LIKELY (qn0 != dn))
+    {
+      nn1 = nn1 + 1;
+      qn1 = qn1 + 1;
+      if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
+	{
+	  /* If the leading divisor limb == 1, i.e. has just one bit, we have
+	     to include an extra limb in order to get the needed overlap.  */
+	  /* FIXME: Now with the mu_divappr_q function, we should really need
+	     more overlap. That indicates one of two things: (1) The test code
+	     is not good. (2) We actually overlap too much by default.  */
+	  nn1 = nn1 + 1;
+	  qn1 = qn1 + 1;
+	}
+    }
+
+  tp = TMP_ALLOC_LIMBS (nn1 + 1);
+
+  count_leading_zeros (cnt, dp[dn - 1]);
+
+  /* Normalize divisor, store into tmp area.  */
+  if (cnt != 0)
+    {
+      xdp = TMP_ALLOC_LIMBS (qn1);
+      mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
+    }
+  else
+    {
+      xdp = (mp_ptr) dp + dn - qn1;
+    }
+
+  /* Shift dividend according to the divisor normalization.  */
+  /* FIXME: We compute too much here for XX_divappr_q, but these functions'
+     interfaces want a pointer to the imaginative least significant limb, not
+     to the least significant *used* limb.  Of course, we could leave nn1-qn1
+     rubbish limbs in the low part, to save some time.  */
+  if (cnt != 0)
+    {
+      cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
+      if (cy != 0)
+	{
+	  tp[nn1] = cy;
+	  nn1++;
+	}
+    }
+  else
+    {
+      /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
+	 mpn_sub_n right before is executed.  */
+      MPN_COPY (tp, np + nn - nn1, nn1);
+    }
+
+  /* High quotient part, by approximate truncating division of the top
+     nn1 dividend limbs by the top qn1 (normalized) divisor limbs. */
+  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
+  if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
+    {
+      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
+    }
+  else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
+    {
+      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
+    }
+  else
+    {
+      /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with a
+	 conditional subtraction here.  */
+      qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
+      if (qh)
+	mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
+      mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
+      qp[qn0 - 1 + nn1 - qn1] = qh;
+    }
+  /* Save the overlap limb as delivered by divappr; the exact bdiv below
+     recomputes that position. */
+  qml = qp[qn0 - 1];
+
+  /* Low quotient part, by exact 2-adic division of the low qn0 limbs. */
+  binvert_limb (di, dp[0]);  di = -di;
+
+  if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
+    {
+      MPN_COPY (tp, np, qn0);
+      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+    }
+  else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
+    {
+      MPN_COPY (tp, np, qn0);
+      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+    }
+  else
+    {
+      mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
+    }
+
+  /* If the exact overlap limb exceeds the approximate one, the high part
+     was one too large; step it down. */
+  if (qml < qp[qn0 - 1])
+    mpn_decr_u (qp + qn0, 1);
+
+  TMP_FREE;
+}
+#endif
diff --git a/mpn/generic/divis.c b/mpn/generic/divis.c
new file mode 100644
index 0000000..f989ddb
--- /dev/null
+++ b/mpn/generic/divis.c
@@ -0,0 +1,194 @@
+/* mpn_divisible_p -- mpn by mpn divisibility test
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2014, 2017, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Determine whether A={ap,an} is divisible by D={dp,dn}.  Must have both
+   operands normalized, meaning high limbs non-zero, except that an==0 is
+   allowed.
+
+   There usually won't be many low zero bits on D, but the checks for this
+   are fast and might pick up a few operand combinations, in particular they
+   might reduce D to fit the single-limb mod_1/modexact_1 code.
+
+   Future:
+
+   Getting the remainder limb by limb would make an early exit possible on
+   finding a non-zero.  This would probably have to be bdivmod style so
+   there's no addback, but it would need a multi-precision inverse and so
+   might be slower than the plain method (on small sizes at least).
+
+   When D must be normalized (shifted to low bit set), it's possible to
+   suppress the bit-shifting of A down, as long as it's already been checked
+   that A has at least as many trailing zero bits as D.  */
+
+/* Return non-zero iff D={dp,dn} divides A={ap,an}.  Operand requirements
+   are described in the comment block above. */
+int
+mpn_divisible_p (mp_srcptr ap, mp_size_t an,
+		 mp_srcptr dp, mp_size_t dn)
+{
+  mp_limb_t  alow, dlow, dmask;
+  mp_ptr     qp, rp, tp;
+  mp_limb_t di;
+  unsigned  twos;
+  int c;
+  TMP_DECL;
+
+  ASSERT (an >= 0);
+  ASSERT (an == 0 || ap[an-1] != 0);
+  ASSERT (dn >= 1);
+  ASSERT (dp[dn-1] != 0);
+  ASSERT_MPN (ap, an);
+  ASSERT_MPN (dp, dn);
+
+  /* When a<d only a==0 is divisible.
+     Notice this test covers all cases of an==0. */
+  if (an < dn)
+    return (an == 0);
+
+  /* Strip low zero limbs from d, requiring a==0 on those. */
+  for (;;)
+    {
+      alow = *ap;
+      dlow = *dp;
+
+      if (dlow != 0)
+	break;
+
+      if (alow != 0)
+	return 0;  /* a has fewer low zero limbs than d, so not divisible */
+
+      /* a!=0 and d!=0 so won't get to n==0 */
+      an--; ASSERT (an >= 1);
+      dn--; ASSERT (dn >= 1);
+      ap++;
+      dp++;
+    }
+
+  /* a must have at least as many low zero bits as d */
+  dmask = LOW_ZEROS_MASK (dlow);
+  if ((alow & dmask) != 0)
+    return 0;
+
+  /* Single-limb divisor: a limb-sized mod decides the question. */
+  if (dn == 1)
+    {
+      if (ABOVE_THRESHOLD (an, BMOD_1_TO_MOD_1_THRESHOLD))
+	return mpn_mod_1 (ap, an, dlow) == 0;
+
+      count_trailing_zeros (twos, dlow);
+      dlow >>= twos;
+      return mpn_modexact_1_odd (ap, an, dlow) == 0;
+    }
+
+  count_trailing_zeros (twos, dlow);
+  if (dn == 2)
+    {
+      mp_limb_t  dsecond = dp[1];
+      /* If the high limb fits entirely within the low zero bits of dlow,
+	 then D shifted right by twos fits in a single limb, and the
+	 one-limb code can be used.  (This also guarantees twos != 0
+	 here, since dsecond is non-zero.) */
+      if (dsecond <= dmask)
+	{
+	  dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
+	  ASSERT_LIMB (dlow);
+	  return MPN_MOD_OR_MODEXACT_1_ODD (ap, an, dlow) == 0;
+	}
+    }
+
+  /* Should we compute Q = A * D^(-1) mod B^k,
+                       R = A - Q * D  mod B^k
+     here, for some small values of k?  Then check if R = 0 (mod B^k).  */
+
+  /* We could also compute A' = A mod T and D' = D mod P, for some
+     P = 3 * 5 * 7 * 11 ..., and then check if any prime factor from P
+     dividing D' also divides A'.  */
+
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_2 (rp, an + 1,
+		     qp, an - dn + 1); /* FIXME: Could we avoid this? */
+
+  /* Shift out D's low zero bits, applying the same shift to A (shown
+     above to be safe by the dmask test). */
+  if (twos != 0)
+    {
+      tp = TMP_ALLOC_LIMBS (dn);
+      ASSERT_NOCARRY (mpn_rshift (tp, dp, dn, twos));
+      dp = tp;
+
+      ASSERT_NOCARRY (mpn_rshift (rp, ap, an, twos));
+    }
+  else
+    {
+      MPN_COPY (rp, ap, an);
+    }
+  /* The bdiv functions below need an > dn: extend A with a zero high
+     limb if necessary.  If an == dn and A's high limb is already smaller
+     then A < D, and A != 0 here, so not divisible. */
+  if (rp[an - 1] >= dp[dn - 1])
+    {
+      rp[an] = 0;
+      an++;
+    }
+  else if (an == dn)
+    {
+      TMP_FREE;
+      return 0;
+    }
+
+  ASSERT (an > dn);		/* requirement of functions below */
+
+  if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
+      BELOW_THRESHOLD (an - dn, DC_BDIV_QR_THRESHOLD))
+    {
+      binvert_limb (di, dp[0]);
+      mpn_sbpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
+      rp += an - dn;
+    }
+  else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+    {
+      binvert_limb (di, dp[0]);
+      mpn_dcpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
+      rp += an - dn;
+    }
+  else
+    {
+      tp = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (an, dn));
+      mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
+    }
+
+  /* In general, bdiv may return either R = 0 or R = D when D divides
+     A. But R = 0 can happen only when A = 0, which we already have
+     excluded. Furthermore, R == D (mod B^{dn}) implies no carry, so
+     we don't need to check the carry returned from bdiv. */
+
+  MPN_CMP (c, rp, dp, dn);
+
+  TMP_FREE;
+  return c == 0;
+}
diff --git a/mpn/generic/divrem.c b/mpn/generic/divrem.c
new file mode 100644
index 0000000..1da84a8
--- /dev/null
+++ b/mpn/generic/divrem.c
@@ -0,0 +1,103 @@
+/* mpn_divrem -- Divide natural numbers, producing both remainder and
+   quotient.  This is now just a middle layer calling mpn_tdiv_qr.
+
+Copyright 1993-1997, 1999-2002, 2005, 2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide {np,nn} by normalized {dp,dn} (high bit of dp[dn-1] set),
+   writing nn-dn+qxn quotient limbs at qp (qxn fraction limbs at the low
+   end) and leaving the remainder at np; returns the most significant
+   quotient limb.  Middle layer dispatching to mpn_divrem_1, mpn_divrem_2
+   or mpn_tdiv_qr. */
+mp_limb_t
+mpn_divrem (mp_ptr qp, mp_size_t qxn,
+	    mp_ptr np, mp_size_t nn,
+	    mp_srcptr dp, mp_size_t dn)
+{
+  ASSERT (qxn >= 0);
+  ASSERT (nn >= dn);
+  ASSERT (dn >= 1);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, dn);
+
+  if (dn == 1)
+    {
+      mp_limb_t ret;
+      mp_ptr q2p;
+      mp_size_t qn;
+      TMP_DECL;
+
+      TMP_MARK;
+      q2p = TMP_ALLOC_LIMBS (nn + qxn);
+
+      /* divrem_1 handles qxn itself; the one-limb remainder lands in
+	 np[0].  A temporary quotient area is used so the overlap rules
+	 of this function, not divrem_1's, apply. */
+      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
+      qn = nn + qxn - 1;
+      MPN_COPY (qp, q2p, qn);
+      ret = q2p[qn];
+
+      TMP_FREE;
+      return ret;
+    }
+  else if (dn == 2)
+    {
+      return mpn_divrem_2 (qp, qxn, np, nn, dp);
+    }
+  else
+    {
+      mp_ptr q2p;
+      mp_limb_t qhl;
+      mp_size_t qn;
+      TMP_DECL;
+
+      TMP_MARK;
+      if (UNLIKELY (qxn != 0))
+	{
+	  mp_ptr n2p;
+	  /* Fraction limbs requested: append qxn zero limbs below the
+	     dividend, then one plain division produces them. */
+	  TMP_ALLOC_LIMBS_2 (n2p, nn + qxn,
+			     q2p, nn - dn + qxn + 1);
+	  MPN_ZERO (n2p, qxn);
+	  MPN_COPY (n2p + qxn, np, nn);
+	  mpn_tdiv_qr (q2p, np, 0L, n2p, nn + qxn, dp, dn);
+	  qn = nn - dn + qxn;
+	  MPN_COPY (qp, q2p, qn);
+	  qhl = q2p[qn];
+	}
+      else
+	{
+	  q2p = TMP_ALLOC_LIMBS (nn - dn + 1);
+	  mpn_tdiv_qr (q2p, np, 0L, np, nn, dp, dn);
+	  qn = nn - dn;
+	  MPN_COPY (qp, q2p, qn);
+	  qhl = q2p[qn];
+	}
+      TMP_FREE;
+      return qhl;
+    }
+}
diff --git a/mpn/generic/divrem_1.c b/mpn/generic/divrem_1.c
new file mode 100644
index 0000000..c13aa79
--- /dev/null
+++ b/mpn/generic/divrem_1.c
@@ -0,0 +1,254 @@
+/* mpn_divrem_1 -- mpn by limb division.
+
+Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef DIVREM_1_NORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD  0
+#endif
+#ifndef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_UNNORM_THRESHOLD  0
+#endif
+
+
+
+/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
+   and UNNORM thresholds are 0 and only the inversion code is included.
+
+   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
+   will be MP_SIZE_T_MAX and only the plain division code is included.
+
+   Otherwise mul-by-inverse is better than plain division above some
+   threshold, and best results are obtained by having code for both present.
+
+   The main reason for separating the norm and unnorm cases is that not all
+   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
+   code used on an already normalized divisor.
+
+   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
+   non-shifting code for both the norm and unnorm cases, though with
+   different criteria for skipping a division, and with different thresholds
+   of course.  And in fact if inversion is never viable, then that simple
+   non-shifting division would be all that's left.
+
+   The NORM and UNNORM thresholds might not differ much, but if there's
+   going to be separate code for norm and unnorm then it makes sense to have
+   separate thresholds.  One thing that's possible is that the
+   mul-by-inverse might be better only for normalized divisors, due to that
+   case not needing variable bit shifts.
+
+   Notice that the thresholds are tested after the decision to possibly skip
+   one divide step, so they're based on the actual number of divisions done.
+
+   For the unnorm case, it would be possible to call mpn_lshift to adjust
+   the dividend all in one go (into the quotient space say), rather than
+   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
+   than what the compiler can generate for EXTRACT.  But this is left to CPU
+   specific implementations to consider, especially since EXTRACT isn't on
+   the dependent chain.  */
+
+/* Divide {up,un} by the single limb d, writing un integer quotient limbs
+   plus qxn fraction limbs (fractions at the low end) at qp, and return
+   the remainder.  Picks plain udiv_qrnnd or multiply-by-inverse per the
+   thresholds above, with separate paths for normalized (high bit set)
+   and unnormalized d.  Quotient limbs are produced high to low. */
+mp_limb_t
+mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
+	      mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t  n;
+  mp_size_t  i;
+  mp_limb_t  n1, n0;
+  mp_limb_t  r = 0;
+
+  ASSERT (qxn >= 0);
+  ASSERT (un >= 0);
+  ASSERT (d != 0);
+  /* FIXME: What's the correct overlap rule when qxn!=0? */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
+
+  n = un + qxn;
+  if (n == 0)
+    return 0;
+
+  d <<= GMP_NAIL_BITS;
+
+  qp += (n - 1);   /* Make qp point at most significant quotient limb */
+
+  if ((d & GMP_LIMB_HIGHBIT) != 0)
+    {
+      /* Divisor already normalized. */
+      if (un != 0)
+	{
+	  /* High quotient limb is 0 or 1, skip a divide step. */
+	  mp_limb_t q;
+	  r = up[un - 1] << GMP_NAIL_BITS;
+	  q = (r >= d);
+	  *qp-- = q;
+	  /* Subtract d only when q == 1, branch-free. */
+	  r -= (d & -q);
+	  r >>= GMP_NAIL_BITS;
+	  n--;
+	  un--;
+	}
+
+      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
+	{
+	plain:
+	  for (i = un - 1; i >= 0; i--)
+	    {
+	      n0 = up[i] << GMP_NAIL_BITS;
+	      udiv_qrnnd (*qp, r, r, n0, d);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  /* Fraction limbs: keep dividing with zero numerator limbs. */
+	  for (i = qxn - 1; i >= 0; i--)
+	    {
+	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  return r;
+	}
+      else
+	{
+	  /* Multiply-by-inverse, divisor already normalized. */
+	  mp_limb_t dinv;
+	  invert_limb (dinv, d);
+
+	  for (i = un - 1; i >= 0; i--)
+	    {
+	      n0 = up[i] << GMP_NAIL_BITS;
+	      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  for (i = qxn - 1; i >= 0; i--)
+	    {
+	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  return r;
+	}
+    }
+  else
+    {
+      /* Most significant bit of divisor == 0.  */
+      int cnt;
+
+      /* Skip a division if high < divisor (high quotient 0).  Testing here
+	 before normalizing will still skip as often as possible.  */
+      if (un != 0)
+	{
+	  n1 = up[un - 1] << GMP_NAIL_BITS;
+	  if (n1 < d)
+	    {
+	      r = n1 >> GMP_NAIL_BITS;
+	      *qp-- = 0;
+	      n--;
+	      if (n == 0)
+		return r;
+	      un--;
+	    }
+	}
+
+      /* When the CPU's udiv copes with an unnormalized divisor, the
+	 plain loop above can be reused unchanged. */
+      if (! UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+	goto plain;
+
+      /* Normalize d; r is pre-shifted to match, and shifted back before
+	 each return below. */
+      count_leading_zeros (cnt, d);
+      d <<= cnt;
+      r <<= cnt;
+
+      if (UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+	{
+	  mp_limb_t nshift;
+	  if (un != 0)
+	    {
+	      /* Dividend limbs are shifted limb-by-limb in the loop
+		 (nshift spans the pair n1,n0), avoiding a separate
+		 mpn_lshift pass. */
+	      n1 = up[un - 1] << GMP_NAIL_BITS;
+	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
+	      for (i = un - 2; i >= 0; i--)
+		{
+		  n0 = up[i] << GMP_NAIL_BITS;
+		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+		  udiv_qrnnd (*qp, r, r, nshift, d);
+		  r >>= GMP_NAIL_BITS;
+		  qp--;
+		  n1 = n0;
+		}
+	      udiv_qrnnd (*qp, r, r, n1 << cnt, d);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  for (i = qxn - 1; i >= 0; i--)
+	    {
+	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  return r >> cnt;
+	}
+      else
+	{
+	  /* Multiply-by-inverse with on-the-fly dividend shifting. */
+	  mp_limb_t  dinv, nshift;
+	  invert_limb (dinv, d);
+	  if (un != 0)
+	    {
+	      n1 = up[un - 1] << GMP_NAIL_BITS;
+	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
+	      for (i = un - 2; i >= 0; i--)
+		{
+		  n0 = up[i] << GMP_NAIL_BITS;
+		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+		  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
+		  r >>= GMP_NAIL_BITS;
+		  qp--;
+		  n1 = n0;
+		}
+	      udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  for (i = qxn - 1; i >= 0; i--)
+	    {
+	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+	      r >>= GMP_NAIL_BITS;
+	      qp--;
+	    }
+	  return r >> cnt;
+	}
+    }
+}
diff --git a/mpn/generic/divrem_2.c b/mpn/generic/divrem_2.c
new file mode 100644
index 0000000..217f2f6
--- /dev/null
+++ b/mpn/generic/divrem_2.c
@@ -0,0 +1,118 @@
+/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
+   quotient.  The divisor is two limbs.
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993-1996, 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least significant
+   quotient limbs at qp and the 2 long remainder at np.  If qxn is non-zero,
+   generate that many fraction bits and append them after the other quotient
+   limbs.  Return the most significant limb of the quotient, this is always 0
+   or 1.
+
+   Preconditions:
+   1. The most significant bit of the divisor must be set.
+   2. qp must either not overlap with the input operands at all, or
+      qp >= np + 2 must hold true.  (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.
+   3. nn >= 2, even if qxn is non-zero.  */
+
+mp_limb_t
+mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+	      mp_ptr np, mp_size_t nn,
+	      mp_srcptr dp)
+{
+  mp_limb_t most_significant_q_limb;
+  mp_size_t i;
+  mp_limb_t r1, r0, d1, d0;
+  gmp_pi1_t di;
+
+  ASSERT (nn >= 2);
+  ASSERT (qxn >= 0);
+  ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp >= np+2);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, 2);
+
+  /* Point np at the two most significant numerator limbs; {r1,r0} holds
+     the running two-limb remainder from here on.  */
+  np += nn - 2;
+  d1 = dp[1];
+  d0 = dp[0];
+  r1 = np[1];
+  r0 = np[0];
+
+  /* If the top two numerator limbs are >= the divisor, the quotient does
+     not fit in nn-2+qxn limbs; subtract the divisor once and return 1 as
+     the extra most significant quotient limb.  */
+  most_significant_q_limb = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+      /* Manual borrow propagation, since sub_ddmmss does not know about
+	 nails.  */
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+      r0 &= GMP_NUMB_MASK;
+#endif
+      most_significant_q_limb = 1;
+    }
+
+  /* Precompute the 3/2 inverse of {d1,d0} used by udiv_qr_3by2 below.  */
+  invert_pi1 (di, d1, d0);
+
+  qp += qxn;
+
+  /* Integer part: fold in one numerator limb per step, most significant
+     first, emitting one quotient limb per step.  */
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t n0, q;
+      n0 = np[-1];
+      udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di.inv32);
+      np--;
+      qp[i] = q;
+    }
+
+  /* Fraction part: same reduction, but feeding in zero limbs.  */
+  if (UNLIKELY (qxn != 0))
+    {
+      qp -= qxn;
+      for (i = qxn - 1; i >= 0; i--)
+	{
+	  mp_limb_t q;
+	  udiv_qr_3by2 (q, r1, r0, r1, r0, CNST_LIMB(0), d1, d0, di.inv32);
+	  qp[i] = q;
+	}
+    }
+
+  /* np was decremented down to the base of the numerator; store the final
+     two-limb remainder there.  */
+  np[1] = r1;
+  np[0] = r0;
+
+  return most_significant_q_limb;
+}
diff --git a/mpn/generic/dump.c b/mpn/generic/dump.c
new file mode 100644
index 0000000..9a4ddf4
--- /dev/null
+++ b/mpn/generic/dump.c
@@ -0,0 +1,99 @@
+/* THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO
+   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1996, 2000-2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS % 4 == 0
+/* Print {ptr,n} to stdout in hex, most significant limb first, followed by
+   a newline.  Debugging aid only.  */
+void
+mpn_dump (mp_srcptr ptr, mp_size_t n)
+{
+  /* Drop high zero limbs so no spurious leading zeros are printed.  */
+  MPN_NORMALIZE (ptr, n);
+
+  if (n == 0)
+    printf ("0\n");
+  else
+    {
+      n--;
+#if _LONG_LONG_LIMB
+      /* A limb is wider than a long, so print it as two halves; %lX has no
+	 portable long long equivalent here.  The high half of the top limb
+	 gets no zero padding.  */
+      if ((ptr[n] >> GMP_LIMB_BITS / 2) != 0)
+	{
+	  printf ("%lX", (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));
+	  printf ("%0*lX", (GMP_LIMB_BITS / 2 / 4), (unsigned long) ptr[n]);
+	}
+      else
+#endif
+	printf ("%lX", (unsigned long) ptr[n]);
+
+      /* The limbs below the top one are zero-padded to full numb width.  */
+      while (n)
+	{
+	  n--;
+#if _LONG_LONG_LIMB
+	  printf ("%0*lX", (GMP_NUMB_BITS - GMP_LIMB_BITS / 2) / 4,
+		  (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));
+	  printf ("%0*lX", GMP_LIMB_BITS / 2 / 4, (unsigned long) ptr[n]);
+#else
+	  printf ("%0*lX", GMP_NUMB_BITS / 4, (unsigned long) ptr[n]);
+#endif
+	}
+      printf ("\n");
+    }
+}
+
+#else
+
+/* Print {p,n} in hex, one digit per recursion level, most significant
+   digit first: the low four bits are saved, the operand is shifted right
+   by four, and the higher digits are printed by the recursive call before
+   this level's digit.  Destroys {p,n}.
+
+   Bug fix: the original never reduced n, so the n != 0 guard could never
+   become false and the recursion was infinite.  Re-normalizing after the
+   shift makes n reach zero once the value is exhausted (which also drops
+   leading zero digits).  */
+static void
+mpn_recdump (mp_ptr p, mp_size_t n)
+{
+  mp_limb_t lo;
+  if (n != 0)
+    {
+      lo = p[0] & 0xf;
+      mpn_rshift (p, p, n, 4);
+      MPN_NORMALIZE (p, n);
+      mpn_recdump (p, n);
+      /* Cast needed since mp_limb_t may be wider than long; lo <= 0xf so
+	 nothing is lost.  */
+      printf ("%lX", (unsigned long) lo);
+    }
+}
+
+/* Print {p,n} to stdout in hex followed by a newline, for limb sizes that
+   are not a multiple of four bits.  Works on a scratch copy because
+   mpn_recdump destroys its operand by shifting it down.
+
+   Bug fix: the original copied p into the scratch buffer and freed it
+   without ever calling mpn_recdump or printing anything, so this branch
+   produced no output at all.  */
+void
+mpn_dump (mp_srcptr p, mp_size_t n)
+{
+  mp_ptr tp;
+  TMP_DECL;
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n);
+  MPN_COPY (tp, p, n);
+  if (n == 0)
+    printf ("0");	/* match the GMP_NUMB_BITS % 4 == 0 version above */
+  else
+    mpn_recdump (tp, n);
+  printf ("\n");
+  TMP_FREE;
+}
+
+#endif
diff --git a/mpn/generic/fib2_ui.c b/mpn/generic/fib2_ui.c
new file mode 100644
index 0000000..0b81571
--- /dev/null
+++ b/mpn/generic/fib2_ui.c
@@ -0,0 +1,174 @@
+/* mpn_fib2_ui -- calculate Fibonacci numbers.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* Store F[n] at fp and F[n-1] at f1p.  fp and f1p should have room for
+   MPN_FIB2_SIZE(n) limbs.
+
+   The return value is the actual number of limbs stored, this will be at
+   least 1.  fp[size-1] will be non-zero, except when n==0, in which case
+   fp[0] is 0 and f1p[0] is 1.  f1p[size-1] can be zero, since F[n-1]<F[n]
+   (for n>0).
+
+   Notes: F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+
+   In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
+   low limb.
+
+   In F[2k+1] with k odd, -2 is applied to F[k-1]^2 just by ORing into the
+   low limb.
+*/
+
+mp_size_t
+mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
+{
+  mp_size_t      size;
+  unsigned long  nfirst, mask;
+
+  TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
+
+  ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
+
+  /* Take a starting pair from the table. */
+  /* Halve n until it fits the table; mask records how many halvings were
+     done, i.e. which bit of n the doubling loop below resumes from.  */
+  mask = 1;
+  for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
+    mask <<= 1;
+  TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
+
+  /* The signed cast allows nfirst==0, since FIB_TABLE is indexed by a
+     value that may be -1 here.  */
+  f1p[0] = FIB_TABLE ((int) nfirst - 1);
+  fp[0]  = FIB_TABLE (nfirst);
+  size = 1;
+
+  /* Skip to the end if the table lookup gives the final answer. */
+  if (mask != 1)
+    {
+      mp_size_t  alloc;
+      mp_ptr        xp;
+      TMP_DECL;
+
+      TMP_MARK;
+      alloc = MPN_FIB2_SIZE (n);
+      xp = TMP_ALLOC_LIMBS (alloc);
+
+      do
+	{
+	  /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
+	     n&mask upwards.
+
+	     The next bit of n is n&(mask>>1) and we'll double to the pair
+	     fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
+	     that bit is 0 or 1 respectively.  */
+
+	  TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
+			 n >> refmpn_count_trailing_zeros(mask),
+			 mask, size, alloc);
+		 mpn_trace ("fp ", fp, size);
+		 mpn_trace ("f1p", f1p, size));
+
+	  /* fp normalized, f1p at most one high zero */
+	  ASSERT (fp[size-1] != 0);
+	  ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
+
+	  /* f1p[size-1] might be zero, but this occurs rarely, so it's not
+	     worth bothering checking for it */
+	  ASSERT (alloc >= 2*size);
+	  /* After these two squarings: xp==F[k]^2 and fp==F[k-1]^2 (fp's
+	     old value lives on, squared, in xp).  */
+	  mpn_sqr (xp, fp,  size);
+	  mpn_sqr (fp, f1p, size);
+	  size *= 2;
+
+	  /* Shrink if possible.  Since fp was normalized there'll be at
+	     most one high zero on xp (and if there is then there's one on
+	     yp too).  */
+	  ASSERT (xp[size-1] != 0 || fp[size-1] == 0);
+	  size -= (xp[size-1] == 0);
+	  ASSERT (xp[size-1] != 0);  /* only one xp high zero */
+
+	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
+	  f1p[size] = mpn_add_n (f1p, xp, fp, size);
+
+	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+	     n&mask is the low bit of our implied k.  */
+
+	  ASSERT ((fp[0] & 2) == 0);
+	  /* fp is F[k-1]^2 == 0 or 1 mod 4, like all squares. */
+	  fp[0] |= (n & mask ? 2 : 0);			/* possible -2 */
+#if HAVE_NATIVE_mpn_rsblsh2_n
+	  fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
+	  MPN_INCR_U(fp, size + 1, (n & mask ? 0 : 2));	/* possible +2 */
+#else
+	  {
+	    mp_limb_t  c;
+
+	    /* 4*F[k]^2 via a left shift by 2, then subtract F[k-1]^2.  */
+	    c = mpn_lshift (xp, xp, size, 2);
+	    xp[0] |= (n & mask ? 0 : 2);	/* possible +2 */
+	    c -= mpn_sub_n (fp, xp, fp, size);
+	    fp[size] = c;
+	  }
+#endif
+	  ASSERT (alloc >= size+1);
+	  size += (fp[size] != 0);
+
+	  /* now n&mask is the new bit of n being considered */
+	  mask >>= 1;
+
+	  /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
+	     F[2k+1] and F[2k-1].  */
+	  if (n & mask)
+	    ASSERT_NOCARRY (mpn_sub_n (f1p, fp, f1p, size));
+	  else {
+	    ASSERT_NOCARRY (mpn_sub_n ( fp, fp, f1p, size));
+
+	    /* Can have a high zero after replacing F[2k+1] with F[2k].
+	       f1p will have a high zero if fp does. */
+	    ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
+	    size -= (fp[size-1] == 0);
+	  }
+	}
+      while (mask != 1);
+
+      TMP_FREE;
+    }
+
+  TRACE (printf ("done size=%ld\n", size);
+	 mpn_trace ("fp ", fp, size);
+	 mpn_trace ("f1p", f1p, size));
+
+  return size;
+}
diff --git a/mpn/generic/fib2m.c b/mpn/generic/fib2m.c
new file mode 100644
index 0000000..89d2b86
--- /dev/null
+++ b/mpn/generic/fib2m.c
@@ -0,0 +1,252 @@
+/* mpn_fib2m -- calculate Fibonacci numbers, modulo m.
+
+Contributed to the GNU project by Marco Bodrato, based on the previous
+fib2_ui.c file.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Stores |{ap,n}-{bp,n}| in {rp,n},
+   returns the sign of {ap,n}-{bp,n}. */
+/* Store |{ap,n} - {bp,n}| at {rp,n} and return the sign of the
+   difference: 1, -1, or 0 when the operands are equal.  */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+
+  /* Scan downward from the most significant limb for the first place the
+     operands differ, zeroing the limbs of rp above it along the way.  */
+  for (i = n - 1; i >= 0; i--)
+    {
+      mp_limb_t ai = ap[i];
+      mp_limb_t bi = bp[i];
+
+      if (ai != bi)
+	{
+	  /* The top i+1 limbs decide the order; subtract smaller from
+	     larger over just that many limbs.  */
+	  if (ai > bi)
+	    {
+	      ASSERT_NOCARRY (mpn_sub_n (rp, ap, bp, i + 1));
+	      return 1;
+	    }
+	  ASSERT_NOCARRY (mpn_sub_n (rp, bp, ap, i + 1));
+	  return -1;
+	}
+      rp[i] = 0;
+    }
+  return 0;
+}
+
+/* Store F[n] at fp and F[n-1] at f1p.  Both are computed modulo m.
+   fp and f1p should have room for mn*2+1 limbs.
+
+   The sign of one or both the values may be flipped (n-F, instead of F),
+   the return value is 0 (zero) if the signs are coherent (both positive
+   or both negative) and 1 (one) otherwise.
+
+   Notes:
+
+   In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
+   low limb.
+
+   In F[2k+1] with k odd, -2 is applied to F[k-1]^2 just by ORing into the
+   low limb.
+
+   TODO: Should {tp, 2 * mn} be passed as a scratch pointer?
+   Should the call to mpn_fib2_ui() obtain (up to) 2*mn limbs?
+*/
+
+int
+mpn_fib2m (mp_ptr fp, mp_ptr f1p, mp_srcptr np, mp_size_t nn, mp_srcptr mp, mp_size_t mn)
+{
+  unsigned long	nfirst;
+  mp_limb_t	nh;
+  mp_bitcnt_t	nbi;
+  mp_size_t	sn, fn;
+  int		fcnt, ncnt;
+
+  ASSERT (! MPN_OVERLAP_P (fp, MAX(2*mn+1,5), f1p, MAX(2*mn+1,5)));
+  ASSERT (nn > 0 && np[nn - 1] != 0);
+
+  /* Estimate the maximal n such that fibonacci(n) fits in mn limbs. */
+  /* The factor 23/16 = 1.4375 slightly under-estimates 1/log2(phi)
+     = 1.4404..., so nfirst errs on the safe (small) side.  The branches
+     only guard against unsigned long overflow.  */
+#if GMP_NUMB_BITS % 16 == 0
+  if (UNLIKELY (ULONG_MAX / (23 * (GMP_NUMB_BITS / 16)) <= mn))
+    nfirst = ULONG_MAX;
+  else
+    nfirst = mn * (23 * (GMP_NUMB_BITS / 16));
+#else
+  {
+    mp_bitcnt_t	mbi;
+    mbi = (mp_bitcnt_t) mn * GMP_NUMB_BITS;
+
+    if (UNLIKELY (ULONG_MAX / 23 < mbi))
+      {
+	if (UNLIKELY (ULONG_MAX / 23 * 16 <= mbi))
+	  nfirst = ULONG_MAX;
+	else
+	  nfirst = mbi / 16 * 23;
+      }
+    else
+      nfirst = mbi * 23 / 16;
+  }
+#endif
+
+  /* Extract the leading bits of {np,nn} into nh, as many as fit below
+     nfirst; nbi counts the bits of n left over for the doubling loop.  */
+  sn = nn - 1;
+  nh = np[sn];
+  count_leading_zeros (ncnt, nh);
+  count_leading_zeros (fcnt, nfirst);
+
+  if (fcnt >= ncnt)
+    {
+      ncnt = fcnt - ncnt;
+      nh >>= ncnt;
+    }
+  else if (sn > 0)
+    {
+      ncnt -= fcnt;
+      nh <<= ncnt;
+      ncnt = GMP_NUMB_BITS - ncnt;
+      --sn;
+      nh |= np[sn] >> ncnt;
+    }
+  else
+    ncnt = 0;
+
+  nbi = sn * GMP_NUMB_BITS + ncnt;
+  if (nh > nfirst)
+    {
+      nh >>= 1;
+      ++nbi;
+    }
+
+  ASSERT (nh <= nfirst);
+  /* Take a starting pair from mpn_fib2_ui. */
+  fn = mpn_fib2_ui (fp, f1p, nh);
+  MPN_ZERO (fp + fn, mn - fn);
+  MPN_ZERO (f1p + fn, mn - fn);
+
+  if (nbi == 0)
+    {
+      /* All of n was consumed by the table/fib2_ui step; just reduce mod m
+	 if the results may be as large as the modulus.  */
+      if (fn == mn)
+	{
+	  mp_limb_t qp[2];
+	  mpn_tdiv_qr (qp, fp, 0, fp, fn, mp, mn);
+	  mpn_tdiv_qr (qp, f1p, 0, f1p, fn, mp, mn);
+	}
+
+      return 0;
+    }
+  else
+    {
+      mp_ptr	tp;
+      unsigned	pb = nh & 1;
+      int	neg;
+      TMP_DECL;
+
+      TMP_MARK;
+
+      tp = TMP_ALLOC_LIMBS (2 * mn + (mn < 2));
+
+      do
+	{
+	  mp_ptr	rp;
+	  /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
+	     nbi upwards.
+
+	     Based on the next bit of n, we'll double to the pair
+	     fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
+	     that bit is 0 or 1 respectively.  */
+
+	  /* After these squarings: tp==F[k]^2, fp==F[k-1]^2.  */
+	  mpn_sqr (tp, fp,  mn);
+	  mpn_sqr (fp, f1p, mn);
+
+	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
+	  f1p[2 * mn] = mpn_add_n (f1p, tp, fp, 2 * mn);
+
+	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+	     pb is the low bit of our implied k.  */
+
+	  /* fp is F[k-1]^2 == 0 or 1 mod 4, like all squares. */
+	  ASSERT ((fp[0] & 2) == 0);
+	  ASSERT (pb == (pb & 1));
+	  ASSERT ((fp[0] + (pb ? 2 : 0)) == (fp[0] | (pb << 1)));
+	  fp[0] |= pb << 1;		/* possible -2 */
+#if HAVE_NATIVE_mpn_rsblsh2_n
+	  fp[2 * mn] = 1 + mpn_rsblsh2_n (fp, fp, tp, 2 * mn);
+	  MPN_INCR_U(fp, 2 * mn + 1, (1 ^ pb) << 1);	/* possible +2 */
+	  fp[2 * mn] = (fp[2 * mn] - 1) & GMP_NUMB_MAX;
+#else
+	  {
+	    mp_limb_t  c;
+
+	    c = mpn_lshift (tp, tp, 2 * mn, 2);
+	    tp[0] |= (1 ^ pb) << 1;	/* possible +2 */
+	    c -= mpn_sub_n (fp, tp, fp, 2 * mn);
+	    fp[2 * mn] = c & GMP_NUMB_MAX;
+	  }
+#endif
+	  /* A top limb of all ones marks a negative (borrowed) result,
+	     i.e. fp currently holds -(F-value) rather than the value.  */
+	  neg = fp[2 * mn] == GMP_NUMB_MAX;
+
+	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2 */
+	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k */
+
+	  /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
+	     F[2k+1] and F[2k-1].  */
+	  --nbi;
+	  pb = (np [nbi / GMP_NUMB_BITS] >> (nbi % GMP_NUMB_BITS)) & 1;
+	  rp = pb ? f1p : fp;
+	  if (neg)
+	    {
+	      /* Calculate -(F[2k+1] - F[2k-1]) */
+	      rp[2 * mn] = f1p[2 * mn] + 1 - mpn_sub_n (rp, f1p, fp, 2 * mn);
+	      neg = ! pb;
+	      if (pb) /* fp not overwritten, negate it. */
+		fp [2 * mn] = 1 ^ mpn_neg (fp, fp, 2 * mn);
+	    }
+	  else
+	    {
+	      neg = abs_sub_n (rp, fp, f1p, 2 * mn + 1) < 0;
+	    }
+
+	  /* Reduce both values mod m before the next doubling.  */
+	  mpn_tdiv_qr (tp, fp, 0, fp, 2 * mn + 1, mp, mn);
+	  mpn_tdiv_qr (tp, f1p, 0, f1p, 2 * mn + 1, mp, mn);
+	}
+      while (nbi != 0);
+
+      TMP_FREE;
+
+      return neg;
+    }
+}
diff --git a/mpn/generic/gcd.c b/mpn/generic/gcd.c
new file mode 100644
index 0000000..3f92cbf
--- /dev/null
+++ b/mpn/generic/gcd.c
@@ -0,0 +1,266 @@
+/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Uses the HGCD operation described in
+
+     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
+     computation, Math. Comp. 77 (2008), 589-607.
+
+  to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
+  then uses Lehmer's algorithm.
+*/
+
+/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
+ * 2)/3, which gives a balanced multiplication in
+ * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
+ * performance. The matrix-vector multiplication is then
+ * 4:1-unbalanced, with matrix elements of size n/6, and vector
+ * elements of size p = 2n/3. */
+
+/* From analysis of the theoretical running time, it appears that when
+ * multiplication takes time O(n^alpha), p should be chosen so that
+ * the ratio of the time for the mpn_hgcd call, and the time for the
+ * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
+ * 1). */
+#ifdef TUNE_GCD_P
+#define P_TABLE_SIZE 10000
+mp_size_t p_table[P_TABLE_SIZE];
+#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
+#else
+#define CHOOSE_P(n) (2*(n) / 3)
+#endif
+
+/* Result channel for gcd_hook: the finished gcd limbs are copied to gp
+   and the limb count recorded in gn.  */
+struct gcd_ctx
+{
+  mp_ptr gp;	/* destination buffer for the gcd */
+  mp_size_t gn;	/* number of limbs stored at gp */
+};
+
+/* Callback invoked by mpn_gcd_subdiv_step when the gcd is found: stash
+   {gp,gn} in the gcd_ctx passed as p.  The quotient arguments are part
+   of the generic hook signature and deliberately unused here.  */
+static void
+gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+	  mp_srcptr qp, mp_size_t qn, int d)
+{
+  struct gcd_ctx *c = (struct gcd_ctx *) p;
+
+  c->gn = gn;
+  MPN_COPY (c->gp, gp, gn);
+}
+
+mp_size_t
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
+{
+  mp_size_t talloc;
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+
+  struct gcd_ctx ctx;
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (usize >= n);
+  ASSERT (n > 0);
+  ASSERT (vp[n-1] > 0);
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+  /* For initial division */
+  scratch = usize - n + 1;
+  if (scratch > talloc)
+    talloc = scratch;
+
+  /* If the subquadratic phase will run, make the scratch allocation big
+     enough for its matrix and hgcd/update needs as well.  */
+#if TUNE_GCD_P
+  if (CHOOSE_P (n) > 0)
+#else
+  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t scratch;
+#if TUNE_GCD_P
+      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
+	 is increasing */
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
+      hgcd_scratch = mpn_hgcd_itch (n);
+      update_scratch = 2*(n - 1);
+#else
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+#endif
+      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (scratch > talloc)
+	talloc = scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(talloc);
+
+  /* One initial division brings U below V in size; a zero remainder means
+     {vp,n} is already the gcd.  */
+  if (usize > n)
+    {
+      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
+
+      if (mpn_zero_p (up, n))
+	{
+	  MPN_COPY (gp, vp, n);
+	  ctx.gn = n;
+	  goto done;
+	}
+    }
+
+  ctx.gp = gp;
+
+  /* Subquadratic phase: run HGCD on the high n-p limbs and apply the
+     resulting matrix to the full operands; fall back to a single
+     subtraction/division step when hgcd makes no progress.  */
+#if TUNE_GCD_P
+  while (CHOOSE_P (n) > 0)
+#else
+  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
+      if (nn > 0)
+	{
+	  ASSERT (M.n <= (n - p - 1)/2);
+	  ASSERT (M.n + p <= (p + n - 1) / 2);
+	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
+	}
+      else
+	{
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
+	  if (n == 0)
+	    goto done;
+	}
+    }
+
+  /* Lehmer phase: reduce using 2x2 matrices derived from the two most
+     significant limbs (shifted into normalized position if needed).  */
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t uh, ul, vh, vl;
+      mp_limb_t mask;
+
+      mask = up[n-1] | vp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+	{
+	  uh = up[n-1]; ul = up[n-2];
+	  vh = vp[n-1]; vl = vp[n-2];
+	}
+      else
+	{
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
+	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
+	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
+	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
+	}
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (uh, ul, vh, vl, &M))
+	{
+	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
+	  MP_PTR_SWAP (up, tp);
+	}
+      else
+	{
+	  /* mpn_hgcd2 has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
+	  if (n == 0)
+	    goto done;
+	}
+    }
+
+  ASSERT(up[n-1] | vp[n-1]);
+
+  /* Due to the calling convention for mpn_gcd, at most one can be even. */
+  if ((up[0] & 1) == 0)
+    MP_PTR_SWAP (up, vp);
+  ASSERT ((up[0] & 1) != 0);
+
+  /* Base case: n <= 2, finish with the single- and double-limb gcd
+     primitives.  */
+  {
+    mp_limb_t u0, u1, v0, v1;
+    mp_double_limb_t g;
+
+    u0 = up[0];
+    v0 = vp[0];
+
+    if (n == 1)
+      {
+	int cnt;
+	count_trailing_zeros (cnt, v0);
+	*gp = mpn_gcd_11 (u0, v0 >> cnt);
+	ctx.gn = 1;
+	goto done;
+      }
+
+    v1 = vp[1];
+    if (UNLIKELY (v0 == 0))
+      {
+	v0 = v1;
+	v1 = 0;
+	/* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could
+	   when this situation occurs internally.  */
+      }
+    /* mpn_gcd_22 needs v odd; shift out low zeros first.  */
+    if ((v0 & 1) == 0)
+      {
+	int cnt;
+	count_trailing_zeros (cnt, v0);
+	v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt);
+	v1 >>= cnt;
+      }
+
+    u1 = up[1];
+    g = mpn_gcd_22 (u1, u0, v1, v0);
+    gp[0] = g.d0;
+    gp[1] = g.d1;
+    ctx.gn = 1 + (g.d1 > 0);
+  }
+done:
+  TMP_FREE;
+  return ctx.gn;
+}
diff --git a/mpn/generic/gcd_1.c b/mpn/generic/gcd_1.c
new file mode 100644
index 0000000..22b1422
--- /dev/null
+++ b/mpn/generic/gcd_1.c
@@ -0,0 +1,103 @@
+/* mpn_gcd_1 -- mpn and limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Does not work for U == 0 or V == 0.  It would be tough to make it work for
+   V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
+
+   The threshold for doing u%v when size==1 will vary by CPU according to
+   the speed of a division and the code generated for the main loop.  Any
+   tuning for this is left to a CPU specific implementation.  */
+
+mp_limb_t
+mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
+{
+  mp_limb_t      ulimb;
+  unsigned long  zero_bits, u_low_zero_bits;
+  int c;
+
+  ASSERT (size >= 1);
+  ASSERT (vlimb != 0);
+  ASSERT_MPN_NONZERO_P (up, size);
+
+  ulimb = up[0];
+
+  /* Need vlimb odd for modexact, want it odd to get common zeros. */
+  /* zero_bits ends up as the number of common low zero bits of U and V,
+     i.e. the power of 2 in the gcd; it is restored by the shift at the
+     "done" return below.  */
+  count_trailing_zeros (zero_bits, vlimb);
+  vlimb >>= zero_bits;
+
+  if (size > 1)
+    {
+      /* Must get common zeros before the mod reduction.  If ulimb==0 then
+	 vlimb already gives the common zeros.  */
+      if (ulimb != 0)
+	{
+	  count_trailing_zeros (u_low_zero_bits, ulimb);
+	  zero_bits = MIN (zero_bits, u_low_zero_bits);
+	}
+
+      /* Reduce the multi-limb U to a single limb mod the (odd) vlimb.  */
+      ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);
+      if (ulimb == 0)
+	goto done;
+
+      /* Make ulimb odd for mpn_gcd_11; these factors of 2 are not common
+	 with V so they don't belong in the gcd.  */
+      count_trailing_zeros (c, ulimb);
+      ulimb >>= c;
+    }
+  else
+    {
+      /* size==1, so up[0]!=0 */
+      count_trailing_zeros (u_low_zero_bits, ulimb);
+      ulimb >>= u_low_zero_bits;
+      zero_bits = MIN (zero_bits, u_low_zero_bits);
+
+      /* make u bigger */
+      if (vlimb > ulimb)
+	MP_LIMB_T_SWAP (ulimb, vlimb);
+
+      /* if u is much bigger than v, reduce using a division rather than
+	 chipping away at it bit-by-bit */
+      if ((ulimb >> 16) > vlimb)
+	{
+	  ulimb %= vlimb;
+	  if (ulimb == 0)
+	    goto done;
+
+	  count_trailing_zeros (c, ulimb);
+	  ulimb >>= c;
+	}
+    }
+
+  /* Both operands odd now, as mpn_gcd_11 requires.  */
+  vlimb = mpn_gcd_11 (ulimb, vlimb);
+
+ done:
+  /* Put back the common power of 2.  */
+  return vlimb << zero_bits;
+}
diff --git a/mpn/generic/gcd_11.c b/mpn/generic/gcd_11.c
new file mode 100644
index 0000000..214e45c
--- /dev/null
+++ b/mpn/generic/gcd_11.c
@@ -0,0 +1,74 @@
+/* mpn_gcd_11 -- limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Greatest common divisor of two odd single limbs, by a branch-free
+   binary gcd.  */
+mp_limb_t
+mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+{
+  ASSERT (u & v & 1);
+
+  /* Drop the redundant least significant one bit of each operand.  This
+     one-bit-smaller representation guarantees that the high bit of the
+     difference below is set exactly when v > u.  */
+  u >>= 1;
+  v >>= 1;
+
+  while (u != v)
+    {
+      int shift;
+      mp_limb_t diff = u - v;
+      mp_limb_t mask = LIMB_HIGHBIT_TO_MASK (diff);
+
+      count_trailing_zeros (shift, diff);
+
+      /* v <-- min (u, v): when v > u, mask is all ones and the (negative)
+	 diff is added back in.  */
+      v += (mask & diff);
+
+      /* u <-- |u - v|, computed branch-free from diff and mask.  */
+      u = (diff ^ mask) - mask;
+
+      /* diff is even here so shift >= 1, making u >>= shift + 1 safe; the
+	 split into two shifts keeps the +1 addition off the dependency
+	 chain of count_trailing_zeros.  */
+      u = (u >> 1) >> shift;
+    }
+
+  /* Restore the implicit low one bit.  */
+  return (u << 1) | 1;
+}
diff --git a/mpn/generic/gcd_22.c b/mpn/generic/gcd_22.c
new file mode 100644
index 0000000..d97f096
--- /dev/null
+++ b/mpn/generic/gcd_22.c
@@ -0,0 +1,131 @@
+/* mpn_gcd_22 -- double limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nails not supported.
+#endif
+
+mp_double_limb_t
+mpn_gcd_22 (mp_limb_t u1, mp_limb_t u0, mp_limb_t v1, mp_limb_t v0)
+{
+  mp_double_limb_t g;
+  ASSERT (u0 & v0 & 1);
+
+  /* Implicit least significant bit */
+  u0 = (u0 >> 1) | (u1 << (GMP_LIMB_BITS - 1));
+  u1 >>= 1;
+
+  v0 = (v0 >> 1) | (v1 << (GMP_LIMB_BITS - 1));
+  v1 >>= 1;
+
+  while (u1 || v1) /* u1 == 0 can happen at most twice per call */
+    {
+      mp_limb_t vgtu, t1, t0;
+      sub_ddmmss (t1, t0, u1, u0, v1, v0);
+      vgtu = LIMB_HIGHBIT_TO_MASK(t1);
+
+      if (UNLIKELY (t0 == 0))
+	{
+	  if (t1 == 0)
+	    {
+	      g.d1 = (u1 << 1) | (u0 >> (GMP_LIMB_BITS - 1));
+	      g.d0 = (u0 << 1) | 1;
+	      return g;
+	    }
+	  int c;
+	  count_trailing_zeros (c, t1);
+
+	  /* v1 = min (u1, v1) */
+	  v1 += (vgtu & t1);
+	  /* u0 = |u1 - v1| */
+	  u0 = (t1 ^ vgtu) - vgtu;
+	  ASSERT (c < GMP_LIMB_BITS - 1);
+	  u0 >>= c + 1;
+	  u1 = 0;
+	}
+      else
+	{
+	  int c;
+	  count_trailing_zeros (c, t0);
+	  c++;
+	  /* V <-- min (U, V).
+
+	     Assembly version should use cmov. Another alternative,
+	     avoiding carry propagation, would be
+
+	     v0 += vgtu & t0; v1 += vgtu & (u1 - v1);
+	  */
+	  add_ssaaaa (v1, v0, v1, v0, vgtu & t1, vgtu & t0);
+	  /* U  <--  |U - V|
+	     No carry handling needed in this conditional negation,
+	     since t0 != 0. */
+	  u0 = (t0 ^ vgtu) - vgtu;
+	  u1 = t1 ^ vgtu;
+	  if (UNLIKELY (c == GMP_LIMB_BITS))
+	    {
+	      u0 = u1;
+	      u1 = 0;
+	    }
+	  else
+	    {
+	      u0 = (u0 >> c) | (u1 << (GMP_LIMB_BITS - c));
+	      u1 >>= c;
+	    }
+	}
+    }
+  while ((v0 | u0) & GMP_LIMB_HIGHBIT)
+    { /* At most two iterations */
+      mp_limb_t vgtu, t0;
+      int c;
+      sub_ddmmss (vgtu, t0, 0, u0, 0, v0);
+      if (UNLIKELY (t0 == 0))
+	{
+	  g.d1 = u0 >> (GMP_LIMB_BITS - 1);
+	  g.d0 = (u0 << 1) | 1;
+	  return g;
+	}
+
+      /* v <-- min (u, v) */
+      v0 += (vgtu & t0);
+
+      /* u <-- |u - v| */
+      u0 = (t0 ^ vgtu) - vgtu;
+
+      count_trailing_zeros (c, t0);
+      u0 = (u0 >> 1) >> c;
+    }
+
+  g.d0 = mpn_gcd_11 ((u0 << 1) + 1, (v0 << 1) + 1);
+  g.d1 = 0;
+  return g;
+}
diff --git a/mpn/generic/gcd_subdiv_step.c b/mpn/generic/gcd_subdiv_step.c
new file mode 100644
index 0000000..9c3b88d
--- /dev/null
+++ b/mpn/generic/gcd_subdiv_step.c
@@ -0,0 +1,204 @@
+/* gcd_subdiv_step.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>		/* for NULL */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
+   b is small, or the difference is small. Perform one subtraction
+   followed by one division. The normal case is to compute the reduced
+   a and b, and return the new size.
+
+   If s == 0 (used for gcd and gcdext), returns zero if the gcd is
+   found.
+
+   If s > 0, don't reduce to size <= s, and return zero if no
+   reduction is possible (if either a, b or |a-b| is of size <= s). */
+
+/* The hook function is called as
+
+     hook(ctx, gp, gn, qp, qn, d)
+
+   in the following cases:
+
+   + If A = B at the start, G is the gcd, Q is NULL, d = -1.
+
+   + If one input is zero at the start, G is the gcd, Q is NULL,
+     d = 0 if A = G and d = 1 if B = G.
+
+   Otherwise, if d = 0 we have just subtracted a multiple of A from B,
+   and if d = 1 we have subtracted a multiple of B from A.
+
+   + If A = B after subtraction, G is the gcd, Q is NULL.
+
+   + If we get a zero remainder after division, G is the gcd, Q is the
+     quotient.
+
+   + Otherwise, G is NULL, Q is the quotient (often 1).
+
+ */
+
+mp_size_t
+mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
+		     gcd_subdiv_step_hook *hook, void *ctx,
+		     mp_ptr tp)
+{
+  static const mp_limb_t one = CNST_LIMB(1);
+  mp_size_t an, bn, qn;
+
+  int swapped;
+
+  ASSERT (n > 0);
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+
+  an = bn = n;
+  MPN_NORMALIZE (ap, an);
+  MPN_NORMALIZE (bp, bn);
+
+  swapped = 0;
+
+  /* Arrange so that a < b, subtract b -= a, and maintain
+     normalization. */
+  if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+	{
+	  /* For gcdext, return the smallest of the two cofactors, so
+	     pass d = -1. */
+	  if (s == 0)
+	    hook (ctx, ap, an, NULL, 0, -1);
+	  return 0;
+	}
+      else if (c > 0)
+	{
+	  MP_PTR_SWAP (ap, bp);
+	  swapped ^= 1;
+	}
+    }
+  else
+    {
+      if (an > bn)
+	{
+	  MPN_PTR_SWAP (ap, an, bp, bn);
+	  swapped ^= 1;
+	}
+    }
+  if (an <= s)
+    {
+      if (s == 0)
+	hook (ctx, bp, bn, NULL, 0, swapped ^ 1);
+      return 0;
+    }
+
+  ASSERT_NOCARRY (mpn_sub (bp, bp, bn, ap, an));
+  MPN_NORMALIZE (bp, bn);
+  ASSERT (bn > 0);
+
+  if (bn <= s)
+    {
+      /* Undo subtraction. */
+      mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+      if (cy > 0)
+	bp[an] = cy;
+      return 0;
+    }
+
+  /* Arrange so that a < b */
+  if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+	{
+	  if (s > 0)
+	    /* Just record subtraction and return */
+	    hook (ctx, NULL, 0, &one, 1, swapped);
+	  else
+	    /* Found gcd. */
+	    hook (ctx, bp, bn, NULL, 0, swapped);
+	  return 0;
+	}
+
+      hook (ctx, NULL, 0, &one, 1, swapped);
+
+      if (c > 0)
+	{
+	  MP_PTR_SWAP (ap, bp);
+	  swapped ^= 1;
+	}
+    }
+  else
+    {
+      hook (ctx, NULL, 0, &one, 1, swapped);
+
+      if (an > bn)
+	{
+	  MPN_PTR_SWAP (ap, an, bp, bn);
+	  swapped ^= 1;
+	}
+    }
+
+  mpn_tdiv_qr (tp, bp, 0, bp, bn, ap, an);
+  qn = bn - an + 1;
+  bn = an;
+  MPN_NORMALIZE (bp, bn);
+
+  if (UNLIKELY (bn <= s))
+    {
+      if (s == 0)
+	{
+	  hook (ctx, ap, an, tp, qn, swapped);
+	  return 0;
+	}
+
+      /* Quotient is one too large, so decrement it and add back A. */
+      if (bn > 0)
+	{
+	  mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+	  if (cy)
+	    bp[an++] = cy;
+	}
+      else
+	MPN_COPY (bp, ap, an);
+
+      MPN_DECR_U (tp, qn, 1);
+    }
+
+  hook (ctx, NULL, 0, tp, qn, swapped);
+  return an;
+}
diff --git a/mpn/generic/gcdext.c b/mpn/generic/gcdext.c
new file mode 100644
index 0000000..5501480
--- /dev/null
+++ b/mpn/generic/gcdext.c
@@ -0,0 +1,557 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and
+   the size is returned (if inputs are non-normalized, result may be
+   non-normalized too). Temporary space needed is M->n + n.
+ */
+static size_t
+hgcd_mul_matrix_vector (struct hgcd_matrix *M,
+			mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ah, bh;
+
+  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+     t  = u00 * a
+     r  = u10 * b
+     r += t;
+
+     t  = u11 * b
+     b  = u01 * a
+     b += t;
+  */
+
+  if (M->n >= n)
+    {
+      mpn_mul (tp, M->p[0][0], M->n, ap, n);
+      mpn_mul (rp, M->p[1][0], M->n, bp, n);
+    }
+  else
+    {
+      mpn_mul (tp, ap, n, M->p[0][0], M->n);
+      mpn_mul (rp, bp, n, M->p[1][0], M->n);
+    }
+
+  ah = mpn_add_n (rp, rp, tp, n + M->n);
+
+  if (M->n >= n)
+    {
+      mpn_mul (tp, M->p[1][1], M->n, bp, n);
+      mpn_mul (bp, M->p[0][1], M->n, ap, n);
+    }
+  else
+    {
+      mpn_mul (tp, bp, n, M->p[1][1], M->n);
+      mpn_mul (bp, ap, n, M->p[0][1], M->n);
+    }
+  bh = mpn_add_n (bp, bp, tp, n + M->n);
+
+  n += M->n;
+  if ( (ah | bh) > 0)
+    {
+      rp[n] = ah;
+      bp[n] = bh;
+      n++;
+    }
+  else
+    {
+      /* Normalize */
+      while ( (rp[n-1] | bp[n-1]) == 0)
+	n--;
+    }
+
+  return n;
+}
+
+#define COMPUTE_V_ITCH(n) (2*(n))
+
+/* Computes |v| = |(g - u a)| / b, where u may be positive or
+   negative, and v is of the opposite sign. max(a, b) is of size n, u and
+   v at most size n, and v must have space for n+1 limbs. */
+static mp_size_t
+compute_v (mp_ptr vp,
+	   mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+	   mp_srcptr gp, mp_size_t gn,
+	   mp_srcptr up, mp_size_t usize,
+	   mp_ptr tp)
+{
+  mp_size_t size;
+  mp_size_t an;
+  mp_size_t bn;
+  mp_size_t vn;
+
+  ASSERT (n > 0);
+  ASSERT (gn > 0);
+  ASSERT (usize != 0);
+
+  size = ABS (usize);
+  ASSERT (size <= n);
+  ASSERT (up[size-1] > 0);
+
+  an = n;
+  MPN_NORMALIZE (ap, an);
+  ASSERT (gn <= an);
+
+  if (an >= size)
+    mpn_mul (tp, ap, an, up, size);
+  else
+    mpn_mul (tp, up, size, ap, an);
+
+  size += an;
+
+  if (usize > 0)
+    {
+      /* |v| = -v = (u a - g) / b */
+
+      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
+      MPN_NORMALIZE (tp, size);
+      if (size == 0)
+	return 0;
+    }
+  else
+    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
+	 (g + |u| a) always fits in (|usize| + an) limbs. */
+
+      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
+      size -= (tp[size - 1] == 0);
+    }
+
+  /* Now divide t / b. There must be no remainder */
+  bn = n;
+  MPN_NORMALIZE (bp, bn);
+  ASSERT (size >= bn);
+
+  vn = size + 1 - bn;
+  ASSERT (vn <= n + 1);
+
+  mpn_divexact (vp, tp, size, bp, bn);
+  vn -= (vp[vn-1] == 0);
+
+  return vn;
+}
+
+/* Temporary storage:
+
+   Initial division: Quotient of at most an - n + 1 <= an limbs.
+
+   Storage for u0 and u1: 2(n+1).
+
+   Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)
+
+   Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.
+
+   When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1) for the cofactors.
+
+   When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.
+
+   For the lehmer call after the loop, Let T denote
+   GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
+   u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
+   for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
+   sufficient for both operations.
+
+*/
+
+/* Optimal choice of p seems difficult. In each iteration the division
+ * of work between hgcd and the updates of u0 and u1 depends on the
+ * current size of the u. It may be desirable to use a different
+ * choice of p in each iteration. Also the input size seems to matter;
+ * choosing p = n / 3 in the first iteration seems to improve
+ * performance slightly for input size just above the threshold, but
+ * degrade performance for larger inputs. */
+#define CHOOSE_P_1(n) ((n) / 2)
+#define CHOOSE_P_2(n) ((n) / 3)
+
+mp_size_t
+mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
+	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
+{
+  mp_size_t talloc;
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+  mp_size_t ualloc = n + 1;
+
+  struct gcdext_ctx ctx;
+  mp_size_t un;
+  mp_ptr u0;
+  mp_ptr u1;
+
+  mp_ptr tp;
+
+  TMP_DECL;
+
+  ASSERT (an >= n);
+  ASSERT (n > 0);
+  ASSERT (bp[n-1] > 0);
+
+  TMP_MARK;
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);
+
+  /* For initial division */
+  scratch = an - n + 1;
+  if (scratch > talloc)
+    talloc = scratch;
+
+  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      /* For hgcd loop. */
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p1 = CHOOSE_P_1 (n);
+      mp_size_t p2 = CHOOSE_P_2 (n);
+      mp_size_t min_p = MIN(p1, p2);
+      mp_size_t max_p = MAX(p1, p2);
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
+      hgcd_scratch = mpn_hgcd_itch (n - min_p);
+      update_scratch = max_p + n - 1;
+
+      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (scratch > talloc)
+	talloc = scratch;
+
+      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
+	 copies of a and b. */
+      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
+	+ 3*GCDEXT_DC_THRESHOLD;
+
+      if (scratch > talloc)
+	talloc = scratch;
+
+      /* Cofactors u0 and u1 */
+      talloc += 2*(n+1);
+    }
+
+  tp = TMP_ALLOC_LIMBS(talloc);
+
+  if (an > n)
+    {
+      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);
+
+      if (mpn_zero_p (ap, n))
+	{
+	  MPN_COPY (gp, bp, n);
+	  *usizep = 0;
+	  TMP_FREE;
+	  return n;
+	}
+    }
+
+  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);
+
+      TMP_FREE;
+      return gn;
+    }
+
+  MPN_ZERO (tp, 2*ualloc);
+  u0 = tp; tp += ualloc;
+  u1 = tp; tp += ualloc;
+
+  ctx.gp = gp;
+  ctx.up = up;
+  ctx.usize = usizep;
+
+  {
+    /* For the first hgcd call, there are no u updates, and it makes
+       some sense to use a different choice for p. */
+
+    /* FIXME: We could trim use of temporary storage, since u0 and u1
+       are not used yet. For the hgcd call, we could swap in the u0
+       and u1 pointers for the relevant matrix elements. */
+
+    struct hgcd_matrix M;
+    mp_size_t p = CHOOSE_P_1 (n);
+    mp_size_t nn;
+
+    mpn_hgcd_matrix_init (&M, n - p, tp);
+    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
+    if (nn > 0)
+      {
+	ASSERT (M.n <= (n - p - 1)/2);
+	ASSERT (M.n + p <= (p + n - 1) / 2);
+
+	/* Temporary storage 2 (p + M->n) <= p + n - 1 */
+	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+
+	MPN_COPY (u0, M.p[1][0], M.n);
+	MPN_COPY (u1, M.p[1][1], M.n);
+	un = M.n;
+	while ( (u0[un-1] | u1[un-1] ) == 0)
+	  un--;
+      }
+    else
+      {
+	/* mpn_hgcd has failed. Then either one of a or b is very
+	   small, or the difference is very small. Perform one
+	   subtraction followed by one division. */
+	u1[0] = 1;
+
+	ctx.u0 = u0;
+	ctx.u1 = u1;
+	ctx.tp = tp + n; /* ualloc */
+	ctx.un = 1;
+
+	/* Temporary storage n */
+	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	if (n == 0)
+	  {
+	    TMP_FREE;
+	    return ctx.gn;
+	  }
+
+	un = ctx.un;
+	ASSERT (un < ualloc);
+      }
+  }
+
+  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P_2 (n);
+      mp_size_t nn;
+
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
+      if (nn > 0)
+	{
+	  mp_ptr t0;
+
+	  t0 = tp + matrix_scratch;
+	  ASSERT (M.n <= (n - p - 1)/2);
+	  ASSERT (M.n + p <= (p + n - 1) / 2);
+
+	  /* Temporary storage 2 (p + M->n) <= p + n - 1 */
+	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);
+
+	  /* By the same analysis as for mpn_hgcd_matrix_mul */
+	  ASSERT (M.n + un <= ualloc);
+
+	  /* FIXME: This copying could be avoided by some swapping of
+	   * pointers. May need more temporary storage, though. */
+	  MPN_COPY (t0, u0, un);
+
+	  /* Temporary storage ualloc */
+	  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);
+
+	  ASSERT (un < ualloc);
+	  ASSERT ( (u0[un-1] | u1[un-1]) > 0);
+	}
+      else
+	{
+	  /* mpn_hgcd has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+	  ctx.u0 = u0;
+	  ctx.u1 = u1;
+	  ctx.tp = tp + n; /* ualloc */
+	  ctx.un = un;
+
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	  if (n == 0)
+	    {
+	      TMP_FREE;
+	      return ctx.gn;
+	    }
+
+	  un = ctx.un;
+	  ASSERT (un < ualloc);
+	}
+    }
+  /* We have A = ... a + ... b
+	     B =  u0 a +  u1 b
+
+	     a = u1  A + ... B
+	     b = -u0 A + ... B
+
+     with bounds
+
+       |u0|, |u1| <= B / min(a, b)
+
+     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
+     in which case the only reduction done so far is a = A - k B for
+     some k.
+
+     Compute g = u a + v b = (u u1 - v u0) A + (...) B
+     Here, u, v are bounded by
+
+       |u| <= b,
+       |v| <= a
+  */
+
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
+    {
+      /* Must return the smallest cofactor, +u1 or -u0 */
+      int c;
+
+      MPN_COPY (gp, ap, n);
+
+      MPN_CMP (c, u0, u1, un);
+      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
+	 this case we choose the cofactor + 1, corresponding to G = A
+	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
+      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+      if (c < 0)
+	{
+	  MPN_NORMALIZE (u0, un);
+	  MPN_COPY (up, u0, un);
+	  *usizep = -un;
+	}
+      else
+	{
+	  MPN_NORMALIZE_NOT_ZERO (u1, un);
+	  MPN_COPY (up, u1, un);
+	  *usizep = un;
+	}
+
+      TMP_FREE;
+      return n;
+    }
+  else if (UNLIKELY (u0[0] == 0) && un == 1)
+    {
+      mp_size_t gn;
+      ASSERT (u1[0] == 1);
+
+      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
+      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);
+
+      TMP_FREE;
+      return gn;
+    }
+  else
+    {
+      mp_size_t u0n;
+      mp_size_t u1n;
+      mp_size_t lehmer_un;
+      mp_size_t lehmer_vn;
+      mp_size_t gn;
+
+      mp_ptr lehmer_up;
+      mp_ptr lehmer_vp;
+      int negate;
+
+      lehmer_up = tp; tp += n;
+
+      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
+      MPN_COPY (tp, ap, n);
+      MPN_COPY (tp + n, bp, n);
+      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);
+
+      u0n = un;
+      MPN_NORMALIZE (u0, u0n);
+      ASSERT (u0n > 0);
+
+      if (lehmer_un == 0)
+	{
+	  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
+	  MPN_COPY (up, u0, u0n);
+	  *usizep = -u0n;
+
+	  TMP_FREE;
+	  return gn;
+	}
+
+      lehmer_vp = tp;
+      /* Compute v = (g - u a) / b */
+      lehmer_vn = compute_v (lehmer_vp,
+			     ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);
+
+      if (lehmer_un > 0)
+	negate = 0;
+      else
+	{
+	  lehmer_un = -lehmer_un;
+	  negate = 1;
+	}
+
+      u1n = un;
+      MPN_NORMALIZE (u1, u1n);
+      ASSERT (u1n > 0);
+
+      ASSERT (lehmer_un + u1n <= ualloc);
+      ASSERT (lehmer_vn + u0n <= ualloc);
+
+      /* We may still have v == 0 */
+
+      /* Compute u u0 */
+      if (lehmer_un <= u1n)
+	/* Should be the common case */
+	mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
+      else
+	mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);
+
+      un = u1n + lehmer_un;
+      un -= (up[un - 1] == 0);
+
+      if (lehmer_vn > 0)
+	{
+	  mp_limb_t cy;
+
+	  /* Overwrites old u1 value */
+	  if (lehmer_vn <= u0n)
+	    /* Should be the common case */
+	    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
+	  else
+	    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);
+
+	  u1n = u0n + lehmer_vn;
+	  u1n -= (u1[u1n - 1] == 0);
+
+	  if (u1n <= un)
+	    {
+	      cy = mpn_add (up, up, un, u1, u1n);
+	    }
+	  else
+	    {
+	      cy = mpn_add (up, u1, u1n, up, un);
+	      un = u1n;
+	    }
+	  up[un] = cy;
+	  un += (cy != 0);
+
+	  ASSERT (un < ualloc);
+	}
+      *usizep = negate ? -un : un;
+
+      TMP_FREE;
+      return gn;
+    }
+}
diff --git a/mpn/generic/gcdext_1.c b/mpn/generic/gcdext_1.c
new file mode 100644
index 0000000..b221a92
--- /dev/null
+++ b/mpn/generic/gcdext_1.c
@@ -0,0 +1,275 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef GCDEXT_1_USE_BINARY
+#define GCDEXT_1_USE_BINARY 0
+#endif
+
+#ifndef GCDEXT_1_BINARY_METHOD
+#define GCDEXT_1_BINARY_METHOD 2
+#endif
+
+#if GCDEXT_1_USE_BINARY
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
+	      mp_limb_t u, mp_limb_t v)
+{
+  /* Maintain
+
+     U = t1 u + t0 v
+     V = s1 u + s0 v
+
+     where U, V are the inputs (without any shared power of two),
+     and the matrix has determinant ± 2^{shift}.
+  */
+  mp_limb_t s0 = 1;
+  mp_limb_t t0 = 0;
+  mp_limb_t s1 = 0;
+  mp_limb_t t1 = 1;
+  mp_limb_t ug;
+  mp_limb_t vg;
+  mp_limb_t ugh;
+  mp_limb_t vgh;
+  unsigned zero_bits;
+  unsigned shift;
+  unsigned i;
+#if GCDEXT_1_BINARY_METHOD == 2
+  mp_limb_t det_sign;
+#endif
+
+  ASSERT (u > 0);
+  ASSERT (v > 0);
+
+  count_trailing_zeros (zero_bits, u | v);
+  u >>= zero_bits;
+  v >>= zero_bits;
+
+  if ((u & 1) == 0)
+    {
+      count_trailing_zeros (shift, u);
+      u >>= shift;
+      t1 <<= shift;
+    }
+  else if ((v & 1) == 0)
+    {
+      count_trailing_zeros (shift, v);
+      v >>= shift;
+      s0 <<= shift;
+    }
+  else
+    shift = 0;
+
+#if GCDEXT_1_BINARY_METHOD == 1
+  while (u != v)
+    {
+      unsigned count;
+      if (u > v)
+	{
+	  u -= v;
+
+	  count_trailing_zeros (count, u);
+	  u >>= count;
+
+	  t0 += t1; t1 <<= count;
+	  s0 += s1; s1 <<= count;
+	}
+      else
+	{
+	  v -= u;
+
+	  count_trailing_zeros (count, v);
+	  v >>= count;
+
+	  t1 += t0; t0 <<= count;
+	  s1 += s0; s0 <<= count;
+	}
+      shift += count;
+    }
+#else
+# if GCDEXT_1_BINARY_METHOD == 2
+  u >>= 1;
+  v >>= 1;
+
+  det_sign = 0;
+
+  while (u != v)
+    {
+      unsigned count;
+      mp_limb_t d =  u - v;
+      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
+      mp_limb_t sx;
+      mp_limb_t tx;
+
+      /* When v <= u (vgtu == 0), the updates are:
+
+	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
+	   (t1, t0) <-- (t1 << count, t0 + t1)
+
+	 and when v > u, the updates are
+
+	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
+	   (t1, t0) <-- (t0 << count, t0 + t1)
+
+	 and similarly for s1, s0
+      */
+
+      /* v <-- min (u, v) */
+      v += (vgtu & d);
+
+      /* u <-- |u - v| */
+      u = (d ^ vgtu) - vgtu;
+
+      /* Number of trailing zeros is the same no matter if we look at
+       * d or u, but using d gives more parallelism. */
+      count_trailing_zeros (count, d);
+
+      det_sign ^= vgtu;
+
+      tx = vgtu & (t0 - t1);
+      sx = vgtu & (s0 - s1);
+      t0 += t1;
+      s0 += s1;
+      t1 += tx;
+      s1 += sx;
+
+      count++;
+      u >>= count;
+      t1 <<= count;
+      s1 <<= count;
+      shift += count;
+    }
+  u = (u << 1) + 1;
+# else /* GCDEXT_1_BINARY_METHOD == 2 */
+#  error Unknown GCDEXT_1_BINARY_METHOD
+# endif
+#endif
+
+  /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
+  ug = t0 + t1;
+  vg = s0 + s1;
+
+  ugh = ug/2 + (ug & 1);
+  vgh = vg/2 + (vg & 1);
+
+  /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
+     s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
+  for (i = 0; i < shift; i++)
+    {
+      mp_limb_t mask = - ( (s0 | t0) & 1);
+
+      s0 /= 2;
+      t0 /= 2;
+      s0 += mask & vgh;
+      t0 += mask & ugh;
+    }
+
+  ASSERT_ALWAYS (s0 <= vg);
+  ASSERT_ALWAYS (t0 <= ug);
+
+  if (s0 > vg - s0)
+    {
+      s0 -= vg;
+      t0 -= ug;
+    }
+#if GCDEXT_1_BINARY_METHOD == 2
+  /* Conditional negation. */
+  s0 = (s0 ^ det_sign) - det_sign;
+  t0 = (t0 ^ det_sign) - det_sign;
+#endif
+  *sp = s0;
+  *tp = -t0;
+
+  return u << zero_bits;
+}
+
+#else /* !GCDEXT_1_USE_BINARY */
+
+
+/* FIXME: Takes two single-word limbs. It could be extended to a
+ * function that accepts a bignum for the first input, and only
+ * returns the first co-factor. */
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
+	      mp_limb_t a, mp_limb_t b)
+{
+  /* Maintain
+
+     a =  u0 A + v0 B
+     b =  u1 A + v1 B
+
+     where A, B are the original inputs.
+  */
+  mp_limb_signed_t u0 = 1;
+  mp_limb_signed_t v0 = 0;
+  mp_limb_signed_t u1 = 0;
+  mp_limb_signed_t v1 = 1;
+
+  ASSERT (a > 0);
+  ASSERT (b > 0);
+
+  if (a < b)
+    goto divide_by_b;
+
+  for (;;)
+    {
+      mp_limb_t q;
+
+      q = a / b;
+      a -= q * b;
+
+      if (a == 0)
+	{
+	  *up = u1;
+	  *vp = v1;
+	  return b;
+	}
+      u0 -= q * u1;
+      v0 -= q * v1;
+
+    divide_by_b:
+      q = b / a;
+      b -= q * a;
+
+      if (b == 0)
+	{
+	  *up = u0;
+	  *vp = v0;
+	  return a;
+	}
+      u1 -= q * u0;
+      v1 -= q * v0;
+    }
+}
+#endif /* !GCDEXT_1_USE_BINARY */
diff --git a/mpn/generic/gcdext_lehmer.c b/mpn/generic/gcdext_lehmer.c
new file mode 100644
index 0000000..ea4e86d
--- /dev/null
+++ b/mpn/generic/gcdext_lehmer.c
@@ -0,0 +1,336 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Here, d is the index of the cofactor to update. FIXME: Could use qn
+   = 0 for the common case q = 1. */
+void
+mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
+		 mp_srcptr qp, mp_size_t qn, int d)
+{
+  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
+  mp_size_t un = ctx->un;
+
+  if (gp)
+    {
+      mp_srcptr up;
+
+      ASSERT (gn > 0);
+      ASSERT (gp[gn-1] > 0);
+
+      MPN_COPY (ctx->gp, gp, gn);
+      ctx->gn = gn;
+
+      if (d < 0)
+	{
+	  int c;
+
+	  /* Must return the smallest cofactor, +u1 or -u0 */
+	  MPN_CMP (c, ctx->u0, ctx->u1, un);
+	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
+
+	  d = c < 0;
+	}
+
+      up = d ? ctx->u0 : ctx->u1;
+
+      MPN_NORMALIZE (up, un);
+      MPN_COPY (ctx->up, up, un);
+
+      *ctx->usize = d ? -un : un;
+    }
+  else
+    {
+      mp_limb_t cy;
+      mp_ptr u0 = ctx->u0;
+      mp_ptr u1 = ctx->u1;
+
+      ASSERT (d >= 0);
+
+      if (d)
+	MP_PTR_SWAP (u0, u1);
+
+      qn -= (qp[qn-1] == 0);
+
+      /* Update u0 += q  * u1 */
+      if (qn == 1)
+	{
+	  mp_limb_t q = qp[0];
+
+	  if (q == 1)
+	    /* A common case. */
+	    cy = mpn_add_n (u0, u0, u1, un);
+	  else
+	    cy = mpn_addmul_1 (u0, u1, un, q);
+	}
+      else
+	{
+	  mp_size_t u1n;
+	  mp_ptr tp;
+
+	  u1n = un;
+	  MPN_NORMALIZE (u1, u1n);
+
+	  if (u1n == 0)
+	    return;
+
+	  /* Should always have u1n == un here, and u1 >= u0. The
+	     reason is that we alternate adding u0 to u1 and u1 to u0
+	     (corresponding to subtractions a - b and b - a), and we
+	     can get a large quotient only just after a switch, which
+	     means that we'll add (a multiple of) the larger u to the
+	     smaller. */
+
+	  tp = ctx->tp;
+
+	  if (qn > u1n)
+	    mpn_mul (tp, qp, qn, u1, u1n);
+	  else
+	    mpn_mul (tp, u1, u1n, qp, qn);
+
+	  u1n += qn;
+	  u1n -= tp[u1n-1] == 0;
+
+	  if (u1n >= un)
+	    {
+	      cy = mpn_add (u0, tp, u1n, u0, un);
+	      un = u1n;
+	    }
+	  else
+	    /* Note: Unlikely case, maybe never happens? */
+	    cy = mpn_add (u0, u0, un, tp, u1n);
+
+	}
+      u0[un] = cy;
+      ctx->un = un + (cy > 0);
+    }
+}
+
+/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
+   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
+   need at most n for the quotient and n+1 for the u update (reusing
+   the extra u). In all, 4n + 3. */
+
+mp_size_t
+mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
+		     mp_ptr ap, mp_ptr bp, mp_size_t n,
+		     mp_ptr tp)
+{
+  mp_size_t ualloc = n + 1;
+
+  /* Keeps track of the second row of the reduction matrix
+   *
+   *   M = (v0, v1 ; u0, u1)
+   *
+   * which correspond to the first column of the inverse
+   *
+   *   M^{-1} = (u1, -v1; -u0, v0)
+   *
+   * This implies that
+   *
+   *   a =  u1 A (mod B)
+   *   b = -u0 A (mod B)
+   *
+   * where A, B denotes the input values.
+   */
+
+  struct gcdext_ctx ctx;
+  mp_size_t un;
+  mp_ptr u0;
+  mp_ptr u1;
+  mp_ptr u2;
+
+  MPN_ZERO (tp, 3*ualloc);
+  u0 = tp; tp += ualloc;
+  u1 = tp; tp += ualloc;
+  u2 = tp; tp += ualloc;
+
+  u1[0] = 1; un = 1;
+
+  ctx.gp = gp;
+  ctx.up = up;
+  ctx.usize = usize;
+
+  /* FIXME: Handle n == 2 differently, after the loop? */
+  while (n >= 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+	{
+	  ah = ap[n-1]; al = ap[n-2];
+	  bh = bp[n-1]; bl = bp[n-2];
+	}
+      else if (n == 2)
+	{
+	  /* We use the full inputs without truncation, so we can
+	     safely shift left. */
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
+	  al = ap[0] << shift;
+	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
+	  bl = bp[0] << shift;
+	}
+      else
+	{
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+	}
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (ah, al, bh, bl, &M))
+	{
+	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+	  MP_PTR_SWAP (ap, tp);
+	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
+	  MP_PTR_SWAP (u0, u2);
+	}
+      else
+	{
+	  /* mpn_hgcd2 has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+	  ctx.u0 = u0;
+	  ctx.u1 = u1;
+	  ctx.tp = u2;
+	  ctx.un = un;
+
+	  /* Temporary storage n for the quotient and ualloc for the
+	     new cofactor. */
+	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	  if (n == 0)
+	    return ctx.gn;
+
+	  un = ctx.un;
+	}
+    }
+  ASSERT_ALWAYS (ap[0] > 0);
+  ASSERT_ALWAYS (bp[0] > 0);
+
+  if (ap[0] == bp[0])
+    {
+      int c;
+
+      /* Which cofactor to return now? Candidates are +u1 and -u0,
+	 depending on which of a and b was most recently reduced,
+	 which we don't keep track of. So compare and get the smallest
+	 one. */
+
+      gp[0] = ap[0];
+
+      MPN_CMP (c, u0, u1, un);
+      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+      if (c < 0)
+	{
+	  MPN_NORMALIZE (u0, un);
+	  MPN_COPY (up, u0, un);
+	  *usize = -un;
+	}
+      else
+	{
+	  MPN_NORMALIZE_NOT_ZERO (u1, un);
+	  MPN_COPY (up, u1, un);
+	  *usize = un;
+	}
+      return 1;
+    }
+  else
+    {
+      mp_limb_t uh, vh;
+      mp_limb_signed_t u;
+      mp_limb_signed_t v;
+      int negate;
+
+      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
+
+      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
+	 two limbs. */
+
+      if (u == 0)
+	{
+	  ASSERT (v == 1);
+	  MPN_NORMALIZE (u0, un);
+	  MPN_COPY (up, u0, un);
+	  *usize = -un;
+	  return 1;
+	}
+      else if (v == 0)
+	{
+	  ASSERT (u == 1);
+	  MPN_NORMALIZE (u1, un);
+	  MPN_COPY (up, u1, un);
+	  *usize = un;
+	  return 1;
+	}
+      else if (u > 0)
+	{
+	  negate = 0;
+	  ASSERT (v < 0);
+	  v = -v;
+	}
+      else
+	{
+	  negate = 1;
+	  ASSERT (v > 0);
+	  u = -u;
+	}
+
+      uh = mpn_mul_1 (up, u1, un, u);
+      vh = mpn_addmul_1 (up, u0, un, v);
+
+      if ( (uh | vh) > 0)
+	{
+	  uh += vh;
+	  up[un++] = uh;
+	  if (uh < vh)
+	    up[un++] = 1;
+	}
+
+      MPN_NORMALIZE_NOT_ZERO (up, un);
+
+      *usize = negate ? -un : un;
+      return 1;
+    }
+}
diff --git a/mpn/generic/get_d.c b/mpn/generic/get_d.c
new file mode 100644
index 0000000..4c1f3b6
--- /dev/null
+++ b/mpn/generic/get_d.c
@@ -0,0 +1,438 @@
+/* mpn_get_d -- limbs to double conversion.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2003, 2004, 2007, 2009, 2010, 2012, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MANT_DIG and FLT_RADIX */
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+/* To force use of the generic C code for testing, put
+   "#define _GMP_IEEE_FLOATS 0" at this point.  */
+
+
+/* In alpha gcc prior to 3.4, signed DI comparisons involving constants are
+   rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly
+   wrong if that addition overflows.
+
+   The workaround here avoids this bug by ensuring n is not a literal constant.
+   Note that this is alpha specific.  The offending transformation is/was in
+   alpha.c alpha_emit_conditional_branch() under "We want to use cmpcc/bcc".
+
+   Bizarrely, this happens also with Cray cc on alphaev5-cray-unicosmk2.0.6.X,
+   and has the same solution.  Don't know why or how.  */
+
+#if HAVE_HOST_CPU_FAMILY_alpha				\
+  && ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4))	\
+      || defined (_CRAY))
+static volatile const long CONST_1024 = 1024;
+static volatile const long CONST_NEG_1023 = -1023;
+static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53;
+#else
+#define CONST_1024	      (1024)
+#define CONST_NEG_1023	      (-1023)
+#define CONST_NEG_1022_SUB_53 (-1022 - 53)
+#endif
+
+
+/* Return the value {ptr,size}*2^exp, and negative if sign<0.  Must have
+   size>=1, and a non-zero high limb ptr[size-1].
+
+   When we know the fp format, the result is truncated towards zero.  This is
+   consistent with other gmp conversions, like mpz_set_f or mpz_set_q, and is
+   easy to implement and test.
+
+   When we do not know the format, such truncation seems much harder.  One
+   would need to defeat any rounding mode, including round-up.
+
+   It's felt that GMP is not primarily concerned with hardware floats, and
+   really isn't enhanced by getting involved with hardware rounding modes
+   (which could even be some weird unknown style), so something unambiguous and
+   straightforward is best.
+
+
+   The IEEE code below is the usual case, it knows either a 32-bit or 64-bit
+   limb and is done with shifts and masks.  The 64-bit case in particular
+   should come out nice and compact.
+
+   The generic code used to work one bit at a time, which was not only slow,
+   but implicitly relied upon denorms for intermediates, since the lowest bits'
+   weight of a perfectly valid fp number underflows in non-denorm.  Therefore,
+   the generic code now works limb-per-limb, initially creating a number x such
+   that 1 <= x <= BASE.  (BASE is reached only as result of rounding.)  Then
+   x's exponent is scaled with explicit code (not ldexp to avoid libm
+   dependency).  It is a tap-dance to avoid underflow or overflow, beware!
+
+
+   Traps:
+
+   Hardware traps for overflow to infinity, underflow to zero, or unsupported
+   denorms may or may not be taken.  The IEEE code works bitwise and so
+   probably won't trigger them, the generic code works by float operations and
+   so probably will.  This difference might be thought less than ideal, but
+   again it's felt straightforward code is better than trying to get intimate
+   with hardware exceptions (of perhaps unknown nature).
+
+
+   Not done:
+
+   mpz_get_d in the past handled size==1 with a cast limb->double.  This might
+   still be worthwhile there (for up to the mantissa many bits), but for
+   mpn_get_d here, the cost of applying "exp" to the resulting exponent would
+   probably use up any benefit a cast may have over bit twiddling.  Also, if
+   the exponent is pushed into denorm range then bit twiddling is the only
+   option, to ensure the desired truncation is obtained.
+
+
+   Other:
+
+   For reference, note that HPPA 8000, 8200, 8500 and 8600 trap FCNV,UDW,DBL
+   to the kernel for values >= 2^63.  This makes it slow, and worse the kernel
+   Linux (what versions?) apparently uses untested code in its trap handling
+   routines, and gets the sign wrong.  We don't use such a limb-to-double
+   cast, neither in the IEEE or generic code.  */
+
+
+
+#undef FORMAT_RECOGNIZED
+
+double
+mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
+{
+  int lshift, nbits;
+  mp_limb_t x, mhi, mlo;
+
+  ASSERT (size >= 0);
+  ASSERT_MPN (up, size);
+  ASSERT (size == 0 || up[size-1] != 0);
+
+  if (size == 0)
+    return 0.0;
+
+  /* Adjust exp to a radix point just above {up,size}, guarding against
+     overflow.  After this exp can of course be reduced to anywhere within
+     the {up,size} region without underflow.  */
+  if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS) * (unsigned long) (size)
+		> (LONG_MAX - (unsigned long) exp)))
+    {
+#if _GMP_IEEE_FLOATS
+      goto ieee_infinity;
+#endif
+
+      /* generic */
+      exp = LONG_MAX;
+    }
+  else
+    {
+      exp += GMP_NUMB_BITS * size;
+    }
+
+#if _GMP_IEEE_FLOATS
+    {
+      union ieee_double_extract u;
+
+      up += size;
+
+#if GMP_LIMB_BITS == 64
+      mlo = up[-1];
+      count_leading_zeros (lshift, mlo);
+
+      exp -= (lshift - GMP_NAIL_BITS) + 1;
+      mlo <<= lshift;
+
+      nbits = GMP_LIMB_BITS - lshift;
+
+      if (nbits < 53 && size > 1)
+	{
+	  x = up[-2];
+	  x <<= GMP_NAIL_BITS;
+	  x >>= nbits;
+	  mlo |= x;
+	  nbits += GMP_NUMB_BITS;
+
+	  if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
+	    {
+	      x = up[-3];
+	      x <<= GMP_NAIL_BITS;
+	      x >>= nbits;
+	      mlo |= x;
+	      nbits += GMP_NUMB_BITS;
+	    }
+	}
+      mhi = mlo >> (32 + 11);
+      mlo = mlo >> 11;		/* later implicitly truncated to 32 bits */
+#endif
+#if GMP_LIMB_BITS == 32
+      x = *--up;
+      count_leading_zeros (lshift, x);
+
+      exp -= (lshift - GMP_NAIL_BITS) + 1;
+      x <<= lshift;
+      mhi = x >> 11;
+
+      if (lshift < 11)		/* FIXME: never true if NUMB < 20 bits */
+	{
+	  /* All 20 bits in mhi */
+	  mlo = x << 21;
+	  /* >= 1 bit in mlo */
+	  nbits = GMP_LIMB_BITS - lshift - 21;
+	}
+      else
+	{
+	  if (size > 1)
+	    {
+	      nbits = GMP_LIMB_BITS - lshift;
+
+	      x = *--up, size--;
+	      x <<= GMP_NAIL_BITS;
+	      mhi |= x >> nbits >> 11;
+
+	      mlo = x << (GMP_LIMB_BITS - nbits - 11);
+	      nbits = nbits + 11 - GMP_NAIL_BITS;
+	    }
+	  else
+	    {
+	      mlo = 0;
+	      goto done;
+	    }
+	}
+
+      /* Now all needed bits in mhi have been accumulated.  Add bits to mlo.  */
+
+      if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1)
+	{
+	  x = up[-1];
+	  x <<= GMP_NAIL_BITS;
+	  x >>= nbits;
+	  mlo |= x;
+	  nbits += GMP_NUMB_BITS;
+
+	  if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2)
+	    {
+	      x = up[-2];
+	      x <<= GMP_NAIL_BITS;
+	      x >>= nbits;
+	      mlo |= x;
+	      nbits += GMP_NUMB_BITS;
+
+	      if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3)
+		{
+		  x = up[-3];
+		  x <<= GMP_NAIL_BITS;
+		  x >>= nbits;
+		  mlo |= x;
+		  nbits += GMP_NUMB_BITS;
+		}
+	    }
+	}
+
+    done:;
+
+#endif
+      if (UNLIKELY (exp >= CONST_1024))
+	{
+	  /* overflow, return infinity */
+	ieee_infinity:
+	  mhi = 0;
+	  mlo = 0;
+	  exp = 1024;
+	}
+      else if (UNLIKELY (exp <= CONST_NEG_1023))
+	{
+	  int rshift;
+
+	  if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
+	    return 0.0;	 /* denorm underflows to zero */
+
+	  rshift = -1022 - exp;
+	  ASSERT (rshift > 0 && rshift < 53);
+#if GMP_LIMB_BITS > 53
+	  mlo >>= rshift;
+	  mhi = mlo >> 32;
+#else
+	  if (rshift >= 32)
+	    {
+	      mlo = mhi;
+	      mhi = 0;
+	      rshift -= 32;
+	    }
+	  lshift = GMP_LIMB_BITS - rshift;
+	  mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
+	  mhi >>= rshift;
+#endif
+	  exp = -1023;
+	}
+      u.s.manh = mhi;
+      u.s.manl = mlo;
+      u.s.exp = exp + 1023;
+      u.s.sig = (sign < 0);
+      return u.d;
+    }
+#define FORMAT_RECOGNIZED 1
+#endif
+
+#if HAVE_DOUBLE_VAX_D
+    {
+      union double_extract u;
+
+      up += size;
+
+      mhi = up[-1];
+
+      count_leading_zeros (lshift, mhi);
+      exp -= lshift;
+      mhi <<= lshift;
+
+      mlo = 0;
+      if (size > 1)
+	{
+	  mlo = up[-2];
+	  if (lshift != 0)
+	    mhi += mlo >> (GMP_LIMB_BITS - lshift);
+	  mlo <<= lshift;
+
+	  if (size > 2 && lshift > 8)
+	    {
+	      x = up[-3];
+	      mlo += x >> (GMP_LIMB_BITS - lshift);
+	    }
+	}
+
+      if (UNLIKELY (exp >= 128))
+	{
+	  /* overflow, return maximum number */
+	  mhi = 0xffffffff;
+	  mlo = 0xffffffff;
+	  exp = 127;
+	}
+      else if (UNLIKELY (exp < -128))
+	{
+	  return 0.0;	 /* underflows to zero */
+	}
+
+      u.s.man3 = mhi >> 24;	/* drop msb, since implicit */
+      u.s.man2 = mhi >> 8;
+      u.s.man1 = (mhi << 8) + (mlo >> 24);
+      u.s.man0 = mlo >> 8;
+      u.s.exp = exp + 128;
+      u.s.sig = sign < 0;
+      return u.d;
+    }
+#define FORMAT_RECOGNIZED 1
+#endif
+
+#if ! FORMAT_RECOGNIZED
+
+#if !defined(GMP_DBL_MANT_BITS)
+#if defined(DBL_MANT_DIG) && FLT_RADIX == 2
+#define GMP_DBL_MANT_BITS DBL_MANT_DIG
+#else
+/* FIXME: Chose a smarter default value. */
+#define GMP_DBL_MANT_BITS (16 * sizeof (double))
+#endif
+#endif
+
+    { /* Non-IEEE or strange limb size, generically convert
+	 GMP_DBL_MANT_BITS bits. */
+      mp_limb_t l;
+      int m;
+      mp_size_t i;
+      double d, weight;
+      unsigned long uexp;
+
+      /* First generate an fp number disregarding exp, instead keeping things
+	 within the numb base factor from 1, which should prevent overflow and
+	 underflow even for the most exponent limited fp formats.  */
+      i = size - 1;
+      l = up[i];
+      count_leading_zeros (m, l);
+      m = m + GMP_DBL_MANT_BITS - GMP_LIMB_BITS;
+      if (m < 0)
+	l &= GMP_NUMB_MAX << -m;
+      d = l;
+      for (weight = 1/MP_BASE_AS_DOUBLE; m > 0 && --i >= 0;)
+	{
+	  l = up[i];
+	  m -= GMP_NUMB_BITS;
+	  if (m < 0)
+	    l &= GMP_NUMB_MAX << -m;
+	  d += l * weight;
+	  weight /= MP_BASE_AS_DOUBLE;
+	  if (weight == 0)
+	    break;
+	}
+
+      /* Now apply exp.  */
+      exp -= GMP_NUMB_BITS;
+      if (exp > 0)
+	{
+	  weight = 2.0;
+	  uexp = exp;
+	}
+      else
+	{
+	  weight = 0.5;
+	  uexp = NEG_CAST (unsigned long, exp);
+	}
+#if 1
+      /* Square-and-multiply exponentiation.  */
+      if (uexp & 1)
+	d *= weight;
+      while (uexp >>= 1)
+	{
+	  weight *= weight;
+	  if (uexp & 1)
+	    d *= weight;
+	}
+#else
+      /* Plain exponentiation.  */
+      while (uexp > 0)
+	{
+	  d *= weight;
+	  uexp--;
+	}
+#endif
+
+      return sign >= 0 ? d : -d;
+    }
+#endif
+}
diff --git a/mpn/generic/get_str.c b/mpn/generic/get_str.c
new file mode 100644
index 0000000..19cc581
--- /dev/null
+++ b/mpn/generic/get_str.c
@@ -0,0 +1,451 @@
+/* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH MUTABLE
+   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+   IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+   FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Conversion of U {up,un} to a string in base b.  Internally, we convert to
+   base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
+
+  A) Divide U repeatedly by B, generating a quotient and remainder, until the
+     quotient becomes zero.  The remainders hold the converted digits.  Digits
+     come out from right to left.  (Used in mpn_bc_get_str.)
+
+  B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
+     Then develop digits by multiplying the fraction repeatedly by b.  Digits
+     come out from left to right.  (Currently not used herein, except for in
+     code for converting single limbs to individual digits.)
+
+  C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
+     sqrt(U).  Then divide U by B^s, generating quotient and remainder.
+     Recursively convert the quotient, then the remainder, using the
+     precomputed powers.  Digits come out from left to right.  (Used in
+     mpn_dc_get_str.)
+
+  When using algorithm C, algorithm B might be suitable for basecase code,
+  since the required b^g power will be readily accessible.
+
+  Optimization ideas:
+  1. The recursive function of (C) could use less temporary memory.  The powtab
+     allocation could be trimmed with some computation, and the tmp area could
+     be reduced, or perhaps eliminated if up is reused for both quotient and
+     remainder (it is currently used just for remainder).
+  2. Store the powers of (C) in normalized form, with the normalization count.
+     Quotients will usually need to be left-shifted before each divide, and
+     remainders will either need to be left-shifted or right-shifted.
+  3. In the code for developing digits from a single limb, we could avoid using
+     a full umul_ppmm except for the first (or first few) digits, provided base
+     is even.  Subsequent digits can be developed using plain multiplication.
+     (This saves on register-starved machines (read x86) and on all machines
+     that generate the upper product half using a separate instruction (alpha,
+     powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
+  4. Separate mpn_dc_get_str basecase code from code for small conversions. The
+     former code will have the exact right power readily available in the
+     powtab parameter for dividing the current number into a fraction.  Convert
+     that using algorithm B.
+  5. Completely avoid division.  Compute the inverses of the powers now in
+     powtab instead of the actual powers.
+  6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
+     about 30% (1-log(5)/log(10)).
+
+  Basic structure of (C):
+    mpn_get_str:
+      if POW2_P (n)
+	...
+      else
+	if (un < GET_STR_PRECOMPUTE_THRESHOLD)
+	  mpn_bx_get_str (str, base, up, un);
+	else
+	  precompute_power_tables
+	  mpn_dc_get_str
+
+    mpn_dc_get_str:
+	mpn_tdiv_qr
+	if (qn < GET_STR_DC_THRESHOLD)
+	  mpn_bc_get_str
+	else
+	  mpn_dc_get_str
+	if (rn < GET_STR_DC_THRESHOLD)
+	  mpn_bc_get_str
+	else
+	  mpn_dc_get_str
+
+
+  The reason for the two threshold values is the cost of
+  precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be
+  considerably larger than GET_STR_DC_THRESHOLD.  */
+
+
+/* The x86s and m68020 have a quotient and remainder "div" instruction and
+   gcc recognises an adjacent "/" and "%" can be combined using that.
+   Elsewhere "/" and "%" are either separate instructions, or separate
+   libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
+   A multiply and subtract should be faster than a "%" in those cases.  */
+#if HAVE_HOST_CPU_FAMILY_x86            \
+  || HAVE_HOST_CPU_m68020               \
+  || HAVE_HOST_CPU_m68030               \
+  || HAVE_HOST_CPU_m68040               \
+  || HAVE_HOST_CPU_m68060               \
+  || HAVE_HOST_CPU_m68360 /* CPU32 */
+#define udiv_qrnd_unnorm(q,r,n,d)       \
+  do {                                  \
+    mp_limb_t  __q = (n) / (d);         \
+    mp_limb_t  __r = (n) % (d);         \
+    (q) = __q;                          \
+    (r) = __r;                          \
+  } while (0)
+#else
+#define udiv_qrnd_unnorm(q,r,n,d)       \
+  do {                                  \
+    mp_limb_t  __q = (n) / (d);         \
+    mp_limb_t  __r = (n) - __q*(d);     \
+    (q) = __q;                          \
+    (r) = __r;                          \
+  } while (0)
+#endif
+
+
+/* Convert {up,un} to a string in base base, and put the result in str.
+   Generate len characters, possibly padding with zeros to the left.  If len is
+   zero, generate as many characters as required.  Return a pointer immediately
+   after the last digit of the result string.  Complexity is O(un^2); intended
+   for small conversions.  */
+static unsigned char *
+mpn_bc_get_str (unsigned char *str, size_t len,
+		mp_ptr up, mp_size_t un, int base)
+{
+  mp_limb_t rl, ul;
+  unsigned char *s;
+  size_t l;
+  /* Allocate memory for largest possible string, given that we only get here
+     for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
+     base is 3.  7/11 is an approximation to 1/log2(3).  */
+#if TUNE_PROGRAM_BUILD
+#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
+#else
+#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
+#endif
+  unsigned char buf[BUF_ALLOC];
+#if TUNE_PROGRAM_BUILD
+  mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
+#else
+  mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
+#endif
+
+  if (base == 10)
+    {
+      /* Special case code for base==10 so that the compiler has a chance to
+	 optimize things.  */
+
+      MPN_COPY (rp + 1, up, un);
+
+      s = buf + BUF_ALLOC;
+      while (un > 1)
+	{
+	  int i;
+	  mp_limb_t frac, digit;
+	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
+					 MP_BASES_BIG_BASE_10,
+					 MP_BASES_BIG_BASE_INVERTED_10,
+					 MP_BASES_NORMALIZATION_STEPS_10);
+	  un -= rp[un] == 0;
+	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
+	  s -= MP_BASES_CHARS_PER_LIMB_10;
+#if HAVE_HOST_CPU_FAMILY_x86
+	  /* The code below turns out to be a bit slower for x86 using gcc.
+	     Use plain code.  */
+	  i = MP_BASES_CHARS_PER_LIMB_10;
+	  do
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  while (--i);
+#else
+	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
+	     After a few umul_ppmm, we will have accumulated enough low zeros
+	     to use a plain multiply.  */
+	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
+	    {
+	      umul_ppmm (digit, frac, frac, 10);
+	      *s++ = digit;
+	    }
+	  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
+					     ? (4-MP_BASES_NORMALIZATION_STEPS_10)
+					     : 0));
+	  frac = (frac + 0xf) >> 4;
+	  do
+	    {
+	      frac *= 10;
+	      digit = frac >> (GMP_LIMB_BITS - 4);
+	      *s++ = digit;
+	      frac &= (~(mp_limb_t) 0) >> 4;
+	    }
+	  while (--i);
+#endif
+	  s -= MP_BASES_CHARS_PER_LIMB_10;
+	}
+
+      ul = rp[1];
+      while (ul != 0)
+	{
+	  udiv_qrnd_unnorm (ul, rl, ul, 10);
+	  *--s = rl;
+	}
+    }
+  else /* not base 10 */
+    {
+      unsigned chars_per_limb;
+      mp_limb_t big_base, big_base_inverted;
+      unsigned normalization_steps;
+
+      chars_per_limb = mp_bases[base].chars_per_limb;
+      big_base = mp_bases[base].big_base;
+      big_base_inverted = mp_bases[base].big_base_inverted;
+      count_leading_zeros (normalization_steps, big_base);
+
+      MPN_COPY (rp + 1, up, un);
+
+      s = buf + BUF_ALLOC;
+      while (un > 1)
+	{
+	  int i;
+	  mp_limb_t frac;
+	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
+					 big_base, big_base_inverted,
+					 normalization_steps);
+	  un -= rp[un] == 0;
+	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
+	  s -= chars_per_limb;
+	  i = chars_per_limb;
+	  do
+	    {
+	      mp_limb_t digit;
+	      umul_ppmm (digit, frac, frac, base);
+	      *s++ = digit;
+	    }
+	  while (--i);
+	  s -= chars_per_limb;
+	}
+
+      ul = rp[1];
+      while (ul != 0)
+	{
+	  udiv_qrnd_unnorm (ul, rl, ul, base);
+	  *--s = rl;
+	}
+    }
+
+  l = buf + BUF_ALLOC - s;
+  while (l < len)
+    {
+      *str++ = 0;
+      len--;
+    }
+  while (l != 0)
+    {
+      *str++ = *s++;
+      l--;
+    }
+  return str;
+}
+
+
+/* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
+   the string in STR.  Generate LEN characters, possibly padding with zeros to
+   the left.  If LEN is zero, generate as many characters as required.
+   Return a pointer immediately after the last digit of the result string.
+   This uses divide-and-conquer and is intended for large conversions.  */
+static unsigned char *
+mpn_dc_get_str (unsigned char *str, size_t len,
+		mp_ptr up, mp_size_t un,
+		const powers_t *powtab, mp_ptr tmp)
+{
+  if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
+    {
+      if (un != 0)
+	str = mpn_bc_get_str (str, len, up, un, powtab->base);
+      else
+	{
+	  while (len != 0)
+	    {
+	      *str++ = 0;
+	      len--;
+	    }
+	}
+    }
+  else
+    {
+      mp_ptr pwp, qp, rp;
+      mp_size_t pwn, qn;
+      mp_size_t sn;
+
+      pwp = powtab->p;
+      pwn = powtab->n;
+      sn = powtab->shift;
+
+      if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
+	{
+	  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
+	}
+      else
+	{
+	  qp = tmp;		/* (un - pwn + 1) limbs for qp */
+	  rp = up;		/* pwn limbs for rp; overwrite up area */
+
+	  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
+	  qn = un - sn - pwn; qn += qp[qn] != 0;		/* quotient size */
+
+	  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
+
+	  if (len != 0)
+	    len = len - powtab->digits_in_base;
+
+	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
+	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
+	}
+    }
+  return str;
+}
+
+/* There are no leading zeros on the digits generated at str, but that's not
+   currently a documented feature.  The current mpz_out_str and mpz_get_str
+   rely on it.  */
+
+size_t
+mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
+{
+  mp_ptr powtab_mem;
+  powers_t powtab[GMP_LIMB_BITS];
+  int pi;
+  size_t out_len;
+  mp_ptr tmp;
+  TMP_DECL;
+
+  /* Special case zero, as the code below doesn't handle it.  */
+  if (un == 0)
+    {
+      str[0] = 0;
+      return 1;
+    }
+
+  if (POW2_P (base))
+    {
+      /* The base is a power of 2.  Convert from most significant end.  */
+      mp_limb_t n1, n0;
+      int bits_per_digit = mp_bases[base].big_base;
+      int cnt;
+      int bit_pos;
+      mp_size_t i;
+      unsigned char *s = str;
+      mp_bitcnt_t bits;
+
+      n1 = up[un - 1];
+      count_leading_zeros (cnt, n1);
+
+      /* BIT_POS should be R when input ends in least significant nibble,
+	 R + bits_per_digit * n when input ends in nth least significant
+	 nibble. */
+
+      bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
+      cnt = bits % bits_per_digit;
+      if (cnt != 0)
+	bits += bits_per_digit - cnt;
+      bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
+
+      /* Fast loop for bit output.  */
+      i = un - 1;
+      for (;;)
+	{
+	  bit_pos -= bits_per_digit;
+	  while (bit_pos >= 0)
+	    {
+	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
+	      bit_pos -= bits_per_digit;
+	    }
+	  i--;
+	  if (i < 0)
+	    break;
+	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
+	  n1 = up[i];
+	  bit_pos += GMP_NUMB_BITS;
+	  *s++ = n0 | (n1 >> bit_pos);
+	}
+
+      return s - str;
+    }
+
+  /* General case.  The base is not a power of 2.  */
+
+  if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
+    return mpn_bc_get_str (str, (size_t) 0, up, un, base) - str;
+
+  TMP_MARK;
+
+  /* Allocate one large block for the powers of big_base.  */
+  powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
+
+  /* Compute a table of powers, where the largest power is >= sqrt(U).  */
+  size_t ndig;
+  mp_size_t xn;
+  DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
+  xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
+
+  pi = 1 + mpn_compute_powtab (powtab, powtab_mem, xn, base);
+
+  /* Using our precomputed powers, now in powtab[], convert our number.  */
+  tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
+  out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
+  TMP_FREE;
+
+  return out_len;
+}
diff --git a/mpn/generic/gmp-mparam.h b/mpn/generic/gmp-mparam.h
new file mode 100644
index 0000000..7dc057a
--- /dev/null
+++ b/mpn/generic/gmp-mparam.h
@@ -0,0 +1,33 @@
+/* Generic C gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* Values for GMP_LIMB_BITS etc will be determined by ./configure and put
+   in config.h. */
diff --git a/mpn/generic/hgcd.c b/mpn/generic/hgcd.c
new file mode 100644
index 0000000..e3e9c66
--- /dev/null
+++ b/mpn/generic/hgcd.c
@@ -0,0 +1,182 @@
+/* hgcd.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Size analysis for hgcd:
+
+   For the recursive calls, we have n1 <= ceil(n / 2). Then the
+   storage need is determined by the storage for the recursive call
+   computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
+   (after this, the storage needed for M1 can be recycled).
+
+   Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
+   = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
+   and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
+   4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
+
+   For the recursive call, we need S(n1) = S(ceil(n/2)).
+
+   S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
+	<= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
+	<= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
+	<= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
+*/
+
+mp_size_t			/* Scratch space, in limbs, needed by mpn_hgcd for n-limb inputs.  */
+mpn_hgcd_itch (mp_size_t n)
+{
+  unsigned k;
+  int count;
+  mp_size_t nscaled;
+
+  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
+    return n;			/* Basecase range: n limbs suffice.  */
+
+  /* Get the recursion depth: k is the bit width of nscaled, so that
+     roughly k halvings bring n below HGCD_THRESHOLD. */
+  nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
+  count_leading_zeros (count, nscaled);
+  k = GMP_LIMB_BITS - count;
+
+  return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;	/* cf. size analysis above */
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+   with elements of size at most (n+1)/2 - 1. Returns new size of a,
+   b, or zero if no reduction is possible. */
+
+mp_size_t
+mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
+	  struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_size_t s = n/2 + 1;	/* Stop once |a-b| fits in s limbs.  */
+
+  mp_size_t nn;
+  int success = 0;		/* Nonzero once any reduction step has been made.  */
+
+  if (n <= s)
+    /* Happens when n <= 2, a fairly uninteresting case but exercised
+       by the random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((ap[n-1] | bp[n-1]) > 0);	/* At least one operand has its high limb set.  */
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
+    {
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
+
+      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+      if (nn)
+	{
+	  n = nn;
+	  success = 1;
+	}
+
+      /* NOTE: It appears this loop never runs more than once (at
+	 least when not recursing to hgcd_appr). */
+      while (n > n2)
+	{
+	  /* Needs n + 1 storage */
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+	  if (!nn)
+	    return success ? n : 0;
+
+	  n = nn;
+	  success = 1;
+	}
+
+      if (n > s + 2)
+	{
+	  struct hgcd_matrix M1;
+	  mp_size_t scratch;
+
+	  p = 2*s - n + 1;
+	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+	  mpn_hgcd_matrix_init(&M1, n - p, tp);	/* M1 lives at the start of tp.  */
+
+	  /* FIXME: Should use hgcd_reduce, but that may require more
+	     scratch space, which requires review. */
+
+	  nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);	/* Recurse on high parts.  */
+	  if (nn > 0)
+	    {
+	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+	      ASSERT (M->n + 2 >= M1.n);
+
+	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+		 then either q or q + 1 is a correct quotient, and M1 will
+		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+		 rules out the case that the size of M * M1 is much
+		 smaller than the expected M->n + M1->n. */
+
+	      ASSERT (M->n + M1.n < M->alloc);
+
+	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+	      /* We need a bound for M->n + M1.n. Let n be the original
+		 input size. Then
+
+		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+		 and it follows that
+
+		 M.n + M1.n <= ceil(n/2) + 1
+
+		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+		 amount of needed scratch space. */
+	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);	/* Fold M1 into M.  */
+	      success = 1;
+	    }
+	}
+    }
+
+  for (;;)
+    {
+      /* Needs s+3 < n */
+      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+      if (!nn)
+	return success ? n : 0;	/* 0 means no reduction was ever possible.  */
+
+      n = nn;
+      success = 1;
+    }
+}
diff --git a/mpn/generic/hgcd2-div.h b/mpn/generic/hgcd2-div.h
new file mode 100644
index 0000000..45ba453
--- /dev/null
+++ b/mpn/generic/hgcd2-div.h
@@ -0,0 +1,504 @@
+/* hgcd2-div.h
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef HGCD2_DIV1_METHOD
+#define HGCD2_DIV1_METHOD 3
+#endif
+
+#ifndef HGCD2_DIV2_METHOD
+#define HGCD2_DIV2_METHOD 2
+#endif
+
+#if HAVE_NATIVE_mpn_div_11
+
+#define div1 mpn_div_11
+/* Single-limb division optimized for small quotients.
+   Returned value holds d0 = r, d1 = q. */
+mp_double_limb_t div1 (mp_limb_t, mp_limb_t);
+
+#elif HGCD2_DIV1_METHOD == 1
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  mp_double_limb_t res;
+  res.d1 = n0 / d0;		/* Quotient via plain hardware division.  */
+  res.d0 = n0 - res.d1 * d0;	/* Remainder, avoiding a second division.  */
+
+  return res;
+}
+
+#elif HGCD2_DIV1_METHOD == 2
+
+static mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  mp_double_limb_t res;
+  int ncnt, dcnt, cnt;
+  mp_limb_t q;
+  mp_limb_t mask;
+
+  ASSERT (n0 >= d0);
+
+  count_leading_zeros (ncnt, n0);
+  count_leading_zeros (dcnt, d0);
+  cnt = dcnt - ncnt;		/* Quotient has cnt + 1 bits.  */
+
+  d0 <<= cnt;			/* Align divisor msb with numerator msb.  */
+
+  q = -(mp_limb_t) (n0 >= d0);	/* Branch-free conditional subtract for the top bit.  */
+  n0 -= d0 & q;
+  d0 >>= 1;
+  q = -q;
+
+  while (--cnt >= 0)		/* Shift-and-subtract, one quotient bit per round.  */
+    {
+      mask = -(mp_limb_t) (n0 >= d0);
+      n0 -= d0 & mask;
+      d0 >>= 1;
+      q = (q << 1) - mask;	/* mask is -1 when the subtract happened, 0 otherwise.  */
+    }
+
+  res.d0 = n0;			/* Remainder.  */
+  res.d1 = q;			/* Quotient.  */
+  return res;
+}
+
+#elif HGCD2_DIV1_METHOD == 3
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  mp_double_limb_t res;
+  if (UNLIKELY ((d0 >> (GMP_LIMB_BITS - 3)) != 0)
+      || UNLIKELY (n0 >= (d0 << 3)))	/* Quotient >= 8, or d0 << 3 would overflow.  */
+    {
+      res.d1 = n0 / d0;		/* Rare path: plain hardware division.  */
+      res.d0 = n0 - res.d1 * d0;
+    }
+  else
+    {
+      mp_limb_t q, mask;
+
+      d0 <<= 2;			/* Resolve the 3 quotient bits branch-free.  */
+
+      mask = -(mp_limb_t) (n0 >= d0);
+      n0 -= d0 & mask;
+      q = 4 & mask;		/* Quotient bit 2.  */
+
+      d0 >>= 1;
+      mask = -(mp_limb_t) (n0 >= d0);
+      n0 -= d0 & mask;
+      q += 2 & mask;		/* Quotient bit 1.  */
+
+      d0 >>= 1;
+      mask = -(mp_limb_t) (n0 >= d0);
+      n0 -= d0 & mask;
+      q -= mask;		/* Quotient bit 0 (mask is -1 or 0).  */
+
+      res.d0 = n0;
+      res.d1 = q;
+    }
+  return res;
+}
+
+#elif HGCD2_DIV1_METHOD == 4
+
+/* Table quotients.  We extract the NBITS most significant bits of the
+   numerator limb, and the corresponding bits from the divisor limb, and use
+   these to form an index into the table.  This method is probably only useful
+   for short pipelines with slow multiplication.
+
+   Possible improvements:
+
+   * Perhaps extract the highest NBITS of the divisor instead of the same bits
+     as from the numerator.  That would require another count_leading_zeros,
+     and a post-multiply shift of the quotient.
+
+   * Compress tables?  Their values are tiny, and there are lots of zero
+     entries (which are never used).
+
+   * Round the table entries more cleverly?
+*/
+
+#ifndef NBITS
+#define NBITS 5
+#endif
+
+#if NBITS == 5
+/* This needs full division about 13.2% of the time. */
+static const unsigned char tab[512] = {
+17, 9, 5,4,3,2,2,2,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18, 9, 6,4,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+19,10, 6,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+20,10, 6,5,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+21,11, 7,5,4,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
+22,11, 7,5,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+23,12, 7,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
+24,12, 8,6,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+25,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
+26,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+27,14, 9,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+28,14, 9,7,5,4,3,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+29,15,10,7,5,4,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
+30,15,10,7,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+31,16,10,7,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+32,16,11,8,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+#elif NBITS == 6
+/* This needs full division about 9.8% of the time. */
+static const unsigned char tab[2048] = {
+33,17,11, 8, 6, 5,4,4,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+34,17,11, 8, 6, 5,4,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+35,18,12, 9, 7, 5,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+36,18,12, 9, 7, 6,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+37,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+38,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+39,20,13,10, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+40,20,14,10, 8, 6,5,5,4,3,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+41,21,14,10, 8, 6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+42,21,14,10, 8, 7,6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+43,22,15,11, 8, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+44,22,15,11, 9, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+45,23,15,11, 9, 7,6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+46,23,16,11, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+47,24,16,12, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+48,24,16,12, 9, 8,6,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+49,25,17,12,10, 8,7,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+50,25,17,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+51,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+52,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+53,27,18,13,10, 9,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
+54,27,19,14,11, 9,7,6,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+55,28,19,14,11, 9,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
+56,28,19,14,11, 9,8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+57,29,20,14,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
+58,29,20,15,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+59,30,20,15,12,10,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
+60,30,21,15,12,10,8,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+61,31,21,15,12,10,8,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
+62,31,22,16,12,10,9,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+63,32,22,16,13,10,9,7,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+64,32,22,16,13,10,9,8,7,6,5,5,4,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,
+ 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+#else
+#error No table for provided NBITS
+#endif
+
+/* Doing tabp with a #define makes compiler warnings about pointing outside an
+   object go away.  We used to define this as a variable.  It is not clear if
+   e.g.  (vector[100] - 10) + 10 is well-defined as per the C standard;
+   (vector[100] + 10) - 10 surely is and there is no sequence point so the
+   expressions should be equivalent.  To make this safe, we might want to
+   define tabp as a macro with the index as an argument.  Depending on the
+   platform, relocs might allow for assembly-time or linker-time resolution to
+   take place. */
+#define tabp (tab - (1 << (NBITS - 1) << NBITS))
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  int ncnt;
+  size_t nbi, dbi;
+  mp_limb_t q0;
+  mp_limb_t r0;
+  mp_limb_t mask;
+  mp_double_limb_t res;
+
+  ASSERT (n0 >= d0);		/* Actually only msb position is critical. */
+
+  count_leading_zeros (ncnt, n0);
+  nbi = n0 << ncnt >> (GMP_LIMB_BITS - NBITS);	/* Top NBITS of the numerator.  */
+  dbi = d0 << ncnt >> (GMP_LIMB_BITS - NBITS);	/* Same bit positions of the divisor.  */
+
+  q0 = tabp[(nbi << NBITS) + dbi];	/* Table lookup gives a quotient estimate.  */
+  r0 = n0 - q0 * d0;
+  mask = -(mp_limb_t) (r0 >= d0);	/* Correct the estimate by at most one.  */
+  q0 -= mask;
+  r0 -= d0 & mask;
+
+  if (UNLIKELY (r0 >= d0))	/* Estimate still off: fall back to full division.  */
+    {
+      q0 = n0 / d0;
+      r0 = n0 - q0 * d0;
+    }
+
+  res.d1 = q0;
+  res.d0 = r0;
+  return res;
+}
+
+#elif HGCD2_DIV1_METHOD == 5
+
+/* Table inverses of divisors.  We don't bother with suppressing the msb from
+   the tables.  We index with the NBITS most significant divisor bits,
+   including the always-set highest bit, but use addressing trickery via tabp
+   to suppress it.
+
+   Possible improvements:
+
+   * Do first multiply using 32-bit operations on 64-bit computers.  At least
+     on most Arm64 cores, that uses 3 times less resources.  It also saves on
+     many x86-64 processors.
+*/
+
+#ifndef NBITS
+#define NBITS 7
+#endif
+
+#if NBITS == 5
+/* This needs full division about 1.63% of the time. */
+static const unsigned char tab[16] = {
+ 63, 59, 55, 52, 50, 47, 45, 43, 41, 39, 38, 36, 35, 34, 33, 32
+};
+#elif NBITS == 6
+/* This needs full division about 0.93% of the time. */
+static const unsigned char tab[32] = {
+127,123,119,116,112,109,106,104,101, 98, 96, 94, 92, 90, 88, 86,
+ 84, 82, 80, 79, 77, 76, 74, 73, 72, 70, 69, 68, 67, 66, 65, 64
+};
+#elif NBITS == 7
+/* This needs full division about 0.49% of the time. */
+static const unsigned char tab[64] = {
+255,251,247,243,239,236,233,229,226,223,220,217,214,211,209,206,
+203,201,198,196,194,191,189,187,185,183,181,179,177,175,173,171,
+169,167,166,164,162,161,159,158,156,155,153,152,150,149,147,146,
+145,143,142,141,140,139,137,136,135,134,133,132,131,130,129,128
+};
+#elif NBITS == 8
+/* This needs full division about 0.26% of the time. */
+static const unsigned short tab[128] = {
+511,507,503,499,495,491,488,484,480,477,473,470,467,463,460,457,
+454,450,447,444,441,438,435,433,430,427,424,421,419,416,413,411,
+408,406,403,401,398,396,393,391,389,386,384,382,380,377,375,373,
+371,369,367,365,363,361,359,357,355,353,351,349,347,345,343,342,
+340,338,336,335,333,331,329,328,326,325,323,321,320,318,317,315,
+314,312,311,309,308,306,305,303,302,301,299,298,296,295,294,292,
+291,290,288,287,286,285,283,282,281,280,279,277,276,275,274,273,
+272,270,269,268,267,266,265,264,263,262,261,260,259,258,257,256
+};
+#else
+#error No table for provided NBITS
+#endif
+
+/* Doing tabp with a #define makes compiler warnings about pointing outside an
+   object go away.  We used to define this as a variable.  It is not clear if
+   e.g.  (vector[100] - 10) + 10 is well-defined as per the C standard;
+   (vector[100] + 10) - 10 surely is and there is no sequence point so the
+   expressions should be equivalent.  To make this safe, we might want to
+   define tabp as a macro with the index as an argument.  Depending on the
+   platform, relocs might allow for assembly-time or linker-time resolution to
+   take place. */
+#define tabp (tab - (1 << (NBITS - 1)))
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  int ncnt, dcnt;
+  size_t dbi;
+  mp_limb_t inv;
+  mp_limb_t q0;
+  mp_limb_t r0;
+  mp_limb_t mask;
+  mp_double_limb_t res;
+
+  count_leading_zeros (ncnt, n0);
+  count_leading_zeros (dcnt, d0);
+
+  dbi = d0 << dcnt >> (GMP_LIMB_BITS - NBITS);	/* Top NBITS of d0, high bit set.  */
+  inv = tabp[dbi];		/* Table holds an approximate reciprocal.  */
+  q0 = ((n0 << ncnt) >> (NBITS + 1)) * inv >> (GMP_LIMB_BITS - 1 + ncnt - dcnt);
+  r0 = n0 - q0 * d0;
+  mask = -(mp_limb_t) (r0 >= d0);	/* Correct the estimate by at most one.  */
+  q0 -= mask;
+  r0 -= d0 & mask;
+
+  if (UNLIKELY (r0 >= d0))	/* Estimate still off: fall back to full division.  */
+    {
+      q0 = n0 / d0;
+      r0 = n0 - q0 * d0;
+    }
+
+  res.d1 = q0;
+  res.d0 = r0;
+  return res;
+}
+
+#else
+#error Unknown HGCD2_DIV1_METHOD
+#endif
+
+#if HAVE_NATIVE_mpn_div_22
+
+#define div2 mpn_div_22
+/* Two-limb division optimized for small quotients.  */
+mp_limb_t div2 (mp_ptr, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+#elif HGCD2_DIV2_METHOD == 1
+
+static mp_limb_t
+div2 (mp_ptr rp,
+      mp_limb_t n1, mp_limb_t n0,
+      mp_limb_t d1, mp_limb_t d0)
+{
+  mp_double_limb_t rq = div1 (n1, d1);	/* Estimate q from the high limbs.  */
+  if (UNLIKELY (rq.d1 > d1))	/* Rare case: take the fully general path.  */
+    {
+      mp_limb_t n2, q, t1, t0;
+      int c;
+
+      /* Normalize so that d1's most significant bit is set, as required
+	 by udiv_qrnnd.  c > 0 is guaranteed by the branch condition. */
+      count_leading_zeros (c, d1);
+      ASSERT (c > 0);
+
+      n2 = n1 >> (GMP_LIMB_BITS - c);
+      n1 = (n1 << c) | (n0 >> (GMP_LIMB_BITS - c));
+      n0 <<= c;
+      d1 = (d1 << c) | (d0 >> (GMP_LIMB_BITS - c));
+      d0 <<= c;
+
+      udiv_qrnnd (q, n1, n2, n1, d1);	/* q = <n2,n1> / d1, n1 = remainder.  */
+      umul_ppmm (t1, t0, q, d0);	/* <t1,t0> = q * d0, low divisor limb.  */
+      if (t1 > n1 || (t1 == n1 && t0 > n0))
+	{
+	  ASSERT (q > 0);
+	  q--;			/* q was one too large; add d back via t.  */
+	  sub_ddmmss (t1, t0, t1, t0, d1, d0);
+	}
+      sub_ddmmss (n1, n0, n1, n0, t1, t0);
+
+      /* Undo normalization */
+      rp[0] = (n0 >> c) | (n1 << (GMP_LIMB_BITS - c));
+      rp[1] = n1 >> c;
+
+      return q;
+    }
+  else
+    {
+      mp_limb_t q, t1, t0;
+      n1 = rq.d0;		/* Remainder of the high-limb division.  */
+      q = rq.d1;
+      umul_ppmm (t1, t0, q, d0);
+      if (UNLIKELY (t1 >= n1) && (t1 > n1 || t0 > n0))
+	{
+	  ASSERT (q > 0);
+	  q--;			/* q was one too large; add d back via t.  */
+	  sub_ddmmss (t1, t0, t1, t0, d1, d0);
+	}
+      sub_ddmmss (rp[1], rp[0], n1, n0, t1, t0);	/* Remainder into rp.  */
+      return q;
+    }
+}
+
+#elif HGCD2_DIV2_METHOD == 2
+
+/* Bit-wise div2. Relies on fast count_leading_zeros. */
+static mp_limb_t
+div2 (mp_ptr rp,
+      mp_limb_t n1, mp_limb_t n0,
+      mp_limb_t d1, mp_limb_t d0)
+{
+  mp_limb_t q = 0;
+  int ncnt;
+  int dcnt;
+
+  count_leading_zeros (ncnt, n1);
+  count_leading_zeros (dcnt, d1);
+  dcnt -= ncnt;
+
+  d1 = (d1 << dcnt) + (d0 >> 1 >> (GMP_LIMB_BITS - 1 - dcnt));
+  d0 <<= dcnt;
+
+  do
+    {
+      mp_limb_t mask;
+      q <<= 1;
+      if (UNLIKELY (n1 == d1))
+	mask = -(n0 >= d0);
+      else
+	mask = -(n1 > d1);
+
+      q -= mask;
+
+      sub_ddmmss (n1, n0, n1, n0, mask & d1, mask & d0);
+
+      d0 = (d1 << (GMP_LIMB_BITS - 1)) | (d0 >> 1);
+      d1 = d1 >> 1;
+    }
+  while (dcnt--);
+
+  rp[0] = n0;
+  rp[1] = n1;
+
+  return q;
+}
+#else
+#error Unknown HGCD2_DIV2_METHOD
+#endif
diff --git a/mpn/generic/hgcd2.c b/mpn/generic/hgcd2.c
new file mode 100644
index 0000000..43d4d48
--- /dev/null
+++ b/mpn/generic/hgcd2.c
@@ -0,0 +1,283 @@
+/* hgcd2.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/generic/hgcd2-div.h"
+
+#if GMP_NAIL_BITS != 0
+#error Nails not implemented
+#endif
+
+/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
+   matrix M. Returns 1 if we make progress, i.e. can perform at least
+   one subtraction. Otherwise returns zero. */
+
+/* FIXME: Possible optimizations:
+
+   The div2 function starts with checking the most significant bit of
+   the numerator. We could maintain normalized operands here, and call
+   hgcd with normalized operands only, which should make the code
+   simpler and possibly faster.
+
+   Experiment with table lookups on the most significant bits.
+
+   This function is also a candidate for assembler implementation.
+*/
+int
+mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+	   struct hgcd_matrix1 *M)
+{
+  mp_limb_t u00, u01, u10, u11;	/* Matrix entries, accumulated as we reduce.  */
+
+  if (ah < 2 || bh < 2)		/* High limbs too small for any progress.  */
+    return 0;
+
+  if (ah > bh || (ah == bh && al > bl))
+    {
+      sub_ddmmss (ah, al, ah, al, bh, bl);	/* a -= b */
+      if (ah < 2)
+	return 0;
+
+      u00 = u01 = u11 = 1;
+      u10 = 0;
+    }
+  else
+    {
+      sub_ddmmss (bh, bl, bh, bl, ah, al);	/* b -= a */
+      if (bh < 2)
+	return 0;
+
+      u00 = u10 = u11 = 1;
+      u01 = 0;
+    }
+
+  if (ah < bh)
+    goto subtract_a;
+
+  for (;;)			/* Double-precision loop.  */
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+	goto done;
+
+      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  break;		/* Drop to the single-precision loop below.  */
+	}
+
+      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+	 1), affecting the second column of M. */
+      ASSERT (ah > bh);
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+
+      if (ah < 2)
+	goto done;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, ah, al, bh, bl);
+	  al = r[0]; ah = r[1];
+	  if (ah < 2)
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      goto done;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	}
+    subtract_a:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+	goto done;
+
+      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  goto subtract_a1;	/* Enter single-precision loop mid-iteration.  */
+	}
+
+      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+	 1), affecting the first column of M. */
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+      if (bh < 2)
+	goto done;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, bh, bl, ah, al);
+	  bl = r[0]; bh = r[1];
+	  if (bh < 2)
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      goto done;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	}
+    }
+
+  /* NOTE: Since we discard the least significant half limb, we don't get a
+     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
+  /* Single precision loop */
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+
+      ah -= bh;
+      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	}
+      else
+	{
+	  mp_double_limb_t rq = div1 (ah, bh);
+	  mp_limb_t q = rq.d1;
+	  ah = rq.d0;
+
+	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      break;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	}
+    subtract_a1:
+      ASSERT (bh >= ah);
+
+      bh -= ah;
+      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	}
+      else
+	{
+	  mp_double_limb_t rq = div1 (bh, ah);
+	  mp_limb_t q = rq.d1;
+	  bh = rq.d0;
+
+	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      break;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	}
+    }
+
+ done:
+  M->u[0][0] = u00; M->u[0][1] = u01;
+  M->u[1][0] = u10; M->u[1][1] = u11;
+
+  return 1;			/* At least one subtraction was performed.  */
+}
+
+/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
+ * have space for n + 1 limbs. Uses three buffers to avoid a copy.  */
+mp_size_t
+mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
+			     mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t ah, bh;		/* Carry-out limbs of the two linear combinations.  */
+
+  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+     r  = u00 * a
+     r += u10 * b
+     b *= u11
+     b += u01 * a
+  */
+
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
+  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
+#else
+  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
+  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
+
+  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
+  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
+#endif
+  rp[n] = ah;			/* Carries become the (n+1)-th limbs.  */
+  bp[n] = bh;
+
+  n += (ah | bh) > 0;		/* Size grows by one iff either carry is nonzero.  */
+  return n;
+}
diff --git a/mpn/generic/hgcd2_jacobi.c b/mpn/generic/hgcd2_jacobi.c
new file mode 100644
index 0000000..95d4af1
--- /dev/null
+++ b/mpn/generic/hgcd2_jacobi.c
@@ -0,0 +1,251 @@
+/* hgcd2_jacobi.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2011, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/generic/hgcd2-div.h"
+
+#if GMP_NAIL_BITS != 0
+#error Nails not implemented
+#endif
+
+/* Performs the same two-limb reduction as mpn_hgcd2, but additionally
+   feeds each quotient into mpn_jacobi_update, so that the caller's
+   Jacobi-symbol state (*bitsp) stays in sync with the transformation
+   matrix M.  Returns 0 if no reduction is possible (inputs too small),
+   otherwise 1 with the matrix stored in M and the updated state in
+   *bitsp. */
+int
+mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+		  struct hgcd_matrix1 *M, unsigned *bitsp)
+{
+  mp_limb_t u00, u01, u10, u11;
+  unsigned bits = *bitsp;
+
+  if (ah < 2 || bh < 2)
+    return 0;
+
+  if (ah > bh || (ah == bh && al > bl))
+    {
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+      if (ah < 2)
+	return 0;
+
+      u00 = u01 = u11 = 1;
+      u10 = 0;
+      bits = mpn_jacobi_update (bits, 1, 1);
+    }
+  else
+    {
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+      if (bh < 2)
+	return 0;
+
+      u00 = u10 = u11 = 1;
+      u01 = 0;
+      bits = mpn_jacobi_update (bits, 0, 1);
+    }
+
+  if (ah < bh)
+    goto subtract_a;
+
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+	goto done;
+
+      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  /* High halves are exhausted; pack the most significant half
+	     of each number into a single limb and switch to the
+	     single-precision loop below. */
+	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  break;
+	}
+
+      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+	 1), affecting the second column of M. */
+      ASSERT (ah > bh);
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+
+      if (ah < 2)
+	goto done;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	  bits = mpn_jacobi_update (bits, 1, 1);
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, ah, al, bh, bl);
+	  al = r[0]; ah = r[1];
+	  if (ah < 2)
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      bits = mpn_jacobi_update (bits, 1, q & 3);
+	      goto done;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	  bits = mpn_jacobi_update (bits, 1, q & 3);
+	}
+    subtract_a:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+	goto done;
+
+      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  goto subtract_a1;
+	}
+
+      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+	 1), affecting the first column of M. */
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+      if (bh < 2)
+	goto done;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	  bits = mpn_jacobi_update (bits, 0, 1);
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, bh, bl, ah, al);
+	  bl = r[0]; bh = r[1];
+	  if (bh < 2)
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      bits = mpn_jacobi_update (bits, 0, q & 3);
+	      goto done;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	  bits = mpn_jacobi_update (bits, 0, q & 3);
+	}
+    }
+
+  /* NOTE: Since we discard the least significant half limb, we don't get a
+     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
+  /* Single precision loop */
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+
+      ah -= bh;
+      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	  bits = mpn_jacobi_update (bits, 1, 1);
+	}
+      else
+	{
+	  mp_double_limb_t rq = div1 (ah, bh);
+	  mp_limb_t q = rq.d1;
+	  ah = rq.d0;
+
+	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      bits = mpn_jacobi_update (bits, 1, q & 3);
+	      break;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	  bits = mpn_jacobi_update (bits, 1, q & 3);
+	}
+    subtract_a1:
+      ASSERT (bh >= ah);
+
+      bh -= ah;
+      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	  bits = mpn_jacobi_update (bits, 0, 1);
+	}
+      else
+	{
+	  mp_double_limb_t rq = div1 (bh, ah);
+	  mp_limb_t q = rq.d1;
+	  bh = rq.d0;
+
+	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      bits = mpn_jacobi_update (bits, 0, q & 3);
+	      break;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	  bits = mpn_jacobi_update (bits, 0, q & 3);
+	}
+    }
+
+ done:
+  M->u[0][0] = u00; M->u[0][1] = u01;
+  M->u[1][0] = u10; M->u[1][1] = u11;
+  *bitsp = bits;
+
+  return 1;
+}
diff --git a/mpn/generic/hgcd_appr.c b/mpn/generic/hgcd_appr.c
new file mode 100644
index 0000000..bb01738
--- /dev/null
+++ b/mpn/generic/hgcd_appr.c
@@ -0,0 +1,267 @@
+/* hgcd_appr.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Returns the scratch space (in limbs) needed by mpn_hgcd_appr for an
+   input of size n.  Identical to mpn_hgcd_itch. FIXME: Do we really
+   need to add HGCD_THRESHOLD at the end? */
+mp_size_t
+mpn_hgcd_appr_itch (mp_size_t n)
+{
+  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+    return n;
+  else
+    {
+      unsigned k;
+      int count;
+      mp_size_t nscaled;
+
+      /* Get the recursion depth: k = number of halvings until the
+	 scaled size falls below the threshold. */
+      nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
+      count_leading_zeros (count, nscaled);
+      k = GMP_LIMB_BITS - count;
+
+      return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+    }
+}
+
+/* Approximate variant of mpn_hgcd: reduces (a, b) using only their
+   most significant parts, discarding least significant limbs as the
+   reduction proceeds.  Accumulates the transformation in M and returns
+   non-zero iff any reduction took place.  Destroys inputs. */
+int
+mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
+	       struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_size_t s;
+  int success = 0;
+
+  ASSERT (n > 0);
+
+  ASSERT ((ap[n-1] | bp[n-1]) != 0);
+
+  if (n <= 2)
+    /* Implies s = n. A fairly uninteresting case but exercised by the
+       random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  /* We aim for reduction to GMP_NUMB_BITS * s bits. But each time
+     we discard some of the least significant limbs, we must keep one
+     additional bit to account for the truncation error. We maintain
+     the GMP_NUMB_BITS * s - extra_bits as the current target size. */
+
+  s = n/2 + 1;
+  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+    {
+      unsigned extra_bits = 0;
+
+      while (n > 2)
+	{
+	  mp_size_t nn;
+
+	  ASSERT (n > s);
+	  ASSERT (n <= 2*s);
+
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+	  if (!nn)
+	    break;
+
+	  n = nn;
+	  success = 1;
+
+	  /* We can truncate and discard the lower p bits whenever nbits <=
+	     2*sbits - p. To account for the truncation error, we must
+	     adjust
+
+	     sbits <-- sbits + 1 - p,
+
+	     rather than just sbits <-- sbits - p. This adjustment makes
+	     the produced matrix slightly smaller than it could be. */
+
+	  if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
+	    {
+	      mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
+
+	      if (extra_bits == 0)
+		{
+		  /* We cross a limb boundary and bump s. We can't do that
+		     if the result is that it makes min(U, V)
+		     smaller than 2^{GMP_NUMB_BITS} s. */
+		  if (s + 1 == n
+		      || mpn_zero_p (ap + s + 1, n - s - 1)
+		      || mpn_zero_p (bp + s + 1, n - s - 1))
+		    continue;
+
+		  extra_bits = GMP_NUMB_BITS - 1;
+		  s++;
+		}
+	      else
+		{
+		  extra_bits--;
+		}
+
+	      /* Drop the p least significant limbs */
+	      ap += p; bp += p; n -= p; s -= p;
+	    }
+	}
+
+      ASSERT (s > 0);
+
+      if (extra_bits > 0)
+	{
+	  /* We can get here only if we have dropped at least one of the least
+	     significant bits, so we can decrement ap and bp. We can then shift
+	     left extra bits using mpn_rshift. */
+	  /* NOTE: In the unlikely case that n is large, it would be preferable
+	     to do an initial subdiv step to reduce the size before shifting,
+	     but that would mean duplicating mpn_gcd_subdiv_step with a bit
+	     count rather than a limb count. */
+	  ap--; bp--;
+	  ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
+	  bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
+	  n += (ap[n] | bp[n]) > 0;
+
+	  ASSERT (success);
+
+	  while (n > 2)
+	    {
+	      mp_size_t nn;
+
+	      ASSERT (n > s);
+	      ASSERT (n <= 2*s);
+
+	      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+	      if (!nn)
+		return 1;
+
+	      n = nn;
+	    }
+	}
+
+      if (n == 2)
+	{
+	  struct hgcd_matrix1 M1;
+	  ASSERT (s == 1);
+
+	  if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
+	    {
+	      /* Multiply M <- M * M1 */
+	      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+	      success = 1;
+	    }
+	}
+      return success;
+    }
+  else
+    {
+      /* Recursive case: first reduce roughly by half via
+	 mpn_hgcd_reduce, then recurse on the high part. */
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
+      mp_size_t nn;
+
+      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+      if (nn)
+	{
+	  n = nn;
+	  /* FIXME: Discard some of the low limbs immediately? */
+	  success = 1;
+	}
+
+      while (n > n2)
+	{
+	  mp_size_t nn;
+
+	  /* Needs n + 1 storage */
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+	  if (!nn)
+	    return success;
+
+	  n = nn;
+	  success = 1;
+	}
+      if (n > s + 2)
+	{
+	  struct hgcd_matrix M1;
+	  mp_size_t scratch;
+
+	  p = 2*s - n + 1;
+	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+	  mpn_hgcd_matrix_init(&M1, n - p, tp);
+	  if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
+	    {
+	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+	      ASSERT (M->n + 2 >= M1.n);
+
+	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+		 then either q or q + 1 is a correct quotient, and M1 will
+		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+		 rules out the case that the size of M * M1 is much
+		 smaller than the expected M->n + M1->n. */
+
+	      ASSERT (M->n + M1.n < M->alloc);
+
+	      /* We need a bound for M->n + M1.n. Let n be the original
+		 input size. Then
+
+		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+		 and it follows that
+
+		 M.n + M1.n <= ceil(n/2) + 1
+
+		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+		 amount of needed scratch space. */
+	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+	      return 1;
+	    }
+	}
+
+      for(;;)
+	{
+	  mp_size_t nn;
+
+	  ASSERT (n > s);
+	  ASSERT (n <= 2*s);
+
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+	  if (!nn)
+	    return success;
+
+	  n = nn;
+	  success = 1;
+	}
+    }
+}
diff --git a/mpn/generic/hgcd_jacobi.c b/mpn/generic/hgcd_jacobi.c
new file mode 100644
index 0000000..24014ce
--- /dev/null
+++ b/mpn/generic/hgcd_jacobi.c
@@ -0,0 +1,243 @@
+/* hgcd_jacobi.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This file is almost a copy of hgcd.c, with some added calls to
+   mpn_jacobi_update */
+
+/* Context threaded through mpn_gcd_subdiv_step to hgcd_jacobi_hook
+   below: the matrix being accumulated and the Jacobi symbol state. */
+struct hgcd_jacobi_ctx
+{
+  struct hgcd_matrix *M;
+  unsigned *bitsp;
+};
+
+/* Callback invoked by mpn_gcd_subdiv_step with each quotient (qp, qn):
+   folds the quotient into the matrix and updates the Jacobi state. */
+static void
+hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+		  mp_srcptr qp, mp_size_t qn, int d)
+{
+  ASSERT (!gp);
+  ASSERT (d >= 0);
+
+  MPN_NORMALIZE (qp, qn);
+  if (qn > 0)
+    {
+      struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
+      /* NOTES: This is a bit ugly. A tp area is passed to
+	 gcd_subdiv_step, which stores q at the start of that area. We
+	 now use the rest. */
+      mp_ptr tp = (mp_ptr) qp + qn;
+
+      mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
+      *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
+    }
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+   division. Reduces the size by almost one limb or more, but never
+   below the given size s. Return new size for a and b, or 0 if no
+   more steps are possible.
+
+   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   resulting size of M.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
+   < N, so N is sufficient.
+*/
+
+static mp_size_t
+hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+		  struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  if (n == s + 1)
+    {
+      if (mask < 4)
+	goto subtract;
+
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else if (mask & GMP_NUMB_HIGHBIT)
+    {
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      int shift;
+
+      /* Neither top limb is normalized; extract the two most
+	 significant limbs of each number shifted left so that the most
+	 significant bit of the larger one is set. */
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step */
+  if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
+    {
+      /* Multiply M <- M * M1 */
+      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+      /* Can't swap inputs, so we need to copy. */
+      MPN_COPY (tp, ap, n);
+      /* Multiply M1^{-1} (a;b) */
+      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+    }
+
+ subtract:
+  {
+    struct hgcd_jacobi_ctx ctx;
+    ctx.M = M;
+    ctx.bitsp = bitsp;
+
+    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
+  }
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+   with elements of size at most (n+1)/2 - 1. Returns new size of a,
+   b, or zero if no reduction is possible.  Keeps the Jacobi symbol
+   state in *bitsp in sync with the reduction. */
+
+/* Same scratch requirements as for mpn_hgcd. */
+mp_size_t
+mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
+		 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+  mp_size_t s = n/2 + 1;
+
+  mp_size_t nn;
+  int success = 0;
+
+  if (n <= s)
+    /* Happens when n <= 2, a fairly uninteresting case but exercised
+       by the random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((ap[n-1] | bp[n-1]) > 0);
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
+    {
+      /* Divide-and-conquer: recurse on the high half, adjust the full
+	 numbers by the resulting matrix, then repeat on the remainder. */
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
+
+      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
+      if (nn > 0)
+	{
+	  /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+	     = 2 (n - 1) */
+	  n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+	  success = 1;
+	}
+      while (n > n2)
+	{
+	  /* Needs n + 1 storage */
+	  nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+	  if (!nn)
+	    return success ? n : 0;
+	  n = nn;
+	  success = 1;
+	}
+
+      if (n > s + 2)
+	{
+	  struct hgcd_matrix M1;
+	  mp_size_t scratch;
+
+	  p = 2*s - n + 1;
+	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+	  mpn_hgcd_matrix_init(&M1, n - p, tp);
+	  nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
+	  if (nn > 0)
+	    {
+	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+	      ASSERT (M->n + 2 >= M1.n);
+
+	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+		 then either q or q + 1 is a correct quotient, and M1 will
+		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+		 rules out the case that the size of M * M1 is much
+		 smaller than the expected M->n + M1->n. */
+
+	      ASSERT (M->n + M1.n < M->alloc);
+
+	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+	      /* We need a bound for M->n + M1.n. Let n be the original
+		 input size. Then
+
+		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+		 and it follows that
+
+		 M.n + M1.n <= ceil(n/2) + 1
+
+		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+		 amount of needed scratch space. */
+	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+	      success = 1;
+	    }
+	}
+    }
+
+  for (;;)
+    {
+      /* Needs s+3 < n */
+      nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+      if (!nn)
+	return success ? n : 0;
+
+      n = nn;
+      success = 1;
+    }
+}
diff --git a/mpn/generic/hgcd_matrix.c b/mpn/generic/hgcd_matrix.c
new file mode 100644
index 0000000..54c795d
--- /dev/null
+++ b/mpn/generic/hgcd_matrix.c
@@ -0,0 +1,265 @@
+/* hgcd_matrix.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* For input of size n, matrix elements are of size at most ceil(n/2)
+   - 1, but we need two limbs extra.  The buffer p must provide 4*s
+   limbs; the four elements share it.  M is initialized to the
+   identity matrix (n = 1). */
+void
+mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
+{
+  mp_size_t s = (n+1)/2 + 1;
+  M->alloc = s;
+  M->n = 1;
+  MPN_ZERO (p, 4 * s);
+  M->p[0][0] = p;
+  M->p[0][1] = p + s;
+  M->p[1][0] = p + 2 * s;
+  M->p[1][1] = p + 3 * s;
+
+  M->p[0][0][0] = M->p[1][1][0] = 1;
+}
+
+/* Update column COL, adding in Q * column (1-COL). Temporary storage:
+ * qn + n <= M->alloc, where n is the size of the largest element in
+ * column 1 - COL. */
+void
+mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
+			  unsigned col, mp_ptr tp)
+{
+  ASSERT (col < 2);
+
+  if (qn == 1)
+    {
+      /* Single-limb quotient: a pair of addmul_1 calls suffices, no
+	 scratch needed. */
+      mp_limb_t q = qp[0];
+      mp_limb_t c0, c1;
+
+      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
+      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
+
+      M->p[0][col][M->n] = c0;
+      M->p[1][col][M->n] = c1;
+
+      M->n += (c0 | c1) != 0;
+    }
+  else
+    {
+      unsigned row;
+
+      /* Carries for the unlikely case that we get both high words
+	 from the multiplication and carries from the addition. */
+      mp_limb_t c[2];
+      mp_size_t n;
+
+      /* The matrix will not necessarily grow in size by qn, so we
+	 need normalization in order not to overflow M. */
+
+      for (n = M->n; n + qn > M->n; n--)
+	{
+	  ASSERT (n > 0);
+	  if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
+	    break;
+	}
+
+      ASSERT (qn + n <= M->alloc);
+
+      for (row = 0; row < 2; row++)
+	{
+	  /* mpn_mul requires the larger operand first. */
+	  if (qn <= n)
+	    mpn_mul (tp, M->p[row][1-col], n, qp, qn);
+	  else
+	    mpn_mul (tp, qp, qn, M->p[row][1-col], n);
+
+	  ASSERT (n + qn >= M->n);
+	  c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
+	}
+
+      n += qn;
+
+      if (c[0] | c[1])
+	{
+	  M->p[0][col][n] = c[0];
+	  M->p[1][col][n] = c[1];
+	  n++;
+	}
+      else
+	{
+	  n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
+	  ASSERT (n >= M->n);
+	}
+      M->n = n;
+    }
+
+  ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Since the M1 elements fit in
+   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
+   temporary space M->n */
+void
+mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
+		       mp_ptr tp)
+{
+  mp_size_t n0, n1;
+
+  /* Could avoid copy by some swapping of pointers.  Each row of M is
+     multiplied by the 2x2 single-limb matrix M1; n0 and n1 are the
+     resulting row sizes. */
+  MPN_COPY (tp, M->p[0][0], M->n);
+  n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
+  MPN_COPY (tp, M->p[1][0], M->n);
+  n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
+
+  /* Depends on zero initialization */
+  M->n = MAX(n0, n1);
+  ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
+   of temporary storage (see mpn_matrix22_mul_itch). */
+void
+mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
+		     mp_ptr tp)
+{
+  mp_size_t n;
+
+  /* About the new size of M:s elements. Since M1's diagonal elements
+     are > 0, no element can decrease. The new elements are of size
+     M->n + M1->n, one limb more or less. The computation of the
+     matrix product produces elements of size M->n + M1->n + 1. But
+     the true size, after normalization, may be three limbs smaller.
+
+     The reason that the product has normalized size >= M->n + M1->n -
+     2 is subtle. It depends on the fact that M and M1 can be factored
+     as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
+     M ending with a large power and M1 starting with a large power of
+     the same matrix. */
+
+  /* FIXME: Strassen multiplication gives only a small speedup. In FFT
+     multiplication range, this function could be sped up quite a lot
+     using invariance. */
+  ASSERT (M->n + M1->n < M->alloc);
+
+  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
+	   | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
+
+  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
+	   | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
+
+  mpn_matrix22_mul (M->p[0][0], M->p[0][1],
+		    M->p[1][0], M->p[1][1], M->n,
+		    M1->p[0][0], M1->p[0][1],
+		    M1->p[1][0], M1->p[1][1], M1->n, tp);
+
+  /* Index of last potentially non-zero limb, size is one greater. */
+  n = M->n + M1->n;
+
+  /* Normalize: strip at most three high zero limbs, per the size
+     discussion above. */
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+
+  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
+
+  M->n = n + 1;
+}
+
+/* Multiplies the least significant p limbs of (a;b) by M^-1.
+   Temporary space needed: 2 * (p + M->n)*/
+mp_size_t
+mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,
+			mp_size_t n, mp_ptr ap, mp_ptr bp,
+			mp_size_t p, mp_ptr tp)
+{
+  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
+     = (r11 a - r01 b; - r10 a + r00 b) */
+
+  mp_ptr t0 = tp;
+  mp_ptr t1 = tp + p + M->n;
+  mp_limb_t ah, bh;
+  mp_limb_t cy;
+
+  ASSERT (p + M->n  < n);
+
+  /* First compute the two values depending on a, before overwriting a */
+
+  if (M->n >= p)
+    {
+      mpn_mul (t0, M->p[1][1], M->n, ap, p);
+      mpn_mul (t1, M->p[1][0], M->n, ap, p);
+    }
+  else
+    {
+      mpn_mul (t0, ap, p, M->p[1][1], M->n);
+      mpn_mul (t1, ap, p, M->p[1][0], M->n);
+    }
+
+  /* Update a */
+  MPN_COPY (ap, t0, p);
+  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
+
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][1], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][1], M->n);
+
+  cy = mpn_sub (ap, ap, n, t0, p + M->n);
+  ASSERT (cy <= ah);
+  ah -= cy;
+
+  /* Update b */
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][0], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][0], M->n);
+
+  MPN_COPY (bp, t0, p);
+  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
+  cy = mpn_sub (bp, bp, n, t1, p + M->n);
+  ASSERT (cy <= bh);
+  bh -= cy;
+
+  if (ah > 0 || bh > 0)
+    {
+      ap[n] = ah;
+      bp[n] = bh;
+      n++;
+    }
+  else
+    {
+      /* The subtraction can reduce the size by at most one limb. */
+      if (ap[n-1] == 0 && bp[n-1] == 0)
+	n--;
+    }
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+  return n;
+}
diff --git a/mpn/generic/hgcd_reduce.c b/mpn/generic/hgcd_reduce.c
new file mode 100644
index 0000000..3aee77d
--- /dev/null
+++ b/mpn/generic/hgcd_reduce.c
@@ -0,0 +1,242 @@
+/* hgcd_reduce.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Computes R -= A * B. Result must be non-negative. Normalized down
+   to size an, and resulting size is returned. */
+static mp_size_t
+submul (mp_ptr rp, mp_size_t rn,
+	mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (bn > 0);
+  ASSERT (an >= bn);
+  ASSERT (rn >= an);
+  ASSERT (an + bn <= rn + 1);
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (an + bn);
+
+  mpn_mul (tp, ap, an, bp, bn);
+  /* If the product is one limb longer than R, its top limb must be
+     zero for the subtraction to stay non-negative. */
+  ASSERT ((an + bn <= rn) || (tp[rn] == 0));
+  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn - (an + bn > rn)));
+  TMP_FREE;
+
+  /* Normalize, but never below an. */
+  while (rn > an && (rp[rn-1] == 0))
+    rn--;
+
+  return rn;
+}
+
+/* Computes (a, b)  <--  M^{-1} (a; b), working modulo B^modn - 1 via
+   mpn_mulmod_bnm1 in the general case.  Returns the normalized size. */
+/* FIXME:
+    x Take scratch parameter, and figure out scratch need.
+
+    x Use some fallback for small M->n?
+*/
+static mp_size_t
+hgcd_matrix_apply (const struct hgcd_matrix *M,
+		   mp_ptr ap, mp_ptr bp,
+		   mp_size_t n)
+{
+  mp_size_t an, bn, un, vn, nn;
+  mp_size_t mn[2][2];
+  mp_size_t modn;
+  mp_ptr tp, sp, scratch;
+  mp_limb_t cy;
+  unsigned i, j;
+
+  TMP_DECL;
+
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+  an = n;
+  MPN_NORMALIZE (ap, an);
+  bn = n;
+  MPN_NORMALIZE (bp, bn);
+
+  /* Normalized sizes of the four matrix elements. */
+  for (i = 0; i < 2; i++)
+    for (j = 0; j < 2; j++)
+      {
+	mp_size_t k;
+	k = M->n;
+	MPN_NORMALIZE (M->p[i][j], k);
+	mn[i][j] = k;
+      }
+
+  ASSERT (mn[0][0] > 0);
+  ASSERT (mn[1][1] > 0);
+  ASSERT ( (mn[0][1] | mn[1][0]) > 0);
+
+  TMP_MARK;
+
+  if (mn[0][1] == 0)
+    {
+      /* A unchanged, M = (1, 0; q, 1) */
+      ASSERT (mn[0][0] == 1);
+      ASSERT (M->p[0][0][0] == 1);
+      ASSERT (mn[1][1] == 1);
+      ASSERT (M->p[1][1][0] == 1);
+
+      /* Put B <-- B - q A */
+      nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
+    }
+  else if (mn[1][0] == 0)
+    {
+      /* B unchanged, M = (1, q; 0, 1) */
+      ASSERT (mn[0][0] == 1);
+      ASSERT (M->p[0][0][0] == 1);
+      ASSERT (mn[1][1] == 1);
+      ASSERT (M->p[1][1][0] == 1);
+
+      /* Put A  <-- A - q * B */
+      nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
+    }
+  else
+    {
+      /* A = m00 a + m01 b  ==> a <= A / m00, b <= A / m01.
+	 B = m10 a + m11 b  ==> a <= B / m10, b <= B / m11. */
+      un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
+      vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
+
+      nn = MAX (un, vn);
+      /* In the range of interest, mulmod_bnm1 should always beat mullo. */
+      modn = mpn_mulmod_bnm1_next_size (nn + 1);
+
+      TMP_ALLOC_LIMBS_3 (tp, modn,
+			 sp, modn,
+			 scratch, mpn_mulmod_bnm1_itch (modn, modn, M->n));
+
+      ASSERT (n <= 2*modn);
+
+      if (n > modn)
+	{
+	  /* Fold the inputs modulo B^modn - 1 before multiplying. */
+	  cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
+	  MPN_INCR_U (ap, modn, cy);
+
+	  cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
+	  MPN_INCR_U (bp, modn, cy);
+
+	  n = modn;
+	}
+
+      mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
+      mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
+
+      /* FIXME: Handle the small n case in some better way. */
+      if (n + mn[1][1] < modn)
+	MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
+      if (n + mn[0][1] < modn)
+	MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
+
+      cy = mpn_sub_n (tp, tp, sp, modn);
+      MPN_DECR_U (tp, modn, cy);
+
+      ASSERT (mpn_zero_p (tp + nn, modn - nn));
+
+      mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
+      MPN_COPY (ap, tp, nn);
+      mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
+
+      if (n + mn[1][0] < modn)
+	MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
+      if (n + mn[0][0] < modn)
+	MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
+
+      cy = mpn_sub_n (tp, tp, sp, modn);
+      MPN_DECR_U (tp, modn, cy);
+
+      ASSERT (mpn_zero_p (tp + nn, modn - nn));
+      MPN_COPY (bp, tp, nn);
+
+      while ( (ap[nn-1] | bp[nn-1]) == 0)
+	{
+	  nn--;
+	  ASSERT (nn > 0);
+	}
+    }
+  TMP_FREE;
+
+  return nn;
+}
+
+/* Scratch space (in limbs) needed by mpn_hgcd_reduce for parameters
+   n and p. */
+mp_size_t
+mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
+{
+  mp_size_t itch;
+  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+    {
+      itch = mpn_hgcd_itch (n-p);
+
+      /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *
+	 (p + ceil((n-p)/2) - 1 <= n + p - 1 */
+      if (itch < n + p - 1)
+	itch = n + p - 1;
+    }
+  else
+    {
+      itch = 2*(n-p) + mpn_hgcd_itch (n-p);
+      /* Currently, hgcd_matrix_apply allocates its own storage. */
+    }
+  return itch;
+}
+
+/* Reduces (a, b) of size n by operating on the limbs above position p,
+   accumulating the transformation in M.  Below HGCD_REDUCE_THRESHOLD
+   uses plain mpn_hgcd plus matrix_adjust; above it uses mpn_hgcd_appr
+   on a copy of the high parts followed by hgcd_matrix_apply.  Returns
+   the new size, or 0 if no reduction was possible. */
+/* FIXME: Document storage need. */
+mp_size_t
+mpn_hgcd_reduce (struct hgcd_matrix *M,
+		 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
+		 mp_ptr tp)
+{
+  mp_size_t nn;
+  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+    {
+      nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
+      if (nn > 0)
+	/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+	   = 2 (n - 1) */
+	return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+    }
+  else
+    {
+      /* hgcd_appr destroys its inputs, so work on copies of the high
+	 parts. */
+      MPN_COPY (tp, ap + p, n - p);
+      MPN_COPY (tp + n - p, bp + p, n - p);
+      if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
+	return hgcd_matrix_apply (M, ap, bp, n);
+    }
+  return 0;
+}
diff --git a/mpn/generic/hgcd_step.c b/mpn/generic/hgcd_step.c
new file mode 100644
index 0000000..a978a88
--- /dev/null
+++ b/mpn/generic/hgcd_step.c
@@ -0,0 +1,127 @@
+/* hgcd_step.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Callback for mpn_gcd_subdiv_step, used by mpn_hgcd_step below: each
+   produced quotient is folded into the hgcd matrix M, passed via the
+   opaque pointer p.  gp is asserted NULL here (no gcd is expected to
+   be produced on this path).  */
+static void
+hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+	   mp_srcptr qp, mp_size_t qn, int d)
+{
+  ASSERT (!gp);
+  ASSERT (d >= 0);
+  ASSERT (d <= 1);
+
+  /* Strip high zero limbs; a zero quotient needs no matrix update. */
+  MPN_NORMALIZE (qp, qn);
+  if (qn > 0)
+    {
+      struct hgcd_matrix *M = (struct hgcd_matrix *) p;
+      /* NOTES: This is a bit ugly. A tp area is passed to
+	 gcd_subdiv_step, which stores q at the start of that area. We
+	 now use the rest. */
+      mp_ptr tp = (mp_ptr) qp + qn;
+      mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);
+    }
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+   division. Reduces the size by almost one limb or more, but never
+   below the given size s. Return new size for a and b, or 0 if no
+   more steps are possible.
+
+   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s limbs, and for
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   (resulting size of M) + 1.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + product size <= n - s + n - s + 1 = 2 (n - s) + 1
+   <= N.
+*/
+
+/* One reduction step; see the contract in the comment block above.  */
+mp_size_t
+mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+	       struct hgcd_matrix *M, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  if (n == s + 1)
+    {
+      /* Only one limb of slack above s; fall back to subtraction /
+	 division unless the high limbs are large enough for hgcd2. */
+      if (mask < 4)
+	goto subtract;
+
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else if (mask & GMP_NUMB_HIGHBIT)
+    {
+      /* High limbs are already normalized; use them directly. */
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      /* Shift so the extracted two-limb values are normalized; needs
+	 a third limb, hence the n > s + 1 >= 2 precondition here. */
+      int shift;
+
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step */
+  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
+    {
+      /* Multiply M <- M * M1 */
+      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+      /* Can't swap inputs, so we need to copy. */
+      MPN_COPY (tp, ap, n);
+      /* Multiply M1^{-1} (a;b) */
+      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+    }
+
+ subtract:
+
+  /* hgcd2 failed (or was not applicable); do one subtraction/division
+     step, with hgcd_hook recording the quotient in M. */
+  return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
+}
diff --git a/mpn/generic/invert.c b/mpn/generic/invert.c
new file mode 100644
index 0000000..157ff2b
--- /dev/null
+++ b/mpn/generic/invert.c
@@ -0,0 +1,86 @@
+/* invert.c -- Compute floor((B^{2n}-1)/U) - B^n.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2007, 2009, 2010, 2012, 2014-2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Compute the exact reciprocal {ip,n} = floor((B^{2n}-1)/{dp,n}) - B^n,
+   B = 2^GMP_NUMB_BITS, for a normalized divisor (high bit of dp[n-1]
+   set).  scratch must not overlap ip or dp and must provide at least
+   mpn_invert_itch (n) limbs.  */
+void
+mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  if (n == 1)
+    invert_limb (*ip, *dp);
+  else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
+    {
+	/* Maximum scratch needed by this branch: 2*n */
+	mp_ptr xp;
+
+	xp = scratch;				/* 2 * n limbs */
+	/* n > 1 here */
+	/* Build the dividend B^{2n} - {dp,n}*B^n - 1 in {xp,2n},
+	   then divide exactly. */
+	MPN_FILL (xp, n, GMP_NUMB_MAX);
+	mpn_com (xp + n, dp, n);
+	if (n == 2) {
+	  mpn_divrem_2 (ip, 0, xp, 4, dp);
+	} else {
+	  gmp_pi1_t inv;
+	  invert_pi1 (inv, dp[n-1], dp[n-2]);
+	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
+	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
+	}
+    }
+  else { /* Use approximated inverse; correct the result if needed. */
+      mp_limb_t e; /* The possible error in the approximate inverse */
+
+      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
+      e = mpn_ni_invertappr (ip, dp, n, scratch);
+
+      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
+	/* Code to detect and correct the "off by one" approximation. */
+	mpn_mul_n (scratch, ip, dp, n);
+	e = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need e.*/
+	if (LIKELY(e)) /* The high part can not give a carry by itself. */
+	  e = mpn_add_nc (scratch + n, scratch + n, dp, n, e); /* FIXME:e */
+	/* If the value was wrong (no carry), correct it (increment). */
+	e ^= CNST_LIMB (1);
+	MPN_INCR_U (ip, n, e);
+      }
+  }
+}
diff --git a/mpn/generic/invertappr.c b/mpn/generic/invertappr.c
new file mode 100644
index 0000000..3be5596
--- /dev/null
+++ b/mpn/generic/invertappr.c
@@ -0,0 +1,300 @@
+/* mpn_invertappr and helper functions.  Compute I such that
+   floor((B^{2n}-1)/U) - 1 <= I + B^n <= floor((B^{2n}-1)/U).
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The algorithm used here was inspired by ApproximateReciprocal from "Modern
+   Computer Arithmetic", by Richard P. Brent and Paul Zimmermann.  Special
+   thanks to Paul Zimmermann for his very valuable suggestions on all the
+   theoretical aspects during the work on this code.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2007, 2009, 2010, 2012, 2015, 2016 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* FIXME: The iterative version splits the operand in two slightly unbalanced
+   parts, the use of log_2 (or counting the bits) underestimate the maximum
+   number of iterations.  */
+
+#if TUNE_PROGRAM_BUILD
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
+#define MAYBE_dcpi1_divappr   1
+#else
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
+#define MAYBE_dcpi1_divappr \
+  (INV_NEWTON_THRESHOLD < DC_DIVAPPR_Q_THRESHOLD)
+#if (INV_NEWTON_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD) && \
+    (INV_APPR_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD)
+#undef  INV_MULMOD_BNM1_THRESHOLD
+#define INV_MULMOD_BNM1_THRESHOLD 0 /* always when Newton */
+#endif
+#endif
+
+/* All the three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch), take
+   the strictly normalised value {dp,n} (i.e., most significant bit must be set)
+   as an input, and compute {ip,n}: the approximate reciprocal of {dp,n}.
+
+   Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
+   following conditions are satisfied by the output:
+     0 <= e <= 1;
+     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
+   I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
+	e=1 means that the result _may_ be one less than expected.
+
+   The _bc version returns e=1 most of the time.
+   The _ni version should return e=0 most of the time; only about 1% of
+   possible random input should give e=1.
+
+   When the strict result is needed, i.e., e=0 in the relation above:
+     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
+   the function mpn_invert (ip, dp, n, scratch) should be used instead.  */
+
+/* Maximum scratch needed by this branch (at xp): 2*n */
+static mp_limb_t
+mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr xp)
+{
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, xp, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, xp, mpn_invertappr_itch(n)));
+
+  /* Compute a base value of r limbs. */
+  if (n == 1)
+    invert_limb (*ip, *dp);
+  else {
+    /* n > 1 here */
+    MPN_FILL (xp, n, GMP_NUMB_MAX);
+    mpn_com (xp + n, dp, n);
+
+    /* Now xp contains B^2n - {dp,n}*B^n - 1 */
+
+    /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
+    if (n == 2) {
+      mpn_divrem_2 (ip, 0, xp, 4, dp);
+    } else {
+      gmp_pi1_t inv;
+      invert_pi1 (inv, dp[n-1], dp[n-2]);
+      if (! MAYBE_dcpi1_divappr
+	  || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
+	mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
+      else
+	mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
+      MPN_DECR_U(ip, n, CNST_LIMB (1));
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* mpn_ni_invertappr: computes the approximate reciprocal using Newton's
+   iterations (at least one).
+
+   Inspired by Algorithm "ApproximateReciprocal", published in "Modern Computer
+   Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
+   in version 0.4 of the book.
+
+   Some adaptations were introduced, to allow product mod B^m-1 and return the
+   value e.
+
+   We introduced a correction in such a way that "the value of
+   B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
+   "2B^n-1").
+
+   Maximum scratch needed by this branch <= 2*n, but have to fit 3*rn
+   in the scratch, i.e. 3*rn <= 2*n: we require n>4.
+
+   We use a wrapped product modulo B^m-1.  NOTE: is there any normalisation
+   problem for the [0] class?  It shouldn't: we compute 2*|A*X_h - B^{n+h}| <
+   B^m-1.  We may get [0] if and only if we get AX_h = B^{n+h}.  This can
+   happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1 i.e., AX_h =
+   B^{n+h} - A, then we get into the "negative" branch, where X_h is not
+   incremented (because A < B^n).
+
+   FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
+   is allocated apart.
+ */
+
+/* Newton-iteration approximate inverse; see the long comment block
+   above for the algorithm and the e-return contract.  */
+mp_limb_t
+mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  mp_limb_t cy;
+  mp_size_t rn, mn;
+  mp_size_t sizes[NPOWS], *sizp;
+  mp_ptr tp;
+  TMP_DECL;
+#define xp scratch
+
+  ASSERT (n > 4);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  /* Compute the computation precisions from highest to lowest, leaving the
+     base case size in 'rn'.  */
+  sizp = sizes;
+  rn = n;
+  do {
+    *sizp = rn;
+    rn = (rn >> 1) + 1;
+    ++sizp;
+  } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
+
+  /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
+  dp += n;
+  ip += n;
+
+  /* Compute a base value of rn limbs. */
+  mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
+
+  TMP_MARK;
+
+  /* Pre-allocate the mulmod_bnm1 scratch once, sized for the largest
+     (final) iteration; smaller iterations reuse it.  */
+  if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
+    {
+      mn = mpn_mulmod_bnm1_next_size (n + 1);
+      tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
+    }
+  /* Use Newton's iterations to get the desired precision.*/
+
+  while (1) {
+    n = *--sizp;
+    /*
+      v    n  v
+      +----+--+
+      ^ rn ^
+    */
+
+    /* Compute i_jd . */
+    if (BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
+	|| ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
+      /* Plain product path, truncated mod B^(n+1). */
+      /* FIXME: We do only need {xp,n+1}*/
+      mpn_mul (xp, dp - n, n, ip - rn, rn);
+      mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
+      cy = CNST_LIMB(1); /* Remember we truncated, Mod B^(n+1) */
+      /* We computed (truncated) {xp,n+1} <- 1.{ip,rn} * 0.{dp,n} */
+    } else { /* Use B^mn-1 wraparound */
+      mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
+      /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
+      /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
+      /* Add dp*B^rn mod (B^mn-1) */
+      ASSERT (n >= mn - rn);
+      cy = mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
+      cy = mpn_add_nc (xp, xp, dp - (n - (mn - rn)), n - (mn - rn), cy);
+      /* Subtract B^{rn+n}, maybe only compensate the carry*/
+      xp[mn] = CNST_LIMB (1); /* set a limit for DECR_U */
+      MPN_DECR_U (xp + rn + n - mn, 2 * mn + 1 - rn - n, CNST_LIMB (1) - cy);
+      MPN_DECR_U (xp, mn, CNST_LIMB (1) - xp[mn]); /* if DECR_U eroded xp[mn] */
+      cy = CNST_LIMB(0); /* Remember we are working Mod B^mn-1 */
+    }
+
+    if (xp[n] < CNST_LIMB (2)) { /* "positive" residue class */
+      cy = xp[n]; /* 0 <= cy <= 1 here. */
+#if HAVE_NATIVE_mpn_sublsh1_n
+      if (cy++) {
+	if (mpn_cmp (xp, dp - n, n) > 0) {
+	  mp_limb_t chk;
+	  chk = mpn_sublsh1_n (xp, xp, dp - n, n);
+	  ASSERT (chk == xp[n]);
+	  ++ cy;
+	} else
+	  ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+      }
+#else /* no mpn_sublsh1_n*/
+      if (cy++ && !mpn_sub_n (xp, xp, dp - n, n)) {
+	ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+	++cy;
+      }
+#endif
+      /* 1 <= cy <= 3 here. */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+      if (mpn_cmp (xp, dp - n, n) > 0) {
+	ASSERT_NOCARRY (mpn_rsblsh1_n (xp + n, xp, dp - n, n));
+	++cy;
+      } else
+	ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
+#else /* no mpn_rsblsh1_n*/
+      if (mpn_cmp (xp, dp - n, n) > 0) {
+	ASSERT_NOCARRY (mpn_sub_n (xp, xp, dp - n, n));
+	++cy;
+      }
+      ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
+#endif
+      MPN_DECR_U(ip - rn, rn, cy); /* 1 <= cy <= 4 here. */
+    } else { /* "negative" residue class */
+      ASSERT (xp[n] >= GMP_NUMB_MAX - CNST_LIMB(1));
+      MPN_DECR_U(xp, n + 1, cy);
+      if (xp[n] != GMP_NUMB_MAX) {
+	MPN_INCR_U(ip - rn, rn, CNST_LIMB (1));
+	ASSERT_CARRY (mpn_add_n (xp, xp, dp - n, n));
+      }
+      mpn_com (xp + 2 * n - rn, xp + n - rn, rn);
+    }
+
+    /* Compute x_ju_j. FIXME:We need {xp+rn,rn}, mulhi? */
+    mpn_mul_n (xp, xp + 2 * n - rn, ip - rn, rn);
+    cy = mpn_add_n (xp + rn, xp + rn, xp + 2 * n - rn, 2 * rn - n);
+    cy = mpn_add_nc (ip - n, xp + 3 * rn - n, xp + n + rn, n - rn, cy);
+    MPN_INCR_U (ip - rn, rn, cy);
+    if (sizp == sizes) { /* Get out of the cycle */
+      /* Check for possible carry propagation from below. */
+      cy = xp[3 * rn - n - 1] > GMP_NUMB_MAX - CNST_LIMB (7); /* Be conservative. */
+      /*    cy = mpn_add_1 (xp + rn, xp + rn, 2*rn - n, 4); */
+      break;
+    }
+    rn = n;
+  }
+  TMP_FREE;
+
+  return cy;
+#undef xp
+}
+
+/* Dispatch between the basecase and the Newton implementation of the
+   approximate inverse, based on the operand size n; see the e-return
+   contract in the comment block above.  */
+mp_limb_t
+mpn_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  return BELOW_THRESHOLD (n, INV_NEWTON_THRESHOLD)
+    ? mpn_bc_invertappr (ip, dp, n, scratch)
+    : mpn_ni_invertappr (ip, dp, n, scratch);
+}
diff --git a/mpn/generic/jacbase.c b/mpn/generic/jacbase.c
new file mode 100644
index 0000000..391ceac
--- /dev/null
+++ b/mpn/generic/jacbase.c
@@ -0,0 +1,242 @@
+/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.
+
+   THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
+   INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
+
+Copyright 1999-2002, 2010, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Use the simple loop by default.  The generic count_trailing_zeros is not
+   very fast, and the extra trickery of method 3 has proven to be less use
+   than might have been thought.  */
+#ifndef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD  2
+#endif
+
+
+/* Use count_trailing_zeros.  */
+#if JACOBI_BASE_METHOD == 1
+#define PROCESS_TWOS_ANY                                \
+  {                                                     \
+    mp_limb_t  twos;                                    \
+    count_trailing_zeros (twos, a);                     \
+    result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b);        \
+    a >>= twos;                                         \
+  }
+#define PROCESS_TWOS_EVEN  PROCESS_TWOS_ANY
+#endif
+
+/* Use a simple loop.  A disadvantage of this is that there's a branch on a
+   50/50 chance of a 0 or 1 low bit.  */
+#if JACOBI_BASE_METHOD == 2
+#define PROCESS_TWOS_EVEN               \
+  {                                     \
+    int  two;                           \
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    do                                  \
+      {                                 \
+	a >>= 1;                        \
+	result_bit1 ^= two;             \
+	ASSERT (a != 0);                \
+      }                                 \
+    while ((a & 1) == 0);               \
+  }
+#define PROCESS_TWOS_ANY        \
+  if ((a & 1) == 0)             \
+    PROCESS_TWOS_EVEN;
+#endif
+
+/* Process one bit arithmetically, then a simple loop.  This cuts the loop
+   condition down to a 25/75 chance, which should branch predict better.
+   The CPU will need a reasonable variable left shift.  */
+#if JACOBI_BASE_METHOD == 3
+#define PROCESS_TWOS_EVEN               \
+  {                                     \
+    int  two, mask, shift;              \
+					\
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    mask = (~a & 2);                    \
+    a >>= 1;                            \
+					\
+    shift = (~a & 1);                   \
+    a >>= shift;                        \
+    result_bit1 ^= two ^ (two & mask);  \
+					\
+    while ((a & 1) == 0)                \
+      {                                 \
+	a >>= 1;                        \
+	result_bit1 ^= two;             \
+	ASSERT (a != 0);                \
+      }                                 \
+  }
+#define PROCESS_TWOS_ANY                \
+  {                                     \
+    int  two, mask, shift;              \
+					\
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    shift = (~a & 1);                   \
+    a >>= shift;                        \
+					\
+    mask = shift << 1;                  \
+    result_bit1 ^= (two & mask);        \
+					\
+    while ((a & 1) == 0)                \
+      {                                 \
+	a >>= 1;                        \
+	result_bit1 ^= two;             \
+	ASSERT (a != 0);                \
+      }                                 \
+  }
+#endif
+
+#if JACOBI_BASE_METHOD < 4
+/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
+   with a restricted range of inputs accepted, namely b>1, b odd.
+
+   The initial result_bit1 is taken as a parameter for the convenience of
+   mpz_kronecker_ui() et al.  The sign changes both here and in those
+   routines accumulate nicely in bit 1, see the JACOBI macros.
+
+   The return value here is the normal +1, 0, or -1.  Note that +1 and -1
+   have bit 1 in the "BIT1" sense, which could be useful if the caller is
+   accumulating it into some extended calculation.
+
+   Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
+   possible, but a couple of tests suggest it's not a significant speedup,
+   and may even be a slowdown, so what's here is good enough for now. */
+
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
+{
+  ASSERT (b & 1);  /* b odd */
+  ASSERT (b != 1);
+
+  /* (0/b) = 0 for b > 1. */
+  if (a == 0)
+    return 0;
+
+  /* Strip factors of two from a, folding the (2/b) signs into
+     result_bit1 (see the PROCESS_TWOS_* macros above). */
+  PROCESS_TWOS_ANY;
+  if (a == 1)
+    goto done;
+
+  if (a >= b)
+    goto a_gt_b;
+
+  for (;;)
+    {
+      /* a < b: apply quadratic reciprocity and swap the operands. */
+      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
+      MP_LIMB_T_SWAP (a, b);
+
+    a_gt_b:
+      do
+	{
+	  /* working on (a/b), a,b odd, a>=b */
+	  ASSERT (a & 1);
+	  ASSERT (b & 1);
+	  ASSERT (a >= b);
+
+	  /* a == b here means gcd > 1, hence symbol 0. */
+	  if ((a -= b) == 0)
+	    return 0;
+
+	  /* a is now even; strip its twos with sign tracking. */
+	  PROCESS_TWOS_EVEN;
+	  if (a == 1)
+	    goto done;
+	}
+      while (a >= b);
+    }
+
+ done:
+  return JACOBI_BIT1_TO_PN (result_bit1);
+}
+#endif
+
+#if JACOBI_BASE_METHOD == 4
+/* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a
+ * parameter. We have no need for the convention that the sign is in
+ * bit 1, internally we use bit 0. */
+
+/* FIXME: Could try table-based count_trailing_zeros. */
+/* Branch-free binary variant: a and b are kept shifted right so the
+   least significant one bit is implicit; the sign is accumulated in
+   bit 0 of 'bit'.  */
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)
+{
+  int c;
+
+  ASSERT (b & 1);
+  ASSERT (b > 1);
+
+  if (a == 0)
+    /* This is the only line which depends on b > 1 */
+    return 0;
+
+  /* Move the caller's "BIT1" convention down to bit 0. */
+  bit >>= 1;
+
+  /* Below, we represent a and b shifted right so that the least
+     significant one bit is implicit. */
+
+  b >>= 1;
+
+  /* With b shifted, bit 0 of b ^ (b >> 1) is set exactly when the
+     original b is 3 or 5 mod 8, i.e. when (2/b) = -1; an odd count
+     of stripped twos then flips the sign. */
+  count_trailing_zeros (c, a);
+  bit ^= c & (b ^ (b >> 1));
+
+  /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */
+  a >>= c;
+  a >>= 1;
+
+  do
+    {
+      mp_limb_t t = a - b;
+      mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);
+
+      /* a == b means gcd > 1 (both odd, > 1), hence symbol 0. */
+      if (t == 0)
+	return 0;
+
+      /* If b > a, invoke reciprocity */
+      bit ^= (bgta & a & b);
+
+      /* b <-- min (a, b) */
+      b += (bgta & t);
+
+      /* a <-- |a - b| */
+      a = (t ^ bgta) - bgta;
+
+      /* Number of trailing zeros is the same no matter if we look at
+       * t or a, but using t gives more parallelism. */
+      count_trailing_zeros (c, t);
+      c ++;
+      /* (2/b) = -1 if b = 3 or 5 mod 8 */
+      bit ^= c & (b ^ (b >> 1));
+      a >>= c;
+    }
+  while (a > 0);
+
+  return 1-2*(bit & 1);
+}
+#endif /* JACOBI_BASE_METHOD == 4 */
diff --git a/mpn/generic/jacobi.c b/mpn/generic/jacobi.c
new file mode 100644
index 0000000..d98b126
--- /dev/null
+++ b/mpn/generic/jacobi.c
@@ -0,0 +1,294 @@
+/* jacobi.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_DC_THRESHOLD
+#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
+#endif
+
+/* Schönhage's rules:
+ *
+ * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
+ *
+ * If r1 is odd, then
+ *
+ *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
+ *
+ * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
+ *
+ * If r1 is even, r2 must be odd. We have
+ *
+ *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
+ *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
+ *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
+ *
+ * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
+ * q1 times gives
+ *
+ *   (r1 | r0) = (r1 | r2) = (r3 | r2)
+ *
+ * On the other hand, if r1 = 2 (mod 4), the sign factor is
+ * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
+ *
+ *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
+ *   = q1 (r0-1)/2 + q1 (q1-1)/2
+ *
+ * and we can summarize the even case as
+ *
+ *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
+ *
+ * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
+ *
+ * What about termination? The remainder sequence ends with (0|1) = 1
+ * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
+ * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
+ * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
+ *
+ * Examples: (11|15) = - (15|11) = - (4|11)
+ *            (4|11) =    (4| 3) =   (1| 3)
+ *            (1| 3) = (3|1) = (0|1) = 1
+ *
+ *             (2|7) = (2|1) = (0|1) = 1
+ *
+ * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
+ *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
+ *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
+ *
+ */
+
+/* In principle, the state consists of four variables: e (one bit), a,
+   b (two bits each), d (one bit). Collected factors are (-1)^e. a and
+   b are the least significant bits of the current remainders. d
+   (denominator) is 0 if we're currently subtracting multiplies of a
+   from b, and 1 if we're subtracting b from a.
+
+   e is stored in the least significant bit, while a, b and d are
+   coded as only 13 distinct values in bits 1-4, according to the
+   following table. For rows not mentioning d, the value is either
+   implied, or it doesn't matter. */
+
+#if WANT_ASSERT
+static const struct
+{
+  unsigned char a;
+  unsigned char b;
+} decode_table[13] = {
+  /*  0 */ { 0, 1 },
+  /*  1 */ { 0, 3 },
+  /*  2 */ { 1, 1 },
+  /*  3 */ { 1, 3 },
+  /*  4 */ { 2, 1 },
+  /*  5 */ { 2, 3 },
+  /*  6 */ { 3, 1 },
+  /*  7 */ { 3, 3 }, /* d = 1 */
+  /*  8 */ { 1, 0 },
+  /*  9 */ { 1, 2 },
+  /* 10 */ { 3, 0 },
+  /* 11 */ { 3, 2 },
+  /* 12 */ { 3, 3 }, /* d = 0 */
+};
+#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
+#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
+#endif /* WANT_ASSERT */
+
+const unsigned char jacobi_table[208] = {
+#include "jacobitab.h"
+};
+
+#define BITS_FAIL 31
+
+/* Callback for mpn_gcd_subdiv_step, used by mpn_jacobi_n: a produced
+   gcd other than 1 means the Jacobi symbol is 0, signalled by setting
+   the state to BITS_FAIL; each quotient updates the sign state using
+   its two low bits.  */
+static void
+jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+	     mp_srcptr qp, mp_size_t qn, int d)
+{
+  unsigned *bitsp = (unsigned *) p;
+
+  if (gp)
+    {
+      ASSERT (gn > 0);
+      if (gn != 1 || gp[0] != 1)
+	{
+	  /* gcd (a, b) > 1, so the symbol is 0; abort the state. */
+	  *bitsp = BITS_FAIL;
+	  return;
+	}
+    }
+
+  if (qp)
+    {
+      ASSERT (qn > 0);
+      ASSERT (d >= 0);
+      /* Only the two low bits of the quotient affect the sign. */
+      *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
+    }
+}
+
+#define CHOOSE_P(n) (2*(n) / 3)
+
+/* Compute the Jacobi symbol of {ap,n} over {bp,n}, with the initial
+   sign/denominator state in 'bits' (see the state-encoding comment
+   above).  Both operands are clobbered.  Returns +1, 0 or -1.  */
+int
+mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
+{
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+  mp_ptr tp;
+
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+  ASSERT ( (bp[0] | ap[0]) & 1);
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+  /* Size the scratch for the divide-and-conquer phase as well, if it
+     will be used. */
+  if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+    {
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t dc_scratch;
+
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+
+      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (dc_scratch > scratch)
+	scratch = dc_scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(scratch);
+
+  /* Phase 1: divide-and-conquer reduction via hgcd_jacobi, while the
+     operands stay above the threshold. */
+  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = 2*n/3;
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+
+      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
+			    tp + matrix_scratch);
+      if (nn > 0)
+	{
+	  ASSERT (M.n <= (n - p - 1)/2);
+	  ASSERT (M.n + p <= (p + n - 1) / 2);
+	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+	}
+      else
+	{
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
+	  if (!n)
+	    {
+	      TMP_FREE;
+	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+	    }
+	}
+    }
+
+  /* Phase 2: reduce with two-limb hgcd2 steps until at most 2 limbs
+     remain. */
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+	{
+	  ah = ap[n-1]; al = ap[n-2];
+	  bh = bp[n-1]; bl = bp[n-2];
+	}
+      else
+	{
+	  /* Normalize the extracted two-limb high parts. */
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+	}
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
+	{
+	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+	  MP_PTR_SWAP (ap, tp);
+	}
+      else
+	{
+	  /* mpn_hgcd2 has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
+	  if (!n)
+	    {
+	      TMP_FREE;
+	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+	    }
+	}
+    }
+
+  /* NOTE(review): bits >= 16 appears to encode that the roles of the
+     numerator and denominator are swapped in the state — confirm
+     against the state table above. */
+  if (bits >= 16)
+    MP_PTR_SWAP (ap, bp);
+
+  ASSERT (bp[0] & 1);
+
+  /* Base cases: one or two limbs left. */
+  if (n == 1)
+    {
+      mp_limb_t al, bl;
+      al = ap[0];
+      bl = bp[0];
+
+      TMP_FREE;
+      if (bl == 1)
+	return 1 - 2*(bits & 1);
+      else
+	return mpn_jacobi_base (al, bl, bits << 1);
+    }
+
+  else
+    {
+      int res = mpn_jacobi_2 (ap, bp, bits & 1);
+      TMP_FREE;
+      return res;
+    }
+}
diff --git a/mpn/generic/jacobi_2.c b/mpn/generic/jacobi_2.c
new file mode 100644
index 0000000..028b8a4
--- /dev/null
+++ b/mpn/generic/jacobi_2.c
@@ -0,0 +1,351 @@
+/* jacobi_2.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_2_METHOD
+#define JACOBI_2_METHOD 2
+#endif
+
+/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary
+   two-limb numbers. */
+#if JACOBI_2_METHOD == 1
/* Binary (shift-and-subtract) algorithm.  Factors of two are stripped
   from a with the (2/b) rule -- (2/b) = -1 iff b = 3 or 5 (mod 8),
   which is bit 0 of bl ^ (bl >> 1) in the halved representation --
   and reduction is by subtraction, applying quadratic reciprocity
   when the roles of a and b swap.  After the initial shifts the pairs
   (ah,al) and (bh,bl) hold (x-1)/2 for the current odd values x, as
   witnessed by the final 2*al+1 / 2*bl+1 reconstruction.  The sign is
   accumulated in bit 0 of BIT.  */
int
mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
{
  mp_limb_t ah, al, bh, bl;
  int c;

  al = ap[0];
  ah = ap[1];
  bl = bp[0];
  bh = bp[1];

  ASSERT (bl & 1);

  /* b is odd; from here on (bh,bl) holds (b-1)/2.  */
  bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1);
  bh >>= 1;

  if ( (bh | bl) == 0)
    return 1 - 2*(bit & 1);	/* b == 1: (a|1) = 1, with accumulated sign */

  if ( (ah | al) == 0)
    return 0;			/* (0|b) = 0 for b > 1 */

  if (al == 0)
    {
      /* a has at least GMP_NUMB_BITS factors of two; apply the (2/b)
	 rule once per factor removed.  */
      al = ah;
      ah = 0;
      bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
    }
  count_trailing_zeros (c, al);
  bit ^= c & (bl ^ (bl >> 1));

  /* Strip the c twos plus one extra bit, putting a in the same halved
     representation as b.  */
  c++;
  if (UNLIKELY (c == GMP_NUMB_BITS))
    {
      al = ah;
      ah = 0;
    }
  else
    {
      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
      ah >>= c;
    }
  /* Reduce until both operands fit in a single limb.  */
  while ( (ah | bh) > 0)
    {
      mp_limb_t th, tl;
      mp_limb_t bgta;

      sub_ddmmss (th, tl, ah, al, bh, bl);
      if ( (tl | th) == 0)
	return 0;		/* a == b > 1: non-trivial common factor */

      /* All-ones when b > a (the subtraction borrowed), else zero.  */
      bgta = LIMB_HIGHBIT_TO_MASK (th);

      /* If b > a, invoke reciprocity */
      bit ^= (bgta & al & bl);

      /* b <-- min (a, b) */
      add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta);

      if ( (bh | bl) == 0)
	return 1 - 2*(bit & 1);

      /* a <-- |a - b| */
      al = (bgta ^ tl) - bgta;
      ah = (bgta ^ th);

      if (UNLIKELY (al == 0))
	{
	  /* If b > a, al == 0 implies that we have a carry to
	     propagate. */
	  al = ah - bgta;
	  ah = 0;
	  bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
	}
      count_trailing_zeros (c, al);
      c++;
      bit ^= c & (bl ^ (bl >> 1));

      if (UNLIKELY (c == GMP_NUMB_BITS))
	{
	  al = ah;
	  ah = 0;
	}
      else
	{
	  al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
	  ah >>= c;
	}
    }

  ASSERT (bl > 0);

  /* Single-limb phase; stay here while either halved operand still
     has its high bit set, since then 2*x+1 below would overflow.  */
  while ( (al | bl) & GMP_LIMB_HIGHBIT)
    {
      /* Need an extra comparison to get the mask. */
      mp_limb_t t = al - bl;
      mp_limb_t bgta = - (bl > al);

      if (t == 0)
	return 0;

      /* If b > a, invoke reciprocity */
      bit ^= (bgta & al & bl);

      /* b <-- min (a, b) */
      bl += (bgta & t);

      /* a <-- |a - b| */
      al = (t ^ bgta) - bgta;

      /* Number of trailing zeros is the same no matter if we look at
       * t or a, but using t gives more parallelism. */
      count_trailing_zeros (c, t);
      c ++;
      /* (2/b) = -1 if b = 3 or 5 mod 8 */
      bit ^= c & (bl ^ (bl >> 1));

      if (UNLIKELY (c == GMP_NUMB_BITS))
	return 1 - 2*(bit & 1);

      al >>= c;
    }

  /* Here we have a little impedance mismatch. Better to inline it? */
  /* Reconstruct the odd values from the halved representation;
     mpn_jacobi_base wants the sign in bit 1, hence the shift.  */
  return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1);
}
+#elif JACOBI_2_METHOD == 2
/* Euclidean-style reduction: alternately reduce a mod b and b mod a
   by repeated subtraction, stripping factors of two as they appear.
   The sign is accumulated in bit 1 of BIT (note the initial shift);
   "bit ^= x & y" applies reciprocity, flipping the sign iff both
   operands are = 3 (mod 4), i.e. iff bit 1 of (x & y) is set.  The
   labels mark the states where b (b_reduced) or both operands
   (ab_reduced) have shrunk to one limb.  */
int
mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
{
  mp_limb_t ah, al, bh, bl;
  int c;

  al = ap[0];
  ah = ap[1];
  bl = bp[0];
  bh = bp[1];

  ASSERT (bl & 1);

  /* Use bit 1. */
  bit <<= 1;

  if (bh == 0 && bl == 1)
    /* (a|1) = 1 */
    return 1 - (bit & 2);

  if (al == 0)
    {
      if (ah == 0)
	/* (0|b) = 0, b > 1 */
	return 0;

      count_trailing_zeros (c, ah);
      /* (2|b) = -1 iff b = 3 or 5 (mod 8): bit 1 of bl ^ (bl >> 1),
	 applied once per factor of two removed.  */
      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));

      /* Swap operands; reciprocity sign applied below.  */
      al = bl;
      bl = ah >> c;

      if (bl == 1)
	/* (1|b) = 1 */
	return 1 - (bit & 2);

      ah = bh;

      bit ^= al & bl;

      goto b_reduced;
    }
  if ( (al & 1) == 0)
    {
      /* Make a odd.  */
      count_trailing_zeros (c, al);

      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
      ah >>= c;
      bit ^= (c << 1) & (bl ^ (bl >> 1));
    }
  if (ah == 0)
    {
      if (bh > 0)
	{
	  /* a fits one limb but b does not: swap and reduce b.  */
	  bit ^= al & bl;
	  MP_LIMB_T_SWAP (al, bl);
	  ah = bh;
	  goto b_reduced;
	}
      goto ab_reduced;
    }

  while (bh > 0)
    {
      /* Compute (a|b) */
      while (ah > bh)
	{
	  sub_ddmmss (ah, al, ah, al, bh, bl);
	  if (al == 0)
	    {
	      count_trailing_zeros (c, ah);
	      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));

	      al = bl;
	      bl = ah >> c;
	      ah = bh;

	      bit ^= al & bl;
	      goto b_reduced;
	    }
	  count_trailing_zeros (c, al);
	  bit ^= (c << 1) & (bl ^ (bl >> 1));
	  al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
	  ah >>= c;
	}
      if (ah == bh)
	goto cancel_hi;

      if (ah == 0)
	{
	  bit ^= al & bl;
	  MP_LIMB_T_SWAP (al, bl);
	  ah = bh;
	  break;
	}

      bit ^= al & bl;

      /* Compute (b|a) */
      while (bh > ah)
	{
	  sub_ddmmss (bh, bl, bh, bl, ah, al);
	  if (bl == 0)
	    {
	      count_trailing_zeros (c, bh);
	      bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1));

	      bl = bh >> c;
	      bit ^= al & bl;
	      goto b_reduced;
	    }
	  count_trailing_zeros (c, bl);
	  bit ^= (c << 1) & (al ^ (al >> 1));
	  bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl >> c);
	  bh >>= c;
	}
      bit ^= al & bl;

      /* Compute (a|b) */
      if (ah == bh)
	{
	cancel_hi:
	  /* High limbs are equal; the difference fits one limb.  */
	  if (al < bl)
	    {
	      MP_LIMB_T_SWAP (al, bl);
	      bit ^= al & bl;
	    }
	  al -= bl;
	  if (al == 0)
	    return 0;		/* a == b > 1: common factor */

	  count_trailing_zeros (c, al);
	  bit ^= (c << 1) & (bl ^ (bl >> 1));
	  al >>= c;

	  if (al == 1)
	    return 1 - (bit & 2);

	  MP_LIMB_T_SWAP (al, bl);
	  bit ^= al & bl;
	  break;
	}
    }

 b_reduced:
  /* Compute (a|b), with b a single limb. */
  ASSERT (bl & 1);

  if (bl == 1)
    /* (a|1) = 1 */
    return 1 - (bit & 2);

  while (ah > 0)
    {
      /* Subtract b once (with borrow), then strip twos from a.  */
      ah -= (al < bl);
      al -= bl;
      if (al == 0)
	{
	  if (ah == 0)
	    return 0;
	  count_trailing_zeros (c, ah);
	  bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
	  al = ah >> c;
	  goto ab_reduced;
	}
      count_trailing_zeros (c, al);

      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
      ah >>= c;
      bit ^= (c << 1) & (bl ^ (bl >> 1));
    }
 ab_reduced:
  /* Both operands fit in one limb and b > 1; finish with the
     single-limb routine (which takes the sign in bit 1).  */
  ASSERT (bl & 1);
  ASSERT (bl > 1);

  return mpn_jacobi_base (al, bl, bit);
}
+#else
+#error Unsupported value for JACOBI_2_METHOD
+#endif
diff --git a/mpn/generic/logops_n.c b/mpn/generic/logops_n.c
new file mode 100644
index 0000000..3adba2c
--- /dev/null
+++ b/mpn/generic/logops_n.c
@@ -0,0 +1,77 @@
+/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#ifdef OPERATION_and_n
+#define func __MPN(and_n)
+#define call mpn_and_n
+#endif
+
+#ifdef OPERATION_andn_n
+#define func __MPN(andn_n)
+#define call mpn_andn_n
+#endif
+
+#ifdef OPERATION_nand_n
+#define func __MPN(nand_n)
+#define call mpn_nand_n
+#endif
+
+#ifdef OPERATION_ior_n
+#define func __MPN(ior_n)
+#define call mpn_ior_n
+#endif
+
+#ifdef OPERATION_iorn_n
+#define func __MPN(iorn_n)
+#define call mpn_iorn_n
+#endif
+
+#ifdef OPERATION_nior_n
+#define func __MPN(nior_n)
+#define call mpn_nior_n
+#endif
+
+#ifdef OPERATION_xor_n
+#define func __MPN(xor_n)
+#define call mpn_xor_n
+#endif
+
+#ifdef OPERATION_xnor_n
+#define func __MPN(xnor_n)
+#define call mpn_xnor_n
+#endif
+
/* Out-of-line definition of one mpn logical operation, selected at
   build time by exactly one OPERATION_<op> macro (see the #ifdef
   chain above): `func` expands to the exported __MPN(<op>) symbol
   this translation unit provides, and `call` to the corresponding
   mpn_<op> name -- presumably an inline or macro supplied by the GMP
   headers; confirm against gmp.h.  */
void
func (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  call (rp, up, vp, n);
}
diff --git a/mpn/generic/lshift.c b/mpn/generic/lshift.c
new file mode 100644
index 0000000..7e1fdef
--- /dev/null
+++ b/mpn/generic/lshift.c
@@ -0,0 +1,72 @@
+/* mpn_lshift -- Shift left low level.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left
+   and store the n least significant limbs of the result at rp.
+   Return the bits shifted out from the most significant limb.
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be >= up.
+*/
+
+mp_limb_t
+mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+  up += n;
+  rp += n;
+
+  tnc = GMP_NUMB_BITS - cnt;
+  low_limb = *--up;
+  retval = low_limb >> tnc;
+  high_limb = (low_limb << cnt) & GMP_NUMB_MASK;
+
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = high_limb | (low_limb >> tnc);
+      high_limb = (low_limb << cnt) & GMP_NUMB_MASK;
+    }
+  *--rp = high_limb;
+
+  return retval;
+}
diff --git a/mpn/generic/lshiftc.c b/mpn/generic/lshiftc.c
new file mode 100644
index 0000000..a583602
--- /dev/null
+++ b/mpn/generic/lshiftc.c
@@ -0,0 +1,73 @@
+/* mpn_lshiftc -- Shift left low level with complement.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left
+   and store the n least significant limbs of the result at rp.
+   Return the bits shifted out from the most significant limb.
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be >= up.
+*/
+
+mp_limb_t
+mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+  up += n;
+  rp += n;
+
+  tnc = GMP_NUMB_BITS - cnt;
+  low_limb = *--up;
+  retval = low_limb >> tnc;
+  high_limb = (low_limb << cnt);
+
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
+      high_limb = low_limb << cnt;
+    }
+  *--rp = (~high_limb) & GMP_NUMB_MASK;
+
+  return retval;
+}
diff --git a/mpn/generic/matrix22_mul.c b/mpn/generic/matrix22_mul.c
new file mode 100644
index 0000000..6a1299a
--- /dev/null
+++ b/mpn/generic/matrix22_mul.c
@@ -0,0 +1,321 @@
+/* matrix22_mul.c.
+
+   Contributed by Niels Möller and Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003-2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define MUL(rp, ap, an, bp, bn) do {		\
+  if (an >= bn)					\
+    mpn_mul (rp, ap, an, bp, bn);		\
+  else						\
+    mpn_mul (rp, bp, bn, ap, an);		\
+} while (0)
+
+/* Inputs are unsigned. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  int c;
+  MPN_CMP (c, ap, bp, n);
+  if (c >= 0)
+    {
+      mpn_sub_n (rp, ap, bp, n);
+      return 0;
+    }
+  else
+    {
+      mpn_sub_n (rp, bp, ap, n);
+      return 1;
+    }
+}
+
+static int
+add_signed_n (mp_ptr rp,
+	      mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
+{
+  if (as != bs)
+    return as ^ abs_sub_n (rp, ap, bp, n);
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
+      return as;
+    }
+}
+
+mp_size_t
+mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
+{
+  if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+      || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
+    return 3*rn + 2*mn;
+  else
+    return 3*(rn + mn) + 5;
+}
+
+/* Algorithm:
+
+    / s0 \   /  1  0  0  0 \ / r0 \
+    | s1 |   |  0  1  0  1 | | r1 |
+    | s2 |   |  0  0 -1  1 | | r2 |
+    | s3 | = |  0  1 -1  1 | \ r3 /
+    | s4 |   | -1  1 -1  1 |
+    | s5 |   |  0  1  0  0 |
+    \ s6 /   \  0  0  1  0 /
+
+    / t0 \   /  1  0  0  0 \ / m0 \
+    | t1 |   |  0  1  0  1 | | m1 |
+    | t2 |   |  0  0 -1  1 | | m2 |
+    | t3 | = |  0  1 -1  1 | \ m3 /
+    | t4 |   | -1  1 -1  1 |
+    | t5 |   |  0  1  0  0 |
+    \ t6 /   \  0  0  1  0 /
+
+  Note: the two matrices above are the same, but s_i and t_i are used
+  in the same product, only for i<4, see "A Strassen-like Matrix
+  Multiplication suited for squaring and higher power computation" by
+  M. Bodrato, in Proceedings of ISSAC 2010.
+
+    / r0 \   / 1 0  0  0  0  1  0 \ / s0*t0 \
+    | r1 | = | 0 0 -1  1 -1  1  0 | | s1*t1 |
+    | r2 |   | 0 1  0 -1  0 -1 -1 | | s2*t2 |
+    \ r3 /   \ 0 1  1 -1  0 -1  0 / | s3*t3 |
+				    | s4*t5 |
+				    | s5*t6 |
+				    \ s6*t4 /
+
+  The scheduling uses two temporaries U0 and U1 to store products, and
+  two, S0 and T0, to store combinations of entries of the two
+  operands.
+*/
+
/* Computes R = R * M with seven multiplications, following the
 * Strassen-like scheme in the comment above.  Elements are numbers
 * R = (r0, r1; r2, r3).
 *
 * Intermediate sums/differences are kept as a magnitude plus a
 * separate sign flag (the *s int variables, 1 meaning negative),
 * since mpn values are unsigned.
 *
 * Resulting elements are of size up to rn + mn + 1.
 *
 * Temporary storage: 3 rn + 3 mn + 5. */
static void
mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
			   mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
			   mp_ptr tp)
{
  mp_ptr s0, t0, u0, u1;
  int r1s, r3s, s0s, t0s, u1s;
  /* Carve the four scratch areas out of tp.  */
  s0 = tp; tp += rn + 1;
  t0 = tp; tp += mn + 1;
  u0 = tp; tp += rn + mn + 1;
  u1 = tp; /* rn + mn + 2 */

  MUL (u0, r1, rn, m2, mn);		/* u5 = s5 * t6 */
  r3s = abs_sub_n (r3, r3, r2, rn);	/* r3 - r2 */
  if (r3s)
    {
      r1s = abs_sub_n (r1, r1, r3, rn);
      r1[rn] = 0;
    }
  else
    {
      r1[rn] = mpn_add_n (r1, r1, r3, rn);
      r1s = 0;				/* r1 - r2 + r3  */
    }
  if (r1s)
    {
      s0[rn] = mpn_add_n (s0, r1, r0, rn);
      s0s = 0;
    }
  else if (r1[rn] != 0)
    {
      s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
      s0s = 1;				/* s4 = -r0 + r1 - r2 + r3 */
					/* Reverse sign! */
    }
  else
    {
      s0s = abs_sub_n (s0, r0, r1, rn);
      s0[rn] = 0;
    }
  MUL (u1, r0, rn, m0, mn);		/* u0 = s0 * t0 */
  r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
  ASSERT (r0[rn+mn] < 2);		/* u0 + u5 */

  t0s = abs_sub_n (t0, m3, m2, mn);
  u1s = r3s^t0s^1;			/* Reverse sign! */
  MUL (u1, r3, rn, t0, mn);		/* u2 = s2 * t2 */
  u1[rn+mn] = 0;
  if (t0s)
    {
      t0s = abs_sub_n (t0, m1, t0, mn);
      t0[mn] = 0;
    }
  else
    {
      t0[mn] = mpn_add_n (t0, t0, m1, mn);
    }

  /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs
     at r3. I'd expect that for matrices of random size, the high
     words t0[mn] and r1[rn] are non-zero with a pretty small
     probability. If that can be confirmed this should be done as an
     unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))
     add_n. */
  if (t0[mn] != 0)
    {
      MUL (r3, r1, rn, t0, mn + 1);	/* u3 = s3 * t3 */
      ASSERT (r1[rn] < 2);
      if (r1[rn] != 0)
	mpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);
    }
  else
    {
      MUL (r3, r1, rn + 1, t0, mn);
    }

  ASSERT (r3[rn+mn] < 4);

  u0[rn+mn] = 0;
  if (r1s^t0s)
    {
      r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);
    }
  else
    {
      ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));
      r3s = 0;				/* u3 + u5 */
    }

  if (t0s)
    {
      t0[mn] = mpn_add_n (t0, t0, m0, mn);
    }
  else if (t0[mn] != 0)
    {
      t0[mn] -= mpn_sub_n (t0, t0, m0, mn);
    }
  else
    {
      t0s = abs_sub_n (t0, t0, m0, mn);
    }
  MUL (u0, r2, rn, t0, mn + 1);		/* u6 = s6 * t4 */
  ASSERT (u0[rn+mn] < 2);
  if (r1s)
    {
      ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));
    }
  else
    {
      r1[rn] += mpn_add_n (r1, r1, r2, rn);
    }
  rn++;					/* operands now carry a high limb */
  t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn);
					/* u3 + u5 + u6 */
  ASSERT (r2[rn+mn-1] < 4);
  r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn);
					/* -u2 + u3 + u5  */
  ASSERT (r3[rn+mn-1] < 3);
  MUL (u0, s0, rn, m1, mn);		/* u4 = s4 * t5 */
  ASSERT (u0[rn+mn-1] < 2);
  t0[mn] = mpn_add_n (t0, m3, m1, mn);
  MUL (u1, r1, rn, t0, mn + 1);		/* u1 = s1 * t1 */
  mn += rn;				/* mn is now the full result size */
  ASSERT (u1[mn-1] < 4);
  ASSERT (u1[mn] == 0);
  ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn));
					/* -u2 + u3 - u4 + u5  */
  ASSERT (r1[mn-1] < 2);
  if (r3s)
    {
      ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));
    }
  else
    {
      ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn));
					/* u1 + u2 - u3 - u5  */
    }
  ASSERT (r3[mn-1] < 2);
  if (t0s)
    {
      ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));
    }
  else
    {
      ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn));
					/* u1 - u3 - u5 - u6  */
    }
  ASSERT (r2[mn-1] < 2);
}
+
/* Multiply the 2x2 matrix R = (r0, r1; r2, r3) of rn-limb elements in
   place by M = (m0, m1; m2, m3) with mn-limb elements, R <- R * M.
   Result elements occupy rn + mn + 1 limbs.  tp is scratch of
   mpn_matrix22_mul_itch (rn, mn) limbs.  */
void
mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
		  mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
		  mp_ptr tp)
{
  if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
      || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
    {
      mp_ptr p0, p1;
      unsigned i;

      /* Temporary storage: 3 rn + 2 mn */
      p0 = tp + rn;
      p1 = p0 + rn + mn;

      /* One pass per row of R; the second iteration re-aims r0/r1 at
	 the bottom row.  */
      for (i = 0; i < 2; i++)
	{
	  /* Save r0: it is overwritten (r0 <- r1 * m2) before its last
	     use (r1 <- old r0 * m1).  */
	  MPN_COPY (tp, r0, rn);

	  if (rn >= mn)
	    {
	      mpn_mul (p0, r0, rn, m0, mn);	/* p0 = r0 * m0 */
	      mpn_mul (p1, r1, rn, m3, mn);	/* p1 = r1 * m3 */
	      mpn_mul (r0, r1, rn, m2, mn);	/* r0 = r1 * m2 */
	      mpn_mul (r1, tp, rn, m1, mn);	/* r1 = old r0 * m1 */
	    }
	  else
	    {
	      /* Same products; mpn_mul wants the longer operand first. */
	      mpn_mul (p0, m0, mn, r0, rn);
	      mpn_mul (p1, m3, mn, r1, rn);
	      mpn_mul (r0, m2, mn, r1, rn);
	      mpn_mul (r1, m1, mn, tp, rn);
	    }
	  /* r0 = r0*m0 + r1*m2, r1 = r0*m1 + r1*m3, i.e. row * M.  */
	  r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);
	  r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);

	  r0 = r2; r1 = r3;
	}
    }
  else
    mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
			       m0, m1, m2, m3, mn, tp);
}
diff --git a/mpn/generic/matrix22_mul1_inverse_vector.c b/mpn/generic/matrix22_mul1_inverse_vector.c
new file mode 100644
index 0000000..68d50b7
--- /dev/null
+++ b/mpn/generic/matrix22_mul1_inverse_vector.c
@@ -0,0 +1,64 @@
+/* matrix22_mul1_inverse_vector.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00)
   applied from the left.  Uses three buffers, to avoid a copy: the
   first result lands in rp while bp is updated in place (ap is read
   only).  Returns the result size: n, or n-1 when both top limbs
   came out zero.  */
mp_size_t
mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *M,
				  mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
{
  mp_limb_t h0, h1;

  /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as

     r  = u11 * a
     r -= u01 * b
     b *= u00
     b -= u10 * a
  */

  h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);
  h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
  ASSERT (h0 == h1);	/* high limbs cancel: result is a nonnegative
			   n-limb number */

  h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);
  h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
  ASSERT (h0 == h1);

  /* Drop one limb when both results shrank below n limbs.  */
  n -= (rp[n-1] | bp[n-1]) == 0;
  return n;
}
diff --git a/mpn/generic/mod_1.c b/mpn/generic/mod_1.c
new file mode 100644
index 0000000..f737bc2
--- /dev/null
+++ b/mpn/generic/mod_1.c
@@ -0,0 +1,278 @@
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+   Return the single-limb remainder.
+   There are no constraints on the value of the divisor.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012, 2020
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef MOD_1_NORM_THRESHOLD
+#define MOD_1_NORM_THRESHOLD  0
+#endif
+
+#ifndef MOD_1_UNNORM_THRESHOLD
+#define MOD_1_UNNORM_THRESHOLD  0
+#endif
+
+#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#endif
+
+#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#endif
+
+#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
+#endif
+
+#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
+#endif
+
+#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
+/* Duplicates declarations in tune/speed.h */
+mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+
+void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
+void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
+
+#undef mpn_mod_1_1p
+#define mpn_mod_1_1p(ap, n, b, pre)			     \
+  (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)     \
+   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)  \
+      : __gmpn_mod_1_1p (ap, n, b, pre)))
+
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p_cps(pre, b)				\
+  (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)		\
+   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)	\
+      : __gmpn_mod_1_1p_cps (pre, b)))
+#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
+
+
+/* The comments in mpn/generic/divrem_1.c apply here too.
+
+   As noted in the algorithms section of the manual, the shifts in the loop
+   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
+   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
+   skip a division where (a*2^n)%(d*2^n) can't then there's the same number
+   of divide steps, though how often that happens depends on the assumed
+   distributions of dividend and divisor.  In any case this idea is left to
+   CPU specific implementations to consider.  */
+
/* Remainder of {up,un} divided by an arbitrary nonzero d (un >= 1).
   The divisor is normalized by a left shift of cnt bits, the dividend
   is fed through with a matching compensation, and the remainder is
   shifted back down at the end.  NOTE(review): mpn_mod_1 routes
   divisors with the high bit set to mpn_mod_1_norm, so here cnt >= 1
   and the (GMP_LIMB_BITS - cnt) shifts below are well defined --
   confirm no other callers exist.  */
static mp_limb_t
mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  i;
  mp_limb_t  n1, n0, r;
  mp_limb_t  dummy;	/* discarded quotient limb */
  int cnt;

  ASSERT (un > 0);
  ASSERT (d != 0);

  /* Skip a division if high < divisor.  Having the test here before
     normalizing will still skip as often as possible.  */
  r = up[un - 1];
  if (r < d)
    {
      if (--un == 0)
	return r;
    }
  else
    r = 0;

  d <<= GMP_NAIL_BITS;

  /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
     code above. */
  if (! UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      for (i = un - 1; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  udiv_qrnnd (dummy, r, r, n0, d);
	  r >>= GMP_NAIL_BITS;
	}
      return r;
    }

  /* Normalize d and fold the top dividend limb's high bits into r.  */
  count_leading_zeros (cnt, d);
  d <<= cnt;

  n1 = up[un - 1] << GMP_NAIL_BITS;
  r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));

  if (UDIV_NEEDS_NORMALIZATION
      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
    {
      /* Hardware division on the shifted limb stream.  */
      mp_limb_t nshift;
      for (i = un - 2; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
	  udiv_qrnnd (dummy, r, r, nshift, d);
	  r >>= GMP_NAIL_BITS;
	  n1 = n0;
	}
      udiv_qrnnd (dummy, r, r, n1 << cnt, d);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;	/* undo the normalization shift */
    }
  else
    {
      /* Same loop, dividing via a precomputed inverse of d.  */
      mp_limb_t inv, nshift;
      invert_limb (inv, d);

      for (i = un - 2; i >= 0; i--)
	{
	  n0 = up[i] << GMP_NAIL_BITS;
	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
	  udiv_rnnd_preinv (r, r, nshift, d, inv);
	  r >>= GMP_NAIL_BITS;
	  n1 = n0;
	}
      udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
      r >>= GMP_NAIL_BITS;
      return r >> cnt;	/* undo the normalization shift */
    }
}
+
+static mp_limb_t
+mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t  i;
+  mp_limb_t  n0, r;
+  mp_limb_t  dummy;
+
+  ASSERT (un > 0);
+
+  d <<= GMP_NAIL_BITS;
+
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+
+  /* High limb is initial remainder, possibly with one subtract of
+     d to get r<d.  */
+  r = up[un - 1] << GMP_NAIL_BITS;
+  if (r >= d)
+    r -= d;
+  r >>= GMP_NAIL_BITS;
+  un--;
+  if (un == 0)
+    return r;
+
+  if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
+    {
+      for (i = un - 1; i >= 0; i--)
+	{
+	  n0 = up[i] << GMP_NAIL_BITS;
+	  udiv_qrnnd (dummy, r, r, n0, d);
+	  r >>= GMP_NAIL_BITS;
+	}
+      return r;
+    }
+  else
+    {
+      mp_limb_t  inv;
+      invert_limb (inv, d);
+      for (i = un - 1; i >= 0; i--)
+	{
+	  n0 = up[i] << GMP_NAIL_BITS;
+	  udiv_rnnd_preinv (r, r, n0, d, inv);
+	  r >>= GMP_NAIL_BITS;
+	}
+      return r;
+    }
+}
+
+mp_limb_t
+mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  ASSERT (n >= 0);
+  ASSERT (b != 0);
+
+  /* Should this be handled at all?  Rely on callers?  Note un==0 is currently
+     required by mpz/fdiv_r_ui.c and possibly other places.  */
+  if (n == 0)
+    return 0;
+
+  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
+    {
+      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
+	{
+	  return mpn_mod_1_norm (ap, n, b);
+	}
+      else
+	{
+	  mp_limb_t pre[4];
+	  mpn_mod_1_1p_cps (pre, b);
+	  return mpn_mod_1_1p (ap, n, b, pre);
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
+	{
+	  return mpn_mod_1_unnorm (ap, n, b);
+	}
+      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
+	{
+	  mp_limb_t pre[4];
+	  mpn_mod_1_1p_cps (pre, b);
+	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
+	}
+      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
+	{
+	  mp_limb_t pre[5];
+	  mpn_mod_1s_2p_cps (pre, b);
+	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
+	}
+      else
+	{
+	  mp_limb_t pre[7];
+	  mpn_mod_1s_4p_cps (pre, b);
+	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
+	}
+    }
+}
diff --git a/mpn/generic/mod_1_1.c b/mpn/generic/mod_1_1.c
new file mode 100644
index 0000000..be199ff
--- /dev/null
+++ b/mpn/generic/mod_1_1.c
@@ -0,0 +1,341 @@
+/* mpn_mod_1_1p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2011, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef MOD_1_1P_METHOD
+# define MOD_1_1P_METHOD 1    /* need to make sure this is 2 for asm testing */
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
+ * carry out, in the form of a mask. */
+
+#if defined (__GNUC__) && ! defined (NO_ASM)	/* per-CPU inline-asm variants of add_mssaaaa */
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %k2\n\t"					\
+	     "adc	%4, %k1\n\t"					\
+	     "sbb	%k0, %k0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %q2\n\t"					\
+	     "adc	%4, %q1\n\t"					\
+	     "sbb	%q0, %q0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxcc	%r3, %4, %1\n\t"				\
+	     "subx	%%g0, %%g0, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addccc	%r7, %8, %%g0\n\t"				\
+	     "addccc	%r3, %4, %1\n\t"				\
+	     "clr	%0\n\t"						\
+	     "movcs	%%xcc, -1, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
+	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
+	 __CLOBBER_CC)
+#if __VIS__ >= 0x300
+#undef add_mssaaaa
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxccc	%r3, %4, %1\n\t"				\
+	     "clr	%0\n\t"						\
+	     "movcs	%%xcc, -1, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
+	     "adde	%1, %3, %4\n\t"					\
+	     "subfe	%0, %0, %0\n\t"					\
+	     "nor	%0, %0, %0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0)			\
+	     __CLOBBER_CC)
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "algr	%2, %6\n\t"					\
+	     "alcgr	%1, %4\n\t"					\
+	     "lghi	%0, 0\n\t"					\
+	     "alcgr	%0, %0\n\t"					\
+	     "lcgr	%0, %0"						\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
+	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
+#endif
+
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "adds	%2, %5, %6\n\t"					\
+	     "adcs	%1, %3, %4\n\t"					\
+	     "movcc	%0, #0\n\t"					\
+	     "movcs	%0, #-1"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "adds	%2, %x5, %6\n\t"				\
+	     "adcs	%1, %x3, %x4\n\t"				\
+	     "csinv	%0, xzr, xzr, cc\n\t"				\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rZ" (ah), "rZ" (bh), "%rZ" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_mssaaaa	/* generic C fallback: (s1,s0) = (a1,a0)+(b1,b0), m = carry-out as 0 or ~0 mask */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  do {									\
+    UWtype __s0, __s1, __c0, __c1;					\
+    __s0 = (a0) + (b0);							\
+    __s1 = (a1) + (b1);							\
+    __c0 = __s0 < (a0);							\
+    __c1 = __s1 < (a1);							\
+    (s0) = __s0;							\
+    __s1 = __s1 + __c0;							\
+    (s1) = __s1;							\
+    (m) = - (__c1 + (__s1 < __c0));					\
+  } while (0)
+#endif
+
+#if MOD_1_1P_METHOD == 1
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb;
+  int cnt;
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;		/* normalize b so its high bit is set */
+  invert_limb (bi, b);
+
+  cps[0] = bi;		/* preinverted (normalized) divisor */
+  cps[1] = cnt;		/* normalization shift count */
+
+  B1modb = -b;
+  if (LIKELY (cnt != 0))
+    B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;	/* B mod b, stored shifted back down */
+
+  /* In the normalized case, this can be simplified to
+   *
+   *   B2modb = - b * bi;
+   *   ASSERT (B2modb <= b);    // NB: equality iff b = B/2
+   */
+  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[3] = B2modb >> cnt;	/* B^2 mod b, stored shifted back down */
+}
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+{
+  mp_limb_t rh, rl, bi, ph, pl, r;
+  mp_limb_t B1modb, B2modb;
+  mp_size_t i;
+  int cnt;
+  mp_limb_t mask;
+
+  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
+
+  B1modb = bmodb[2];		/* B mod b */
+  B2modb = bmodb[3];		/* B^2 mod b */
+
+  rl = ap[n - 1];
+  umul_ppmm (ph, pl, rl, B1modb);
+  add_ssaaaa (rh, rl, ph, pl, CNST_LIMB(0), ap[n - 2]);
+
+  for (i = n - 3; i >= 0; i -= 1)
+    {
+      /* rr = ap[i]				< B
+	    + LO(rr)  * (B mod b)		<= (B-1)(b-1)
+	    + HI(rr)  * (B^2 mod b)		<= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, rl, B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i]);
+
+      umul_ppmm (rh, rl, rh, B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  cnt = bmodb[1];
+  bi = bmodb[0];
+
+  if (LIKELY (cnt != 0))
+    rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));	/* normalize the 2-limb remainder */
+
+  mask = -(mp_limb_t) (rh >= b);	/* branch-free conditional subtract of b */
+  rh -= mask & b;
+
+  udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
+
+  return r >> cnt;		/* undo the normalization shift */
+}
+#endif /* MOD_1_1P_METHOD == 1 */
+
+#if MOD_1_1P_METHOD == 2
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B2modb;
+  int cnt;
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;		/* normalize b so its high bit is set */
+  invert_limb (bi, b);
+
+  cps[0] = bi;		/* preinverted (normalized) divisor */
+  cps[1] = cnt;		/* normalization shift count */
+
+  if (LIKELY (cnt != 0))
+    {
+      mp_limb_t B1modb = -b;
+      B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+      ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+      cps[2] = B1modb >> cnt;	/* B mod b; left unset when cnt == 0 (unused then) */
+    }
+  B2modb = - b * bi;
+  ASSERT (B2modb <= b);    // NB: equality iff b = B/2
+  cps[3] = B2modb;		/* B^2 mod b; NOT shifted down, unlike method 1 */
+}
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+{
+  int cnt;
+  mp_limb_t bi, B1modb;
+  mp_limb_t r0, r1;
+  mp_limb_t r;
+
+  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
+
+  r0 = ap[n-2];
+  r1 = ap[n-1];
+
+  if (n > 2)
+    {
+      mp_limb_t B2modb, B2mb;
+      mp_limb_t p0, p1;
+      mp_limb_t r2;
+      mp_size_t j;
+
+      B2modb = bmodb[3];
+      B2mb = B2modb - b;	/* only used by the commented-out cmov alternative below */
+
+      umul_ppmm (p1, p0, r1, B2modb);
+      add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);	/* r2 gets the carry-out as a 0 / ~0 mask */
+
+      for (j = n-4; j >= 0; j--)
+	{
+	  mp_limb_t cy;
+	  /* mp_limb_t t = r0 + B2mb; */
+	  umul_ppmm (p1, p0, r1, B2modb);
+
+	  ADDC_LIMB (cy, r0, r0, r2 & B2modb);	/* fold previous carry mask back in */
+	  /* Alternative, for cmov: if (cy) r0 = t; */
+	  r0 -= (-cy) & b;
+	  add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
+	}
+
+      r1 -= (r2 & b);		/* subtract b once if the final add carried out */
+    }
+
+  cnt = bmodb[1];
+
+  if (LIKELY (cnt != 0))
+    {
+      mp_limb_t t;
+      mp_limb_t B1modb = bmodb[2];
+
+      umul_ppmm (r1, t, r1, B1modb);	/* reduce high limb via B mod b */
+      r0 += t;
+      r1 += (r0 < t);
+
+      /* Normalize */
+      r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
+      r0 <<= cnt;
+
+      /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows that. */
+    }
+  else
+    {
+      mp_limb_t mask = -(mp_limb_t) (r1 >= b);	/* branch-free conditional subtract */
+      r1 -= mask & b;
+    }
+
+  bi = bmodb[0];
+
+  udiv_rnnd_preinv (r, r1, r0, b, bi);
+  return r >> cnt;		/* undo the normalization shift */
+}
+#endif /* MOD_1_1P_METHOD == 2 */
diff --git a/mpn/generic/mod_1_2.c b/mpn/generic/mod_1_2.c
new file mode 100644
index 0000000..b00d19e
--- /dev/null
+++ b/mpn/generic/mod_1_2.c
@@ -0,0 +1,148 @@
+/* mpn_mod_1s_2p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 2.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 2);	/* precondition: b < B/2 */
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;		/* normalize b so its high bit is set */
+  invert_limb (bi, b);
+
+  cps[0] = bi;		/* preinverted (normalized) divisor */
+  cps[1] = cnt;		/* normalization shift count */
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;	/* B mod b, stored shifted back down */
+
+  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[3] = B2modb >> cnt;	/* B^2 mod b */
+
+  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+  cps[4] = B3modb >> cnt;	/* B^3 mod b */
+
+#if WANT_ASSERT
+  {
+    int i;
+    b = cps[2];		/* check: summing the tabulated residues must not overflow a limb */
+    for (i = 3; i <= 4; i++)
+      {
+	b += cps[i];
+	ASSERT (b >= cps[i]);
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
+{
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];	/* B mod b */
+  B2modb = cps[3];	/* B^2 mod b */
+  B3modb = cps[4];	/* B^3 mod b */
+
+  if ((n & 1) != 0)	/* fold leading limbs so an even count remains */
+    {
+      if (n == 1)
+	{
+	  rl = ap[n - 1];
+	  bi = cps[0];
+	  cnt = cps[1];
+	  udiv_rnnd_preinv (r, rl >> (GMP_LIMB_BITS - cnt),
+			     rl << cnt, b, bi);
+	  return r >> cnt;
+	}
+
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n--;
+    }
+  else
+    {
+      rh = ap[n - 1];
+      rl = ap[n - 2];
+    }
+
+  for (i = n - 4; i >= 0; i -= 2)	/* main loop: two limbs per iteration */
+    {
+      /* rr = ap[i]				< B
+	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
+	    + LO(rr)  * (B^2 mod b)		<= (B-1)(b-1)
+	    + HI(rr)  * (B^3 mod b)		<= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+      umul_ppmm (ch, cl, rl, B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B3modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  umul_ppmm (rh, cl, rh, B1modb);	/* fold high limb: rr = HI(rr)*(B mod b) + LO(rr) */
+  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));	/* cnt >= 1 since b < B/2 */
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+  return r >> cnt;	/* undo the normalization shift */
+}
diff --git a/mpn/generic/mod_1_3.c b/mpn/generic/mod_1_3.c
new file mode 100644
index 0000000..e4a908d
--- /dev/null
+++ b/mpn/generic/mod_1_3.c
@@ -0,0 +1,155 @@
+/* mpn_mod_1s_3p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 3.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 3);	/* precondition: b < B/3 */
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;		/* normalize b so its high bit is set */
+  invert_limb (bi, b);
+
+  cps[0] = bi;		/* preinverted (normalized) divisor */
+  cps[1] = cnt;		/* normalization shift count */
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;	/* B mod b, stored shifted back down */
+
+  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[3] = B2modb >> cnt;	/* B^2 mod b */
+
+  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+  cps[4] = B3modb >> cnt;	/* B^3 mod b */
+
+  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
+  cps[5] = B4modb >> cnt;	/* B^4 mod b */
+
+#if WANT_ASSERT
+  {
+    int i;
+    b = cps[2];		/* check: summing the tabulated residues must not overflow a limb */
+    for (i = 3; i <= 5; i++)
+      {
+	b += cps[i];
+	ASSERT (b >= cps[i]);
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
+{
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];	/* B mod b */
+  B2modb = cps[3];	/* B^2 mod b */
+  B3modb = cps[4];	/* B^3 mod b */
+  B4modb = cps[5];	/* B^4 mod b */
+
+  /* We compute n mod 3 in a tricky way, which works except for when n is so
+     close to the maximum size that we don't need to support it.  The final
+     cast to int is a workaround for HP cc.  */
+  switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
+    {
+    case 0:	/* n mod 3 = 0: fold three limbs up front */
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 3;
+      break;
+    default:	/* n mod 3 = 1; (case 2)*/
+      rh = 0;
+      rl = ap[--n];
+      break;
+    case 1:	/* n mod 3 = 2 */
+      rh = ap[n - 1];
+      rl = ap[n - 2];
+      n -= 2;
+      break;
+    }
+
+  for (i = n - 3; i >= 0; i -= 3)	/* main loop: three limbs per iteration */
+    {
+      /* rr = ap[i]				< B
+	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
+	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
+	    + LO(rr)  * (B^3 mod b)		<= (B-1)(b-1)
+	    + HI(rr)  * (B^4 mod b)		<= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+      umul_ppmm (ch, cl, ap[i + 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, rl, B3modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B4modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  umul_ppmm (rh, cl, rh, B1modb);	/* fold high limb: rr = HI(rr)*(B mod b) + LO(rr) */
+  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));	/* cnt >= 1 since b < B/3 */
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+  return r >> cnt;	/* undo the normalization shift */
+}
diff --git a/mpn/generic/mod_1_4.c b/mpn/generic/mod_1_4.c
new file mode 100644
index 0000000..80b42ba
--- /dev/null
+++ b/mpn/generic/mod_1_4.c
@@ -0,0 +1,170 @@
+/* mpn_mod_1s_4p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 4.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 4);	/* precondition: b < B/4 */
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;		/* normalize b so its high bit is set */
+  invert_limb (bi, b);
+
+  cps[0] = bi;		/* preinverted (normalized) divisor */
+  cps[1] = cnt;		/* normalization shift count */
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;	/* B mod b, stored shifted back down */
+
+  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[3] = B2modb >> cnt;	/* B^2 mod b */
+
+  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+  cps[4] = B3modb >> cnt;	/* B^3 mod b */
+
+  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
+  cps[5] = B4modb >> cnt;	/* B^4 mod b */
+
+  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
+  cps[6] = B5modb >> cnt;	/* B^5 mod b */
+
+#if WANT_ASSERT
+  {
+    int i;
+    b = cps[2];		/* check: summing the tabulated residues must not overflow a limb */
+    for (i = 3; i <= 6; i++)
+      {
+	b += cps[i];
+	ASSERT (b >= cps[i]);
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
+{
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];	/* B mod b */
+  B2modb = cps[3];	/* B^2 mod b */
+  B3modb = cps[4];	/* B^3 mod b */
+  B4modb = cps[5];	/* B^4 mod b */
+  B5modb = cps[6];	/* B^5 mod b */
+
+  switch (n & 3)	/* fold leading limbs so a multiple of 4 remains */
+    {
+    case 0:
+      umul_ppmm (ph, pl, ap[n - 3], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
+      umul_ppmm (ch, cl, ap[n - 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+      umul_ppmm (rh, rl, ap[n - 1], B3modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 4;
+      break;
+    case 1:
+      rh = 0;
+      rl = ap[n - 1];
+      n -= 1;
+      break;
+    case 2:
+      rh = ap[n - 1];
+      rl = ap[n - 2];
+      n -= 2;
+      break;
+    case 3:
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 3;
+      break;
+    }
+
+  for (i = n - 4; i >= 0; i -= 4)	/* main loop: four limbs per iteration */
+    {
+      /* rr = ap[i]				< B
+	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
+	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
+	    + ap[i+3] * (B^3 mod b)		<= (B-1)(b-1)
+	    + LO(rr)  * (B^4 mod b)		<= (B-1)(b-1)
+	    + HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+      umul_ppmm (ch, cl, ap[i + 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, ap[i + 3], B3modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, rl, B4modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B5modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  umul_ppmm (rh, cl, rh, B1modb);	/* fold high limb: rr = HI(rr)*(B mod b) + LO(rr) */
+  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));	/* cnt >= 1 since b < B/4 */
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+  return r >> cnt;	/* undo the normalization shift */
+}
diff --git a/mpn/generic/mod_34lsub1.c b/mpn/generic/mod_34lsub1.c
new file mode 100644
index 0000000..af9c6c6
--- /dev/null
+++ b/mpn/generic/mod_34lsub1.c
@@ -0,0 +1,128 @@
+/* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+/* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1.
+   The remainder is not fully reduced, it's any limb value congruent to
+   {p,n} modulo that divisor.
+
+   This implementation is only correct when GMP_NUMB_BITS is a multiple of
+   4.
+
+   FIXME: If GMP_NAIL_BITS is some silly big value during development then
+   it's possible the carry accumulators c0,c1,c2 could overflow.
+
+   General notes:
+
+   The basic idea is to use a set of N accumulators (N=3 in this case) to
+   effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end
+   by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a
+   remainder mod 2^(GMP_NUMB_BITS*N/M)-1.  N and M are chosen to give a good
+   set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1.
+
+   N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving
+   a few more primes than a single accumulator N=1 does, and for no extra
+   cost (assuming the processor has a decent number of registers).
+
+   For strange nailified values of GMP_NUMB_BITS the idea would be to look
+   for what N and M give good primes.  With GMP_NUMB_BITS not a power of 2
+   the choices for M may be opened up a bit.  But such things are probably
+   best done in separate code, not grafted on here.  */
+
+#if GMP_NUMB_BITS % 4 == 0
+
+#define B1  (GMP_NUMB_BITS / 4)		/* one quarter of a limb, in bits */
+#define B2  (B1 * 2)			/* half a limb */
+#define B3  (B1 * 3)			/* three quarters of a limb */
+
+#define M1  ((CNST_LIMB(1) << B1) - 1)	/* mask of the low B1 bits */
+#define M2  ((CNST_LIMB(1) << B2) - 1)	/* mask of the low B2 bits */
+#define M3  ((CNST_LIMB(1) << B3) - 1)	/* mask of the low B3 bits */
+
+#define LOW0(n)      ((n) & M3)
+#define HIGH0(n)     ((n) >> B3)
+
+#define LOW1(n)      (((n) & M2) << B1)
+#define HIGH1(n)     ((n) >> B2)
+
+#define LOW2(n)      (((n) & M1) << B2)
+#define HIGH2(n)     ((n) >> B1)
+
+#define PARTS0(n)    (LOW0(n) + HIGH0(n))	/* fold limb k=0 mod 2^B3-1 */
+#define PARTS1(n)    (LOW1(n) + HIGH1(n))	/* fold limb k=1 (pre-shifted by B1) */
+#define PARTS2(n)    (LOW2(n) + HIGH2(n))	/* fold limb k=2 (pre-shifted by B2) */
+
+#define ADD(c,a,val)        /* a += val; c accumulates the carry-out */ \
+  do {                                  \
+    mp_limb_t  new_c;                   \
+    ADDC_LIMB (new_c, a, a, val);       \
+    (c) += new_c;                       \
+  } while (0)
+
+mp_limb_t
+mpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
+{
+  mp_limb_t  c0, c1, c2;
+  mp_limb_t  a0, a1, a2;
+
+  ASSERT (n >= 1);
+  ASSERT (n/3 < GMP_NUMB_MAX);	/* keep the carry accumulators from overflowing */
+
+  a0 = a1 = a2 = 0;
+  c0 = c1 = c2 = 0;
+
+  while ((n -= 3) >= 0)		/* accumulate three limbs per iteration */
+    {
+      ADD (c0, a0, p[0]);
+      ADD (c1, a1, p[1]);
+      ADD (c2, a2, p[2]);
+      p += 3;
+    }
+
+  if (n != -3)			/* one or two leftover limbs */
+    {
+      ADD (c0, a0, p[0]);
+      if (n != -2)		/* two leftover limbs */
+	ADD (c1, a1, p[1]);
+    }
+
+  return
+    PARTS0 (a0) + PARTS1 (a1) + PARTS2 (a2)
+    + PARTS1 (c0) + PARTS2 (c1) + PARTS0 (c2);
+}
+
+#endif
diff --git a/mpn/generic/mode1o.c b/mpn/generic/mode1o.c
new file mode 100644
index 0000000..9ba0ae1
--- /dev/null
+++ b/mpn/generic/mode1o.c
@@ -0,0 +1,235 @@
+/* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Calculate an r satisfying
+
+           r*B^k + a - c == q*d
+
+   where B=2^GMP_LIMB_BITS, a is {src,size}, k is either size or size-1
+   (the caller won't know which), and q is the quotient (discarded).  d must
+   be odd, c can be any limb value.
+
+   If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
+
+   This slightly strange function suits the initial Nx1 reduction for GCDs
+   or Jacobi symbols since the factors of 2 in B^k can be ignored, leaving
+   -r == a mod d (by passing c=0).  For a GCD the factor of -1 on r can be
+   ignored, or for the Jacobi symbol it can be accounted for.  The function
+   also suits divisibility and congruence testing since if r=0 (or r=d) is
+   obtained then a==c mod d.
+
+
+   r is a bit like the remainder returned by mpn_divexact_by3c, and is the
+   sort of remainder mpn_divexact_1 might return.  Like mpn_divexact_by3c, r
+   represents a borrow, since effectively quotient limbs are chosen so that
+   subtracting that multiple of d from src at each step will produce a zero
+   limb.
+
+   A long calculation can be done piece by piece from low to high by passing
+   the return value from one part as the carry parameter to the next part.
+   The effective final k becomes anything between size and size-n, if n
+   pieces are used.
+
+
+   A similar sort of routine could be constructed based on adding multiples
+   of d at each limb, much like redc in mpz_powm does.  Subtracting however
+   has a small advantage that when subtracting to cancel out l there's never
+   a borrow into h, whereas using an addition would put a carry into h
+   depending whether l==0 or l!=0.
+
+
+   In terms of efficiency, this function is similar to a mul-by-inverse
+   mpn_mod_1.  Both are essentially two multiplies and are best suited to
+   CPUs with low latency multipliers (in comparison to a divide instruction
+   at least.)  But modexact has a few less supplementary operations, only
+   needs low part and high part multiplies, and has fewer working quantities
+   (helping CPUs with few registers).
+
+
+   In the main loop it will be noted that the new carry (call it r) is the
+   sum of the high product h and any borrow from l=s-c.  If c<d then we will
+   have r<d too, for the following reasons.  Let q=l*inverse be the quotient
+   limb, so that q*d = B*h + l, where B=2^GMP_NUMB_BITS.  Now if h=d-1 then
+
+       l = q*d - B*(d-1) <= (B-1)*d - B*(d-1) = B-d
+
+   But if l=s-c produces a borrow when c<d, then l>=B-d+1 and hence will
+   never have h=d-1 and so r=h+borrow <= d-1.
+
+   When c>=d, on the other hand, h=d-1 can certainly occur together with a
+   borrow, thereby giving only r<=d, as per the function definition above.
+
+   As a design decision it's left to the caller to check for r=d if it might
+   be passing c>=d.  Several applications have c<d initially so the extra
+   test is often unnecessary, for example the GCDs or a plain divisibility
+   d|a test will pass c=0.
+
+
+   The special case for size==1 is so that it can be assumed c<=d in the
+   high<=divisor test at the end.  c<=d is only guaranteed after at least
+   one iteration of the main loop.  There's also a decent chance one % is
+   faster than a binvert_limb, though that will depend on the processor.
+
+   A CPU specific implementation might want to omit the size==1 code or the
+   high<divisor test.  mpn/x86/k6/mode1o.asm for instance finds neither
+   useful.  */
+
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
+                     mp_limb_t orig_c)
+{
+  mp_limb_t  s, h, l, inverse, dummy, dmul, ret;
+  mp_limb_t  c = orig_c;
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+  ASSERT (d & 1);
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (d);
+  ASSERT_LIMB (c);
+
+  if (size == 1)
+    {
+      /* Single limb: plain % remainder; see the size==1 discussion in the
+	 file comment above for why this case is kept separate.  */
+      s = src[0];
+      if (s > c)
+	{
+	  l = s-c;
+	  h = l % d;
+	  if (h != 0)
+	    h = d - h;
+	}
+      else
+	{
+	  l = c-s;
+	  h = l % d;
+	}
+      return h;
+    }
+
+
+  /* inverse = 1/d mod 2^GMP_NUMB_BITS; it exists because d is odd.  */
+  binvert_limb (inverse, d);
+  dmul = d << GMP_NAIL_BITS;
+
+  i = 0;
+  do
+    {
+      /* Choose quotient limb l = (s-c)*inverse so that l*d cancels the low
+	 limb; the next carry is the high product plus any borrow from s-c
+	 (folded in by SUBC_LIMB and the c += h).  */
+      s = src[i];
+      SUBC_LIMB (c, l, s, c);
+      l = (l * inverse) & GMP_NUMB_MASK;
+      umul_ppmm (h, dummy, l, dmul);
+      c += h;
+    }
+  while (++i < size-1);
+
+
+  s = src[i];
+  if (s <= d)
+    {
+      /* With high<=d the final step can be a subtract and addback.  If c==0
+	 then the addback will restore to l>=0.  If c==d then will get l==d
+	 if s==0, but that's ok per the function definition.  */
+
+      l = c - s;
+      if (c < s)
+	l += d;
+
+      ret = l;
+    }
+  else
+    {
+      /* Can't skip a divide, just do the loop code once more. */
+
+      SUBC_LIMB (c, l, s, c);
+      l = (l * inverse) & GMP_NUMB_MASK;
+      umul_ppmm (h, dummy, l, dmul);
+      c += h;
+      ret = c;
+    }
+
+  ASSERT (orig_c < d ? ret < d : ret <= d);
+  return ret;
+}
+
+
+
+#if 0
+
+/* The following is an alternate form that might shave one cycle on a
+   superscalar processor since it takes c+=h off the dependent chain,
+   leaving just a low product, high product, and a subtract.
+
+   This is for CPU specific implementations to consider.  A special case for
+   high<divisor and/or size==1 can be added if desired.
+
+   Notice that c is only ever 0 or 1, since if s-c produces a borrow then
+   x=0xFF..FF and x-h cannot produce a borrow.  The c=(x>s) could become
+   c=(x==0xFF..FF) too, if that helped.  */
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
+{
+  mp_limb_t  s, x, y, inverse, dummy, dmul, c1, c2;
+  mp_limb_t  c = 0;
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+  ASSERT (d & 1);
+
+  /* inverse = 1/d mod 2^GMP_NUMB_BITS; exists because d is odd.  */
+  binvert_limb (inverse, d);
+  dmul = d << GMP_NAIL_BITS;
+
+  for (i = 0; i < size; i++)
+    {
+      ASSERT (c==0 || c==1);
+
+      /* y = s - c - h, tracked as two separate subtractions so the high
+	 product h stays off the dependent chain (see note above); c1 and
+	 c2 are the respective borrows, and per the note at most one of
+	 them can be set.  */
+      s = src[i];
+      SUBC_LIMB (c1, x, s, c);
+
+      SUBC_LIMB (c2, y, x, h);
+      c = c1 + c2;
+
+      y = (y * inverse) & GMP_NUMB_MASK;
+      umul_ppmm (h, dummy, y, dmul);
+    }
+
+  h += c;
+  return h;
+}
+
+#endif
diff --git a/mpn/generic/mu_bdiv_q.c b/mpn/generic/mu_bdiv_q.c
new file mode 100644
index 0000000..0ef3bd8
--- /dev/null
+++ b/mpn/generic/mu_bdiv_q.c
@@ -0,0 +1,281 @@
+/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^nn.
+   storing the result in {qp,nn}.  Overlap allowed between Q and N; all other
+   overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+#include "gmp-impl.h"
+
+
+/* N = {np,nn}
+   D = {dp,dn}
+
+   Requirements: N >= D
+		 D >= 1
+		 D odd
+		 dn >= 2
+		 nn >= 2
+		 scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).
+
+   Write quotient to Q = {qp,nn}.
+
+   FIXME: When iterating, perhaps do the small step before loop, not after.
+   FIXME: Try to avoid the scalar divisions when computing inverse size.
+   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
+	  particular, when dn==in, tp and rp could use the same space.
+   FIXME: Trim final quotient calculation to qn limbs of precision.
+*/
+static void
+mpn_mu_bdiv_q_old (mp_ptr qp,
+	       mp_srcptr np, mp_size_t nn,
+	       mp_srcptr dp, mp_size_t dn,
+	       mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_size_t in;
+  int cy, c0;
+  mp_size_t tn, wn;
+
+  /* All nn quotient limbs are developed; qn counts down from nn.  */
+  qn = nn;
+
+  ASSERT (dn >= 2);
+  ASSERT (qn >= 2);
+
+  if (qn > dn)
+    {
+      mp_size_t b;
+
+      /* |_______________________|   dividend
+			|________|   divisor  */
+
+#define ip           scratch			/* in */
+#define rp           (scratch + in)		/* dn or rest >= binvert_itch(in) */
+#define tp           (scratch + in + dn)	/* dn+in or next_size(dn) */
+#define scratch_out  (scratch + in + dn + tn)	/* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute an inverse size that is a nice partition of the quotient.  */
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+
+      /* Some notes on allocation:
+
+	 When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
+	 limbs of R die at that point.  We could save memory by letting T live
+	 just under R, and let the upper part of T expand into R. These changes
+	 should reduce itch to perhaps 3dn.
+       */
+
+      mpn_binvert (ip, dp, in, rp);
+
+      cy = 0;
+
+      MPN_COPY (rp, np, dn);
+      np += dn;
+      mpn_mullo_n (qp, rp, ip, in);
+      qn -= in;
+
+      while (qn > in)
+	{
+	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
+	  else
+	    {
+	      tn = mpn_mulmod_bnm1_next_size (dn);
+	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	      wn = dn + in - tn;		/* number of wrapped limbs */
+	      if (wn > 0)
+		{
+		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+		  mpn_decr_u (tp + wn, c0);
+		}
+	    }
+
+	  qp += in;
+	  if (dn != in)
+	    {
+	      /* Subtract tp[dn-1...in] from partial remainder.  */
+	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+	      if (cy == 2)
+		{
+		  mpn_incr_u (tp + dn, 1);
+		  cy = 1;
+		}
+	    }
+	  /* Subtract tp[dn+in-1...dn] from dividend.  */
+	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
+	  np += in;
+	  mpn_mullo_n (qp, rp, ip, in);
+	  qn -= in;
+	}
+
+      /* Generate last qn limbs.
+	 FIXME: It should be possible to limit precision here, since qn is
+	 typically somewhat smaller than dn.  No big gains expected.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* mulhi, need tp[qn+in-1...in] */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      qp += in;
+      if (dn != in)
+	{
+	  cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+	  if (cy == 2)
+	    {
+	      mpn_incr_u (tp + dn, 1);
+	      cy = 1;
+	    }
+	}
+
+      mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
+      mpn_mullo_n (qp, rp, ip, qn);
+
+#undef ip
+#undef rp
+#undef tp
+#undef scratch_out
+   }
+  else
+    {
+      /* |_______________________|   dividend
+		|________________|   divisor  */
+
+#define ip           scratch		/* in */
+#define tp           (scratch + in)	/* qn+in or next_size(qn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(qn)) */
+
+      /* Compute half-sized inverse.  */
+      in = qn - (qn >> 1);
+
+      mpn_binvert (ip, dp, in, tp);
+
+      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, qn, qp, in);		/* mulhigh */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (qn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, qn, qp, in, scratch_out);
+	  wn = qn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_cmp (tp, np, wn) < 0;
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      mpn_sub_n (tp, np + in, tp + in, qn - in);
+      mpn_mullo_n (qp + in, tp, ip, qn - in);	/* high qn-in quotient limbs */
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+}
+
+void
+mpn_mu_bdiv_q (mp_ptr qp,
+	       mp_srcptr np, mp_size_t nn,
+	       mp_srcptr dp, mp_size_t dn,
+	       mp_ptr scratch)
+{
+  /* The _old routine develops the negated quotient (compare the
+     Q = B^{qn} - Q' identity documented in mpn_mu_bdiv_qr); a final
+     mpn_neg recovers {np,nn} / {dp,dn} mod B^nn.  */
+  mpn_mu_bdiv_q_old (qp, np, nn, dp, dn, scratch);
+  mpn_neg (qp, qp, nn);
+}
+
+mp_size_t
+mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
+{
+  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
+  mp_size_t b;
+
+  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);
+
+  qn = nn;
+
+  /* Mirror the scratch layout of mpn_mu_bdiv_q_old: ip (in limbs) followed
+     by either rp+tp+mulmod scratch (qn > dn case) or tp+mulmod scratch;
+     the area after ip is also reused by mpn_binvert, hence the MAX.  */
+  if (qn > dn)
+    {
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	{
+	  tn = dn + in;
+	  itch_out = 0;
+	}
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+	}
+      itches = dn + tn + itch_out;
+    }
+  else
+    {
+      in = qn - (qn >> 1);
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	{
+	  tn = qn + in;
+	  itch_out = 0;
+	}
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (qn);
+	  itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
+	}
+      itches = tn + itch_out;
+    }
+
+  itch_binvert = mpn_binvert_itch (in);
+  return in + MAX (itches, itch_binvert);
+}
diff --git a/mpn/generic/mu_bdiv_qr.c b/mpn/generic/mu_bdiv_qr.c
new file mode 100644
index 0000000..540ad73
--- /dev/null
+++ b/mpn/generic/mu_bdiv_qr.c
@@ -0,0 +1,312 @@
+/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
+   where qn = nn-dn, storing the result in {qp,qn}.  Overlap allowed between Q
+   and N; all other overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+#include "gmp-impl.h"
+
+
+/* N = {np,nn}
+   D = {dp,dn}
+
+   Requirements: N >= D
+		 D >= 1
+		 D odd
+		 dn >= 2
+		 nn >= 2
+		 scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).
+
+   Write quotient to Q = {qp,nn-dn}.
+
+   FIXME: When iterating, perhaps do the small step before loop, not after.
+   FIXME: Try to avoid the scalar divisions when computing inverse size.
+   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
+	  particular, when dn==in, tp and rp could use the same space.
+*/
+static mp_limb_t
+mpn_mu_bdiv_qr_old (mp_ptr qp,
+		    mp_ptr rp,
+		    mp_srcptr np, mp_size_t nn,
+		    mp_srcptr dp, mp_size_t dn,
+		    mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_size_t in;
+  mp_limb_t cy, c0;
+  mp_size_t tn, wn;
+
+  qn = nn - dn;
+
+  ASSERT (dn >= 2);
+  ASSERT (qn >= 2);
+
+  if (qn > dn)
+    {
+      mp_size_t b;
+
+      /* |_______________________|   dividend
+			|________|   divisor  */
+
+#define ip           scratch		/* in */
+#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute an inverse size that is a nice partition of the quotient.  */
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+
+      /* Some notes on allocation:
+
+	 When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
+	 limbs of R die at that point.  We could save memory by letting T live
+	 just under R, and let the upper part of T expand into R. These changes
+	 should reduce itch to perhaps 3dn.
+       */
+
+      mpn_binvert (ip, dp, in, tp);
+
+      MPN_COPY (rp, np, dn);
+      np += dn;
+      cy = 0;
+
+      while (qn > in)
+	{
+	  mpn_mullo_n (qp, rp, ip, in);
+
+	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
+	  else
+	    {
+	      tn = mpn_mulmod_bnm1_next_size (dn);
+	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	      wn = dn + in - tn;		/* number of wrapped limbs */
+	      if (wn > 0)
+		{
+		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+		  mpn_decr_u (tp + wn, c0);
+		}
+	    }
+
+	  qp += in;
+	  qn -= in;
+
+	  if (dn != in)
+	    {
+	      /* Subtract tp[dn-1...in] from partial remainder.  */
+	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+	      if (cy == 2)
+		{
+		  mpn_incr_u (tp + dn, 1);
+		  cy = 1;
+		}
+	    }
+	  /* Subtract tp[dn+in-1...dn] from dividend.  */
+	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
+	  np += in;
+	}
+
+      /* Generate last qn limbs.  */
+      mpn_mullo_n (qp, rp, ip, qn);
+
+      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, qn);		/* mulhi, need tp[qn+in-1...in] */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
+	  wn = dn + qn - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      if (dn != qn)
+	{
+	  cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
+	  if (cy == 2)
+	    {
+	      mpn_incr_u (tp + dn, 1);
+	      cy = 1;
+	    }
+	}
+      return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+  else
+    {
+      /* |_______________________|   dividend
+		|________________|   divisor  */
+
+#define ip           scratch		/* in */
+#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute half-sized inverse.  */
+      in = qn - (qn >> 1);
+
+      mpn_binvert (ip, dp, in, tp);
+
+      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* mulhigh */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_sub_n (tp + tn, tp, np, wn);
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      qp += in;
+      qn -= in;
+
+      cy = mpn_sub_n (rp, np + in, tp + in, dn);
+      mpn_mullo_n (qp, rp, ip, qn);		/* high qn quotient limbs */
+
+      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, qn);		/* mulhigh */
+      else
+	{
+	  tn = mpn_mulmod_bnm1_next_size (dn);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
+	  wn = dn + qn - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+	      mpn_decr_u (tp + wn, c0);
+	    }
+	}
+
+      cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
+      if (cy == 2)
+	{
+	  mpn_incr_u (tp + dn, 1);
+	  cy = 1;
+	}
+      return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+}
+
+mp_limb_t
+mpn_mu_bdiv_qr (mp_ptr qp,
+		mp_ptr rp,
+		mp_srcptr np, mp_size_t nn,
+		mp_srcptr dp, mp_size_t dn,
+		mp_ptr scratch)
+{
+  mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch);
+
+  /* R' B^{qn} = U - Q' D
+   *
+   * Q = B^{qn} - Q' (assuming Q' != 0)
+   *
+   * R B^{qn} = U + Q D = U + B^{qn} D - Q' D
+   *          = B^{qn} D + R'
+   */
+
+  /* mpn_neg returns zero exactly when {qp, nn-dn} was all zero, i.e. the
+     Q' = 0 case where the identity above does not apply.  */
+  if (UNLIKELY (!mpn_neg (qp, qp, nn - dn)))
+    {
+      /* Zero quotient. */
+      ASSERT (cy == 0);
+      return 0;
+    }
+  else
+    {
+      /* Adjust the remainder by one extra D, per R B^{qn} = B^{qn} D + R'.  */
+      mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn);
+      ASSERT (cy2 >= cy);
+
+      return cy2 - cy;
+    }
+}
+
+
+mp_size_t
+mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
+{
+  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
+  mp_size_t b;
+
+  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);
+
+  qn = nn - dn;
+
+  /* Reproduce the inverse-size choice of mpn_mu_bdiv_qr_old for each of
+     its two cases.  */
+  if (qn > dn)
+    {
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+    }
+  else
+    {
+      in = qn - (qn >> 1);
+    }
+
+  /* Scratch layout matches the defines in mpn_mu_bdiv_qr_old: ip (in
+     limbs), then tp and the mulmod_bnm1 scratch; the region after ip is
+     also reused by mpn_binvert, hence the MAX below.  */
+  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+    {
+      tn = dn + in;
+      itch_out = 0;
+    }
+  else
+    {
+      tn = mpn_mulmod_bnm1_next_size (dn);
+      itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+    }
+
+  itch_binvert = mpn_binvert_itch (in);
+  itches = tn + itch_out;
+  return in + MAX (itches, itch_binvert);
+}
diff --git a/mpn/generic/mu_div_q.c b/mpn/generic/mu_div_q.c
new file mode 100644
index 0000000..44cfb40
--- /dev/null
+++ b/mpn/generic/mu_div_q.c
@@ -0,0 +1,184 @@
+/* mpn_mu_div_q.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/*
+  Things to work on:
+
+  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
+     probably close to optimal, except when mpn_mu_divappr_q fails.
+
+  2. We used to fall back to mpn_mu_div_qr when we detect a possible
+     mpn_mu_divappr_q rounding problem, now we multiply and compare.
+     Unfortunately, since mpn_mu_divappr_q does not return the partial
+     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
+     solve that.
+
+  3. The allocations done here should be made from the scratch area, which
+     then would need to be amended.
+*/
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_mu_div_q (mp_ptr qp,
+	      mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp, mp_size_t dn,
+	      mp_ptr scratch)
+{
+  mp_ptr tp, rp;
+  mp_size_t qn;
+  mp_limb_t cy, qh;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  qn = nn - dn;
+
+  tp = TMP_BALLOC_LIMBS (qn + 1);
+
+  if (qn >= dn)			/* nn >= 2*dn + 1 */
+    {
+       /* |_______________________|   dividend
+			 |________|   divisor  */
+
+      /* Pad the dividend with a low zero limb so mpn_mu_divappr_q yields
+	 one extra low quotient limb, used below to judge the accuracy of
+	 the approximation.  */
+      rp = TMP_BALLOC_LIMBS (nn + 1);
+      MPN_COPY (rp + 1, np, nn);
+      rp[0] = 0;
+
+      /* Canonically reduce the high part so the divappr input is < D B^k.  */
+      qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
+      if (qh != 0)
+	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
+
+      cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
+
+      if (UNLIKELY (cy != 0))
+	{
+	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
+	     canonically reduced, replace the returned value of B^(qn-dn)+eps
+	     by the largest possible value.  */
+	  mp_size_t i;
+	  for (i = 0; i < qn + 1; i++)
+	    tp[i] = GMP_NUMB_MAX;
+	}
+
+      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
+	 smaller than the max error, we cannot trust the quotient.  */
+      if (tp[0] > 4)
+	{
+	  MPN_COPY (qp, tp + 1, qn);
+	}
+      else
+	{
+	  mp_limb_t cy;
+	  mp_ptr pp;
+
+	  /* Verify by multiplying back and comparing against N.  */
+	  pp = rp;
+	  mpn_mul (pp, tp + 1, qn, dp, dn);
+
+	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
+
+	  if (cy || mpn_cmp (pp, np, nn) > 0) /* Approximation at most one too
+						 large; a single subtraction
+						 corrects it, no loop needed. */
+	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+	  else /* Approximation verified correct; use it as is. */
+	    MPN_COPY (qp, tp + 1, qn);
+	}
+    }
+  else
+    {
+       /* |_______________________|   dividend
+		 |________________|   divisor  */
+
+      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
+	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
+	 the most significant dn-1 limbs will actually be read, but it is not
+	 pretty.  */
+
+      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
+			     dp + dn - (qn + 1), qn + 1, scratch);
+
+      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
+         error from the divisor truncation.  */
+      if (tp[0] > 6)
+	{
+	  MPN_COPY (qp, tp + 1, qn);
+	}
+      else
+	{
+	  mp_limb_t cy;
+
+	  /* FIXME: a shorter product should be enough; we may use already
+	     allocated space... */
+	  rp = TMP_BALLOC_LIMBS (nn);
+	  mpn_mul (rp, dp, dn, tp + 1, qn);
+
+	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
+
+	  if (cy || mpn_cmp (rp, np, nn) > 0) /* Approximation at most one too
+						 large; a single subtraction
+						 corrects it, no loop needed. */
+	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+	  else /* Approximation verified correct; use it as is. */
+	    MPN_COPY (qp, tp + 1, qn);
+	}
+    }
+
+  TMP_FREE;
+  return qh;
+}
+
+mp_size_t
+mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t qn;
+
+  qn = nn - dn;
+  if (qn >= dn)
+    {
+      /* Matches the mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, ...) call
+	 in mpn_mu_div_q.  */
+      return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
+    }
+  else
+    {
+      /* Matches the truncated-divisor call with 2*qn+2 numerator and
+	 qn+1 divisor limbs in mpn_mu_div_q.  */
+      return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
+    }
+}
diff --git a/mpn/generic/mu_div_qr.c b/mpn/generic/mu_div_qr.c
new file mode 100644
index 0000000..8b9c702
--- /dev/null
+++ b/mpn/generic/mu_div_qr.c
@@ -0,0 +1,417 @@
+/* mpn_mu_div_qr, mpn_preinv_mu_div_qr.
+
+   Compute Q = floor(N / D) and R = N-QD.  N is nn limbs and D is dn limbs and
+   must be normalized, and Q must be nn-dn limbs.  The requirement that Q is
+   nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us to
+   let N be unmodified during the operation.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_divappr_q.c should be edited in sync.
+
+ Things to work on:
+
+  * This isn't optimal when the quotient isn't needed, as it might take a lot
+    of space.  The computation is always needed, though, so there is no time to
+    save with special code.
+
+  * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+    demonstrated by the fact that the mpn_invertappr function's scratch needs
+    mean that we need to keep a large allocation long after it is needed.
+    Things are worse as mpn_mul_fft does not accept any scratch parameter,
+    which means we'll have a large memory hole while in mpn_mul_fft.  In
+    general, a peak scratch need in the beginning of a function isn't
+    well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+
+
+/* FIXME: The MU_DIV_QR_SKEW_THRESHOLD was not analysed properly.  It gives a
+   speedup according to old measurements, but does the decision mechanism
+   really make sense?  It seems like the quotient between dn and qn might be
+   what we really should be checking.  */
+#ifndef MU_DIV_QR_SKEW_THRESHOLD
+#define MU_DIV_QR_SKEW_THRESHOLD 100
+#endif
+
+#ifdef CHECK				/* FIXME: Enable in minithres */
+#undef  MU_DIV_QR_SKEW_THRESHOLD
+#define MU_DIV_QR_SKEW_THRESHOLD 1
+#endif
+
+
+static mp_limb_t mpn_mu_div_qr2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+static mp_size_t mpn_mu_div_qr_choose_in (mp_size_t, mp_size_t, int);
+
+
+/* Divide N = {np,nn} by the normalized D = {dp,dn}, writing the nn-dn limb
+   quotient to qp and the dn-limb remainder to rp; return the high quotient
+   limb (0 or 1).  When the quotient is much shorter than the divisor (by
+   MU_DIV_QR_SKEW_THRESHOLD limbs or more), divide using only the high limbs
+   of both operands, then correct the remainder with the product of the
+   quotient and the ignored low divisor limbs.  */
+mp_limb_t
+mpn_mu_div_qr (mp_ptr qp,
+	       mp_ptr rp,
+	       mp_srcptr np,
+	       mp_size_t nn,
+	       mp_srcptr dp,
+	       mp_size_t dn,
+	       mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_limb_t cy, qh;
+
+  qn = nn - dn;
+  if (qn + MU_DIV_QR_SKEW_THRESHOLD < dn)
+    {
+      /* |______________|_ign_first__|   dividend			  nn
+		|_______|_ign_first__|   divisor			  dn
+
+		|______|	     quotient (prel)			  qn
+
+		 |___________________|   quotient * ignored-divisor-part  dn-1
+      */
+
+      /* Compute a preliminary quotient and a partial remainder by dividing the
+	 most significant limbs of each operand.  */
+      qh = mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),
+			   np + nn - (2 * qn + 1), 2 * qn + 1,
+			   dp + dn - (qn + 1), qn + 1,
+			   scratch);
+
+      /* Multiply the quotient by the divisor limbs ignored above.  The
+	 larger operand must be passed first to mpn_mul.  */
+      if (dn - (qn + 1) > qn)
+	mpn_mul (scratch, dp, dn - (qn + 1), qp, qn);  /* prod is dn-1 limbs */
+      else
+	mpn_mul (scratch, qp, qn, dp, dn - (qn + 1));  /* prod is dn-1 limbs */
+
+      /* Account for the implicit high quotient limb qh in the product.  */
+      if (qh)
+	cy = mpn_add_n (scratch + qn, scratch + qn, dp, dn - (qn + 1));
+      else
+	cy = 0;
+      scratch[dn - 1] = cy;
+
+      /* Subtract the dn-limb product from the full dividend / partial
+	 remainder to get the true remainder.  */
+      cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));
+      cy = mpn_sub_nc (rp + nn - (2 * qn + 1),
+		       rp + nn - (2 * qn + 1),
+		       scratch + nn - (2 * qn + 1),
+		       qn + 1, cy);
+      if (cy)
+	{
+	  /* A borrow means the preliminary quotient was one too large:
+	     decrement Q and compensate by adding D back into R.  */
+	  qh -= mpn_sub_1 (qp, qp, qn, 1);
+	  mpn_add_n (rp, rp, dp, dn);
+	}
+    }
+  else
+    {
+      /* No significant skew; divide the operands directly.  */
+      qh = mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
+    }
+
+  return qh;
+}
+
+/* Worker for mpn_mu_div_qr: choose an inverse size, compute an in-limb
+   approximate inverse of the high part of D, then run the block-wise
+   division mpn_preinv_mu_div_qr with that inverse.  Scratch holds the
+   inverse in its first 'in' limbs, followed by temporary space.  */
+static mp_limb_t
+mpn_mu_div_qr2 (mp_ptr qp,
+		mp_ptr rp,
+		mp_srcptr np,
+		mp_size_t nn,
+		mp_srcptr dp,
+		mp_size_t dn,
+		mp_ptr scratch)
+{
+  mp_size_t qn, in;
+  mp_limb_t cy, qh;
+  mp_ptr ip, tp;
+
+  ASSERT (dn > 1);
+
+  qn = nn - dn;
+
+  /* Compute the inverse size.  */
+  in = mpn_mu_div_qr_choose_in (qn, dn, 0);
+  ASSERT (in <= dn);
+
+#if 1
+  /* This alternative inverse computation method gets slightly more accurate
+     results.  FIXMEs: (1) Temp allocation needs not analysed (2) itch function
+     not adapted (3) mpn_invertappr scratch needs not met.  */
+  ip = scratch;
+  tp = scratch + in + 1;
+
+  /* compute an approximate inverse on (in+1) limbs */
+  if (dn == in)
+    {
+      /* Whole divisor used: append a low guard limb of 1 below D before
+	 inverting, then drop the extra low inverse limb.  */
+      MPN_COPY (tp + 1, dp, in);
+      tp[0] = 1;
+      mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+      MPN_COPY_INCR (ip, ip + 1, in);
+    }
+  else
+    {
+      /* Invert the top in+1 limbs of D incremented by 1 (so the computed
+	 inverse errs on the low side); if the increment overflows, D's top
+	 limbs are all ones and a zero inverse is used.  */
+      cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
+      if (UNLIKELY (cy != 0))
+	MPN_ZERO (ip, in);
+      else
+	{
+	  mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+	  MPN_COPY_INCR (ip, ip + 1, in);
+	}
+    }
+#else
+  /* This older inverse computation method gets slightly worse results than the
+     one above.  */
+  ip = scratch;
+  tp = scratch + in;
+
+  /* Compute inverse of D to in+1 limbs, then round to 'in' limbs.  Ideally the
+     inversion function should do this automatically.  */
+  if (dn == in)
+    {
+      tp[in + 1] = 0;
+      MPN_COPY (tp + in + 2, dp, in);
+      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+    }
+  else
+    {
+      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+    }
+  cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+  if (UNLIKELY (cy != 0))
+    MPN_ZERO (tp + 1, in);
+  MPN_COPY (ip, tp + 1, in);
+#endif
+
+  qh = mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
+
+  return qh;
+}
+
+/* Block-wise Barrett division using a precomputed in-limb approximate
+   inverse {ip,in} of the high part of D.  Each iteration develops 'in' new
+   quotient limbs from the top of the running remainder, multiplies them by
+   D (via a wraparound mulmod_bnm1 product for large sizes), subtracts, and
+   corrects the small quotient error with conditional adjustment steps.  */
+mp_limb_t
+mpn_preinv_mu_div_qr (mp_ptr qp,
+		      mp_ptr rp,
+		      mp_srcptr np,
+		      mp_size_t nn,
+		      mp_srcptr dp,
+		      mp_size_t dn,
+		      mp_srcptr ip,
+		      mp_size_t in,
+		      mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_limb_t cy, cx, qh;
+  mp_limb_t r;
+  mp_size_t tn, wn;
+
+/* Scratch layout: tn limbs of product space, then mulmod_bnm1 scratch.  */
+#define tp           scratch
+#define scratch_out  (scratch + tn)
+
+  qn = nn - dn;
+
+  /* Point at the high ends of N and Q; both are consumed downwards.  */
+  np += qn;
+  qp += qn;
+
+  /* Initial partial remainder: the top dn limbs of N, reduced mod D once;
+     qh records that single reduction.  */
+  qh = mpn_cmp (np, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (rp, np, dp, dn);
+  else
+    MPN_COPY_INCR (rp, np, dn);
+
+  /* if (qn == 0) */			/* The while below handles this case */
+  /*   return qh; */			/* Degenerate use.  Should we allow this? */
+
+  while (qn > 0)
+    {
+      /* Last block may be shorter than 'in'; use the low part of I.  */
+      if (qn < in)
+	{
+	  ip += in - qn;
+	  in = qn;
+	}
+      np -= in;
+      qp -= in;
+
+      /* Compute the next block of quotient limbs by multiplying the inverse I
+	 by the upper part of the partial remainder R.  */
+      mpn_mul_n (tp, rp + dn - in, ip, in);		/* mulhi  */
+      cy = mpn_add_n (qp, tp + in, rp + dn - in, in);	/* I's msb implicit */
+      ASSERT_ALWAYS (cy == 0);
+
+      qn -= in;
+
+      /* Compute the product of the quotient block and the divisor D, to be
+	 subtracted from the partial remainder combined with new limbs from the
+	 dividend N.  We only really need the low dn+1 limbs.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
+      else
+	{
+	  /* Wraparound product mod B^tn-1; undo the wrapped low limbs by
+	     subtracting the known high part of R that wrapped in.  */
+	  tn = mpn_mulmod_bnm1_next_size (dn + 1);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+	      cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+	      cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+	      ASSERT_ALWAYS (cx >= cy);
+	      mpn_incr_u (tp, cx - cy);
+	    }
+	}
+
+      /* Difference in the limb just above the dn limbs subtracted below;
+	 together with the borrow it drives the adjustment loop.  */
+      r = rp[dn - in] - tp[dn];
+
+      /* Subtract the product from the partial remainder combined with new
+	 limbs from the dividend N, generating a new partial remainder R.  */
+      if (dn != in)
+	{
+	  cy = mpn_sub_n (tp, np, tp, in);	/* get next 'in' limbs from N */
+	  cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+	  MPN_COPY (rp, tp, dn);		/* FIXME: try to avoid this */
+	}
+      else
+	{
+	  cy = mpn_sub_n (rp, np, tp, in);	/* get next 'in' limbs from N */
+	}
+
+      STAT (int i; int err = 0;
+	    static int errarr[5]; static int err_rec; static int tot);
+
+      /* Check the remainder R and adjust the quotient as needed.  */
+      r -= cy;
+      while (r != 0)
+	{
+	  /* We loop 0 times with about 69% probability, 1 time with about 31%
+	     probability, 2 times with about 0.6% probability, if inverse is
+	     computed as recommended.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  r -= cy;
+	  STAT (err++);
+	}
+      if (mpn_cmp (rp, dp, dn) >= 0)
+	{
+	  /* This is executed with about 76% probability.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  STAT (err++);
+	}
+
+      STAT (
+	    tot++;
+	    errarr[err]++;
+	    if (err > err_rec)
+	      err_rec = err;
+	    if (tot % 0x10000 == 0)
+	      {
+		for (i = 0; i <= err_rec; i++)
+		  printf ("  %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+		printf ("\n");
+	      }
+	    );
+    }
+
+  return qh;
+}
+
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
+   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+   (c) qn < dn/3:       in = qn
+   In all cases we have in <= dn.
+ */
+static mp_size_t
+mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+  mp_size_t in;
+
+  if (k == 0)
+    {
+      /* Automatic choice: pick the number of blocks b, then the block size
+	 in = ceil(qn/b), per the three cases documented above.  */
+      mp_size_t b;
+      if (qn > dn)
+	{
+	  /* Compute an inverse size that is a nice partition of the quotient.  */
+	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+	}
+      else if (3 * qn > dn)
+	{
+	  in = (qn - 1) / 2 + 1;	/* b = 2 */
+	}
+      else
+	{
+	  in = (qn - 1) / 1 + 1;	/* b = 1 */
+	}
+    }
+  else
+    {
+      /* Explicit block count k: in = ceil(min(dn,qn) / k).  */
+      mp_size_t xn;
+      xn = MIN (dn, qn);
+      in = (xn - 1) / k + 1;
+    }
+
+  return in;
+}
+
+/* Scratch (in limbs) needed by mpn_mu_div_qr: 'in' limbs for the inverse
+   plus the larger of the inversion and preinv-division requirements (the
+   assertion records that the division need is expected to dominate).  */
+mp_size_t
+mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
+  mp_size_t itch_preinv = mpn_preinv_mu_div_qr_itch (nn, dn, in);
+  mp_size_t itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
+
+  ASSERT (itch_preinv >= itch_invapp);
+  return in + MAX (itch_invapp, itch_preinv);
+}
+
+/* Scratch (in limbs) needed by mpn_preinv_mu_div_qr: room for a wraparound
+   product of size mpn_mulmod_bnm1_next_size(dn+1) plus mulmod's own
+   scratch.  The 'nn' parameter is currently unused.  */
+mp_size_t
+mpn_preinv_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, mp_size_t in)
+{
+  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+
+  return itch_local + itch_out;
+}
diff --git a/mpn/generic/mu_divappr_q.c b/mpn/generic/mu_divappr_q.c
new file mode 100644
index 0000000..0ef7e03
--- /dev/null
+++ b/mpn/generic/mu_divappr_q.c
@@ -0,0 +1,368 @@
+/* mpn_mu_divappr_q, mpn_preinv_mu_divappr_q.
+
+   Compute Q = floor(N / D) + e.  N is nn limbs, D is dn limbs and must be
+   normalized, and Q must be nn-dn limbs, 0 <= e <= 4.  The requirement that Q
+   is nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us
+   to let N be unmodified during the operation.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_div_qr.c should be edited in sync.
+
+ Things to work on:
+
+  * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+    demonstrated by the fact that the mpn_invertappr function's scratch needs
+    mean that we need to keep a large allocation long after it is needed.
+    Things are worse as mpn_mul_fft does not accept any scratch parameter,
+    which means we'll have a large memory hole while in mpn_mul_fft.  In
+    general, a peak scratch need in the beginning of a function isn't
+    well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+
+static mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t,
+			 mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+static mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int);
+
+/* Compute an approximate quotient Q = floor(N/D) + e, 0 <= e <= 4, for
+   N = {np,nn} and normalized D = {dp,dn}; Q gets nn-dn limbs at qp and the
+   high quotient limb is returned.  Operands are truncated first when the
+   quotient is shorter than the divisor, then an approximate inverse of the
+   (truncated) divisor's high part is built and the preinv worker performs
+   the block-wise division.  */
+mp_limb_t
+mpn_mu_divappr_q (mp_ptr qp,
+		  mp_srcptr np,
+		  mp_size_t nn,
+		  mp_srcptr dp,
+		  mp_size_t dn,
+		  mp_ptr scratch)
+{
+  mp_size_t qn, in;
+  mp_limb_t cy, qh;
+  mp_ptr ip, tp;
+
+  ASSERT (dn > 1);
+
+  qn = nn - dn;
+
+  /* If Q is smaller than D, truncate operands. */
+  if (qn + 1 < dn)
+    {
+      np += dn - (qn + 1);
+      nn -= dn - (qn + 1);
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  /* Compute the inverse size.  */
+  in = mpn_mu_divappr_q_choose_in (qn, dn, 0);
+  ASSERT (in <= dn);
+
+#if 1
+  /* This alternative inverse computation method gets slightly more accurate
+     results.  FIXMEs: (1) Temp allocation needs not analysed (2) itch function
+     not adapted (3) mpn_invertappr scratch needs not met.  */
+  ip = scratch;
+  tp = scratch + in + 1;
+
+  /* compute an approximate inverse on (in+1) limbs */
+  if (dn == in)
+    {
+      /* Whole divisor used: append a low guard limb of 1 below D before
+	 inverting, then drop the extra low inverse limb.  */
+      MPN_COPY (tp + 1, dp, in);
+      tp[0] = 1;
+      mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+      MPN_COPY_INCR (ip, ip + 1, in);
+    }
+  else
+    {
+      /* Invert the top in+1 limbs of D incremented by 1 (so the computed
+	 inverse errs on the low side); if the increment overflows, D's top
+	 limbs are all ones and a zero inverse is used.  */
+      cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
+      if (UNLIKELY (cy != 0))
+	MPN_ZERO (ip, in);
+      else
+	{
+	  mpn_invertappr (ip, tp, in + 1, tp + in + 1);
+	  MPN_COPY_INCR (ip, ip + 1, in);
+	}
+    }
+#else
+  /* This older inverse computation method gets slightly worse results than the
+     one above.  */
+  ip = scratch;
+  tp = scratch + in;
+
+  /* Compute inverse of D to in+1 limbs, then round to 'in' limbs.  Ideally the
+     inversion function should do this automatically.  */
+  if (dn == in)
+    {
+      tp[in + 1] = 0;
+      MPN_COPY (tp + in + 2, dp, in);
+      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+    }
+  else
+    {
+      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+    }
+  cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+  if (UNLIKELY (cy != 0))
+    MPN_ZERO (tp + 1, in);
+  MPN_COPY (ip, tp + 1, in);
+#endif
+
+  qh = mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
+
+  return qh;
+}
+
+/* Block-wise approximate Barrett division with a precomputed in-limb
+   approximate inverse {ip,in}.  Like mpn_preinv_mu_div_qr, but keeps the
+   partial remainder in scratch (no remainder output), stops once all
+   quotient limbs are produced, and finally adds 3 to Q with saturation so
+   the returned quotient is >= the true quotient (error e <= 4 total).  */
+static mp_limb_t
+mpn_preinv_mu_divappr_q (mp_ptr qp,
+			 mp_srcptr np,
+			 mp_size_t nn,
+			 mp_srcptr dp,
+			 mp_size_t dn,
+			 mp_srcptr ip,
+			 mp_size_t in,
+			 mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_limb_t cy, cx, qh;
+  mp_limb_t r;
+  mp_size_t tn, wn;
+
+/* Scratch layout: dn limbs of partial remainder, tn limbs of product
+   space, then mulmod_bnm1 scratch.  */
+#define rp           scratch
+#define tp           (scratch + dn)
+#define scratch_out  (scratch + dn + tn)
+
+  qn = nn - dn;
+
+  /* Point at the high ends of N and Q; both are consumed downwards.  */
+  np += qn;
+  qp += qn;
+
+  /* Initial partial remainder: the top dn limbs of N, reduced mod D once;
+     qh records that single reduction.  */
+  qh = mpn_cmp (np, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (rp, np, dp, dn);
+  else
+    MPN_COPY (rp, np, dn);
+
+  if (UNLIKELY (qn == 0))
+    return qh;			/* Degenerate use.  Should we allow this? */
+
+  for (;;) /* The exit condition (qn == 0) is verified in the loop. */
+    {
+      /* Last block may be shorter than 'in'; use the low part of I.  */
+      if (qn < in)
+	{
+	  ip += in - qn;
+	  in = qn;
+	}
+      np -= in;
+      qp -= in;
+
+      /* Compute the next block of quotient limbs by multiplying the inverse I
+	 by the upper part of the partial remainder R.  */
+      mpn_mul_n (tp, rp + dn - in, ip, in);		/* mulhi  */
+      cy = mpn_add_n (qp, tp + in, rp + dn - in, in);	/* I's msb implicit */
+      ASSERT_ALWAYS (cy == 0);
+
+      qn -= in;
+      if (qn == 0)
+	break;
+
+      /* Compute the product of the quotient block and the divisor D, to be
+	 subtracted from the partial remainder combined with new limbs from the
+	 dividend N.  We only really need the low dn limbs.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
+      else
+	{
+	  /* Wraparound product mod B^tn-1; undo the wrapped low limbs by
+	     subtracting the known high part of R that wrapped in.  */
+	  tn = mpn_mulmod_bnm1_next_size (dn + 1);
+	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  if (wn > 0)
+	    {
+	      cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+	      cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+	      cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+	      ASSERT_ALWAYS (cx >= cy);
+	      mpn_incr_u (tp, cx - cy);
+	    }
+	}
+
+      /* Difference in the limb just above the dn limbs subtracted below;
+	 together with the borrow it drives the adjustment loop.  */
+      r = rp[dn - in] - tp[dn];
+
+      /* Subtract the product from the partial remainder combined with new
+	 limbs from the dividend N, generating a new partial remainder R.  */
+      if (dn != in)
+	{
+	  cy = mpn_sub_n (tp, np, tp, in);	/* get next 'in' limbs from N */
+	  cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+	  MPN_COPY (rp, tp, dn);		/* FIXME: try to avoid this */
+	}
+      else
+	{
+	  cy = mpn_sub_n (rp, np, tp, in);	/* get next 'in' limbs from N */
+	}
+
+      STAT (int i; int err = 0;
+	    static int errarr[5]; static int err_rec; static int tot);
+
+      /* Check the remainder R and adjust the quotient as needed.  */
+      r -= cy;
+      while (r != 0)
+	{
+	  /* We loop 0 times with about 69% probability, 1 time with about 31%
+	     probability, 2 times with about 0.6% probability, if inverse is
+	     computed as recommended.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  r -= cy;
+	  STAT (err++);
+	}
+      if (mpn_cmp (rp, dp, dn) >= 0)
+	{
+	  /* This is executed with about 76% probability.  */
+	  mpn_incr_u (qp, 1);
+	  cy = mpn_sub_n (rp, rp, dp, dn);
+	  STAT (err++);
+	}
+
+      STAT (
+	    tot++;
+	    errarr[err]++;
+	    if (err > err_rec)
+	      err_rec = err;
+	    if (tot % 0x10000 == 0)
+	      {
+		for (i = 0; i <= err_rec; i++)
+		  printf ("  %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+		printf ("\n");
+	      }
+	    );
+    }
+
+  /* FIXME: We should perhaps be somewhat more elegant in our rounding of the
+     quotient.  For now, just make sure the returned quotient is >= the real
+     quotient; add 3 with saturating arithmetic.  */
+  qn = nn - dn;
+  /* cy is 0 here: the loop exits only just after asserting that the
+     quotient-block add produced no carry, so += acts as plain =.  */
+  cy += mpn_add_1 (qp, qp, qn, 3);
+  if (cy != 0)
+    {
+      if (qh != 0)
+	{
+	  /* Return a quotient of just 1-bits, with qh set.  */
+	  mp_size_t i;
+	  for (i = 0; i < qn; i++)
+	    qp[i] = GMP_NUMB_MAX;
+	}
+      else
+	{
+	  /* Propagate carry into qh.  */
+	  qh = 1;
+	}
+    }
+
+  return qh;
+}
+
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
+   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+   (c) qn < dn/3:       in = qn
+   In all cases we have in <= dn.
+ */
+static mp_size_t
+mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+  mp_size_t in;
+
+  if (k == 0)
+    {
+      /* Automatic choice: pick the number of blocks b, then the block size
+	 in = ceil(qn/b), per the three cases documented above.  */
+      mp_size_t b;
+      if (qn > dn)
+	{
+	  /* Compute an inverse size that is a nice partition of the quotient.  */
+	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+	}
+      else if (3 * qn > dn)
+	{
+	  in = (qn - 1) / 2 + 1;	/* b = 2 */
+	}
+      else
+	{
+	  in = (qn - 1) / 1 + 1;	/* b = 1 */
+	}
+    }
+  else
+    {
+      /* Explicit block count k: in = ceil(min(dn,qn) / k).  */
+      mp_size_t xn;
+      xn = MIN (dn, qn);
+      in = (xn - 1) / k + 1;
+    }
+
+  return in;
+}
+
+/* Scratch (in limbs) needed by mpn_mu_divappr_q.  Mirrors that function's
+   operand truncation, then accounts for the inverse plus the larger of the
+   division worker's need (dn-limb remainder + wraparound product + mulmod
+   scratch) and the inversion's need.  */
+mp_size_t
+mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t qn, in, itch_local, itch_out, itch_invapp;
+
+  qn = nn - dn;
+  if (qn + 1 < dn)
+    {
+      /* Same truncation as in mpn_mu_divappr_q.  */
+      dn = qn + 1;
+    }
+  in = mpn_mu_divappr_q_choose_in (qn, dn, mua_k);
+
+  itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+  itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+  itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
+
+  ASSERT (dn + itch_local + itch_out >= itch_invapp);
+  return in + MAX (dn + itch_local + itch_out, itch_invapp);
+}
diff --git a/mpn/generic/mul.c b/mpn/generic/mul.c
new file mode 100644
index 0000000..37444e9
--- /dev/null
+++ b/mpn/generic/mul.c
@@ -0,0 +1,441 @@
+/* mpn_mul -- Multiply two natural numbers.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1999-2003, 2005-2007, 2009, 2010, 2012,
+2014, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#ifndef MUL_BASECASE_MAX_UN
+#define MUL_BASECASE_MAX_UN 500
+#endif
+
+/* Areas where the different toom algorithms can be called (extracted
+   from the t-toom*.c files, and ignoring small constant offsets):
+
+   1/6  1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5   1 vn/un
+                                        4/7              6/7
+				       6/11
+                                       |--------------------| toom22 (small)
+                                                           || toom22 (large)
+                                                       |xxxx| toom22 called
+                      |-------------------------------------| toom32
+                                         |xxxxxxxxxxxxxxxx| | toom32 called
+                                               |------------| toom33
+                                                          |x| toom33 called
+             |---------------------------------|            | toom42
+	              |xxxxxxxxxxxxxxxxxxxxxxxx|            | toom42 called
+                                       |--------------------| toom43
+                                               |xxxxxxxxxx|   toom43 called
+         |-----------------------------|                      toom52 (unused)
+                                                   |--------| toom44
+						   |xxxxxxxx| toom44 called
+                              |--------------------|        | toom53
+                                        |xxxxxx|              toom53 called
+    |-------------------------|                               toom62 (unused)
+                                           |----------------| toom54 (unused)
+                      |--------------------|                  toom63
+	                      |xxxxxxxxx|                   | toom63 called
+                          |---------------------------------| toom6h
+						   |xxxxxxxx| toom6h called
+                                  |-------------------------| toom8h (32 bit)
+                 |------------------------------------------| toom8h (64 bit)
+						   |xxxxxxxx| toom8h called
+*/
+
+#define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
+#define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
+
+/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
+   (pointed to by VP, with VN limbs), and store the result at PRODP.  The
+   result is UN + VN limbs.  Return the most significant limb of the result.
+
+   NOTE: The space pointed to by PRODP is overwritten before finished with U
+   and V, so overlap is an error.
+
+   Argument constraints:
+   1. UN >= VN.
+   2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
+      the multiplier and the multiplicand.  */
+
+/*
+  * The cutoff lines in the toomX2 and toomX3 code are now exactly between the
+    ideal lines of the surrounding algorithms.  Is that optimal?
+
+  * The toomX3 code now uses a structure similar to the one of toomX2, except
+    that it loops longer in the unbalanced case.  The result is that the
+    remaining area might have un < vn.  Should we fix the toomX2 code in a
+    similar way?
+
+  * The toomX3 code is used for the largest non-FFT unbalanced operands.  It
+    therefore calls mpn_mul recursively for certain cases.
+
+  * Allocate static temp space using THRESHOLD variables (except for toom44
+    when !WANT_FFT).  That way, we can typically have no TMP_ALLOC at all.
+
+  * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42
+    have the same vn threshold.  This is not true, we should actually use
+    mul_basecase for slightly larger operands for toom32 than for toom22, and
+    even larger for toom42.
+
+  * That problem is even more prevalent for toomX3.  We therefore use special
+    THRESHOLD variables there.
+*/
+
+mp_limb_t
+mpn_mul (mp_ptr prodp,
+	 mp_srcptr up, mp_size_t un,
+	 mp_srcptr vp, mp_size_t vn)
+{
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
+  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
+
+  if (BELOW_THRESHOLD (un, MUL_TOOM22_THRESHOLD))
+    {
+      /* When un (and thus vn) is below the toom22 range, do mul_basecase.
+	 Test un and not vn here not to thwart the un >> vn code below.
+	 This special case is not necessary, but cuts the overhead for the
+	 smallest operands. */
+      mpn_mul_basecase (prodp, up, un, vp, vn);
+    }
+  else if (un == vn)
+    {
+      mpn_mul_n (prodp, up, vp, un);
+    }
+  else if (vn < MUL_TOOM22_THRESHOLD)
+    { /* plain schoolbook multiplication */
+
+      /* Unless un is very large, or else if have an applicable mpn_mul_N,
+	 perform basecase multiply directly.  */
+      if (un <= MUL_BASECASE_MAX_UN
+#if HAVE_NATIVE_mpn_mul_2
+	  || vn <= 2
+#else
+	  || vn == 1
+#endif
+	  )
+	mpn_mul_basecase (prodp, up, un, vp, vn);
+      else
+	{
+	  /* We have un >> MUL_BASECASE_MAX_UN > vn.  For better memory
+	     locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply
+	     these pieces with the vp[] operand.  After each such partial
+	     multiplication (but the last) we copy the most significant vn
+	     limbs into a temporary buffer since that part would otherwise be
+	     overwritten by the next multiplication.  After the next
+	     multiplication, we add it back.  This illustrates the situation:
+
+                                                    -->vn<--
+                                                      |  |<------- un ------->|
+                                                         _____________________|
+                                                        X                    /|
+                                                      /XX__________________/  |
+                                    _____________________                     |
+                                   X                    /                     |
+                                 /XX__________________/                       |
+               _____________________                                          |
+              /                    /                                          |
+            /____________________/                                            |
+	    ==================================================================
+
+	    The parts marked with X are the parts whose sums are copied into
+	    the temporary buffer.  */
+
+	  mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];
+	  mp_limb_t cy;
+	  ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);
+
+	  mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
+	  prodp += MUL_BASECASE_MAX_UN;
+	  MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
+	  up += MUL_BASECASE_MAX_UN;
+	  un -= MUL_BASECASE_MAX_UN;
+	  while (un > MUL_BASECASE_MAX_UN)
+	    {
+	      mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
+	      cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
+	      mpn_incr_u (prodp + vn, cy);
+	      prodp += MUL_BASECASE_MAX_UN;
+	      MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
+	      up += MUL_BASECASE_MAX_UN;
+	      un -= MUL_BASECASE_MAX_UN;
+	    }
+	  if (un > vn)
+	    {
+	      mpn_mul_basecase (prodp, up, un, vp, vn);
+	    }
+	  else
+	    {
+	      ASSERT (un > 0);
+	      mpn_mul_basecase (prodp, vp, vn, up, un);
+	    }
+	  cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
+	  mpn_incr_u (prodp + vn, cy);
+	}
+    }
+  else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
+    {
+      /* Use ToomX2 variants */
+      mp_ptr scratch;
+      TMP_SDECL; TMP_SMARK;
+
+#define ITCH_TOOMX2 (9 * vn / 2 + GMP_NUMB_BITS * 2)
+      scratch = TMP_SALLOC_LIMBS (ITCH_TOOMX2);
+      ASSERT (mpn_toom22_mul_itch ((5*vn-1)/4, vn) <= ITCH_TOOMX2); /* 5vn/2+ */
+      ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX2); /* 7vn/6+ */
+      ASSERT (mpn_toom42_mul_itch (3 * vn - 1, vn) <= ITCH_TOOMX2); /* 9vn/2+ */
+#undef ITCH_TOOMX2
+
+      /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
+	 square to a (3vn-1)*vn rectangle.  Leaving such a rectangle is hardly
+	 wise; we would get better balance by slightly moving the bound.  We
+	 will sometimes end up with un < vn, like in the X3 arm below.  */
+      if (un >= 3 * vn)
+	{
+	  mp_limb_t cy;
+	  mp_ptr ws;
+
+	  /* The maximum ws usage is for the mpn_mul result.  */
+	  ws = TMP_SALLOC_LIMBS (4 * vn);
+
+	  mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+	  un -= 2 * vn;
+	  up += 2 * vn;
+	  prodp += 2 * vn;
+
+	  while (un >= 3 * vn)
+	    {
+	      mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+	      un -= 2 * vn;
+	      up += 2 * vn;
+	      cy = mpn_add_n (prodp, prodp, ws, vn);
+	      MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+	      mpn_incr_u (prodp + vn, cy);
+	      prodp += 2 * vn;
+	    }
+
+	  /* vn <= un < 3vn */
+
+	  if (4 * un < 5 * vn)
+	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+	  else if (4 * un < 7 * vn)
+	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
+	  else
+	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+
+	  cy = mpn_add_n (prodp, prodp, ws, vn);
+	  MPN_COPY (prodp + vn, ws + vn, un);
+	  mpn_incr_u (prodp + vn, cy);
+	}
+      else
+	{
+	  if (4 * un < 5 * vn)
+	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+	  else if (4 * un < 7 * vn)
+	    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+	  else
+	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+	}
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
+	   BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
+    {
+      /* Handle the largest operands that are not in the FFT range.  The 2nd
+	 condition makes very unbalanced operands avoid the FFT code (except
+	 perhaps as coefficient products of the Toom code).  */
+
+      if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
+	{
+	  /* Use ToomX3 variants */
+	  mp_ptr scratch;
+	  TMP_DECL; TMP_MARK;
+
+#define ITCH_TOOMX3 (4 * vn + GMP_NUMB_BITS)
+	  scratch = TMP_ALLOC_LIMBS (ITCH_TOOMX3);
+	  ASSERT (mpn_toom33_mul_itch ((7*vn-1)/6, vn) <= ITCH_TOOMX3); /* 7vn/2+ */
+	  ASSERT (mpn_toom43_mul_itch ((3*vn-1)/2, vn) <= ITCH_TOOMX3); /* 9vn/4+ */
+	  ASSERT (mpn_toom32_mul_itch ((7*vn-1)/4, vn) <= ITCH_TOOMX3); /* 7vn/6+ */
+	  ASSERT (mpn_toom53_mul_itch ((11*vn-1)/6, vn) <= ITCH_TOOMX3); /* 11vn/3+ */
+	  ASSERT (mpn_toom42_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
+	  ASSERT (mpn_toom63_mul_itch ((5*vn-1)/2, vn) <= ITCH_TOOMX3); /* 15vn/4+ */
+#undef ITCH_TOOMX3
+
+	  if (2 * un >= 5 * vn)
+	    {
+	      mp_limb_t cy;
+	      mp_ptr ws;
+
+	      /* The maximum ws usage is for the mpn_mul result.  */
+	      ws = TMP_ALLOC_LIMBS (7 * vn >> 1);
+
+	      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+		mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+	      else
+		mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
+	      un -= 2 * vn;
+	      up += 2 * vn;
+	      prodp += 2 * vn;
+
+	      while (2 * un >= 5 * vn)	/* un >= 2.5vn */
+		{
+		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+		    mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+		  else
+		    mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
+		  un -= 2 * vn;
+		  up += 2 * vn;
+		  cy = mpn_add_n (prodp, prodp, ws, vn);
+		  MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+		  mpn_incr_u (prodp + vn, cy);
+		  prodp += 2 * vn;
+		}
+
+	      /* vn / 2 <= un < 2.5vn */
+
+	      if (un < vn)
+		mpn_mul (ws, vp, vn, up, un);
+	      else
+		mpn_mul (ws, up, un, vp, vn);
+
+	      cy = mpn_add_n (prodp, prodp, ws, vn);
+	      MPN_COPY (prodp + vn, ws + vn, un);
+	      mpn_incr_u (prodp + vn, cy);
+	    }
+	  else
+	    {
+	      if (6 * un < 7 * vn)
+		mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
+	      else if (2 * un < 3 * vn)
+		{
+		  if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
+		    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+		  else
+		    mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
+		}
+	      else if (6 * un < 11 * vn)
+		{
+		  if (4 * un < 7 * vn)
+		    {
+		      if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
+			mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+		      else
+			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
+		    }
+		  else
+		    {
+		      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
+			mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+		      else
+			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
+		    }
+		}
+	      else
+		{
+		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+		    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+		  else
+		    mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
+		}
+	    }
+	  TMP_FREE;
+	}
+      else
+	{
+	  mp_ptr scratch;
+	  TMP_DECL; TMP_MARK;
+
+	  if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
+	    {
+	      scratch = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
+	      mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
+	    }
+	  else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
+	    {
+	      scratch = TMP_SALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
+	      mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
+	    }
+	  else
+	    {
+	      scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
+	      mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
+	    }
+	  TMP_FREE;
+	}
+    }
+  else
+    {
+      if (un >= 8 * vn)
+	{
+	  mp_limb_t cy;
+	  mp_ptr ws;
+	  TMP_DECL; TMP_MARK;
+
+	  /* The maximum ws usage is for the mpn_mul result.  */
+	  ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
+
+	  mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
+	  un -= 3 * vn;
+	  up += 3 * vn;
+	  prodp += 3 * vn;
+
+	  while (2 * un >= 7 * vn)	/* un >= 3.5vn  */
+	    {
+	      mpn_fft_mul (ws, up, 3 * vn, vp, vn);
+	      un -= 3 * vn;
+	      up += 3 * vn;
+	      cy = mpn_add_n (prodp, prodp, ws, vn);
+	      MPN_COPY (prodp + vn, ws + vn, 3 * vn);
+	      mpn_incr_u (prodp + vn, cy);
+	      prodp += 3 * vn;
+	    }
+
+	  /* vn / 2 <= un < 3.5vn */
+
+	  if (un < vn)
+	    mpn_mul (ws, vp, vn, up, un);
+	  else
+	    mpn_mul (ws, up, un, vp, vn);
+
+	  cy = mpn_add_n (prodp, prodp, ws, vn);
+	  MPN_COPY (prodp + vn, ws + vn, un);
+	  mpn_incr_u (prodp + vn, cy);
+
+	  TMP_FREE;
+	}
+      else
+	mpn_fft_mul (prodp, up, un, vp, vn);
+    }
+
+  return prodp[un + vn - 1];	/* historic */
+}
diff --git a/mpn/generic/mul_1.c b/mpn/generic/mul_1.c
new file mode 100644
index 0000000..52d46da
--- /dev/null
+++ b/mpn/generic/mul_1.c
@@ -0,0 +1,96 @@
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and store the
+   product in a second limb vector.
+
+Copyright 1991-1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t carry, hi, lo;
+  mp_size_t i;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+
+  /* Schoolbook single-limb multiply: each step forms up[i]*vl as a double
+     limb, folds the running carry into the low half, and keeps the high
+     half plus any wraparound as the next carry.  */
+  carry = 0;
+  for (i = 0; i < n; i++)
+    {
+      umul_ppmm (hi, lo, up[i], vl);
+      lo += carry;
+      carry = hi + (lo < carry);
+      rp[i] = lo;
+    }
+
+  return carry;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t shifted_vl, ul, lpl, hpl, prev_hpl, xw, cl, xl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (vl);
+
+  /* Pre-shift vl left by the nail width so that the high half produced by
+     umul_ppmm is already aligned on a numb boundary.  */
+  shifted_vl = vl << GMP_NAIL_BITS;
+  cl = 0;
+  prev_hpl = 0;
+  do
+    {
+      ul = *up++;
+
+      umul_ppmm (hpl, lpl, ul, shifted_vl);
+      /* Drop the nail bits from the low half; the previous iteration's high
+	 half, the new low half and the carry are then summed in xw, whose
+	 overflow past GMP_NUMB_BITS becomes the next carry.  */
+      lpl >>= GMP_NAIL_BITS;
+      xw = prev_hpl + lpl + cl;
+      cl = xw >> GMP_NUMB_BITS;
+      xl = xw & GMP_NUMB_MASK;
+      *rp++ = xl;
+      prev_hpl = hpl;
+    }
+  while (--n != 0);
+
+  /* The last high half never got folded into a stored limb; it is the
+     function's return (out) limb together with the final carry.  */
+  return prev_hpl + cl;
+}
+
+#endif
diff --git a/mpn/generic/mul_basecase.c b/mpn/generic/mul_basecase.c
new file mode 100644
index 0000000..2487fba
--- /dev/null
+++ b/mpn/generic/mul_basecase.c
@@ -0,0 +1,165 @@
+/* mpn_mul_basecase -- Internal routine to multiply two natural numbers
+   of length m and n.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 1991-1994, 1996, 1997, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Multiply {up,usize} by {vp,vsize} and write the result to
+   {prodp,usize+vsize}.  Must have usize>=vsize.
+
+   Note that prodp gets usize+vsize limbs stored, even if the actual result
+   only needs usize+vsize-1.
+
+   There's no good reason to call here with vsize>=MUL_TOOM22_THRESHOLD.
+   Currently this is allowed, but it might not be in the future.
+
+   This is the most critical code for multiplication.  All multiplies rely
+   on this, both small and huge.  Small ones arrive here immediately, huge
+   ones arrive here as this is the base case for Karatsuba's recursive
+   algorithm.  */
+
+void
+mpn_mul_basecase (mp_ptr rp,
+		  mp_srcptr up, mp_size_t un,
+		  mp_srcptr vp, mp_size_t vn)
+{
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un));
+  ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));
+
+  /* We first multiply by the low order limb (or depending on optional function
+     availability, limbs).  This result can be stored, not added, to rp.  We
+     also avoid a loop for zeroing this way.  */
+
+#if HAVE_NATIVE_mpn_mul_2
+  if (vn >= 2)
+    {
+      rp[un + 1] = mpn_mul_2 (rp, up, un, vp);
+      rp += 2, vp += 2, vn -= 2;
+    }
+  else
+    {
+      rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+      return;
+    }
+#else
+  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+  rp += 1, vp += 1, vn -= 1;
+#endif
+
+  /* Now accumulate the product of up[] and the next higher limb (or depending
+     on optional function availability, limbs) from vp[].  */
+
+  /* MAX_LEFT is a compile-time bound on how many vp[] limbs can still remain
+     at each rung of the ladder below: once a native addmul_k loop has run to
+     completion, at most k-1 limbs are left, so the compiler can collapse the
+     subsequent "while" loops into plain ifs.  */
+#define MAX_LEFT MP_SIZE_T_MAX	/* Used to simplify loops into if statements */
+
+
+#if HAVE_NATIVE_mpn_addmul_6
+  while (vn >= 6)
+    {
+      rp[un + 6 - 1] = mpn_addmul_6 (rp, up, un, vp);
+      if (MAX_LEFT == 6)
+	return;
+      rp += 6, vp += 6, vn -= 6;
+      if (MAX_LEFT < 2 * 6)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (6 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_5
+  while (vn >= 5)
+    {
+      rp[un + 5 - 1] = mpn_addmul_5 (rp, up, un, vp);
+      if (MAX_LEFT == 5)
+	return;
+      rp += 5, vp += 5, vn -= 5;
+      if (MAX_LEFT < 2 * 5)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (5 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_4
+  while (vn >= 4)
+    {
+      rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);
+      if (MAX_LEFT == 4)
+	return;
+      rp += 4, vp += 4, vn -= 4;
+      if (MAX_LEFT < 2 * 4)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (4 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_3
+  while (vn >= 3)
+    {
+      rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);
+      if (MAX_LEFT == 3)
+	return;
+      rp += 3, vp += 3, vn -= 3;
+      if (MAX_LEFT < 2 * 3)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (3 - 1)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2
+  while (vn >= 2)
+    {
+      rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);
+      if (MAX_LEFT == 2)
+	return;
+      rp += 2, vp += 2, vn -= 2;
+      if (MAX_LEFT < 2 * 2)
+	break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (2 - 1)
+#endif
+
+  /* Fallback rung: accumulate one vp[] limb at a time.  */
+  while (vn >= 1)
+    {
+      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
+      if (MAX_LEFT == 1)
+	return;
+      rp += 1, vp += 1, vn -= 1;
+    }
+}
diff --git a/mpn/generic/mul_fft.c b/mpn/generic/mul_fft.c
new file mode 100644
index 0000000..76a2106
--- /dev/null
+++ b/mpn/generic/mul_fft.c
@@ -0,0 +1,1105 @@
+/* Schoenhage's fast multiplication modulo 2^N+1.
+
+   Contributed by Paul Zimmermann.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1998-2010, 2012, 2013, 2018, 2020, 2022 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* References:
+
+   Schnelle Multiplikation grosser Zahlen, by Arnold Schoenhage and Volker
+   Strassen, Computing 7, p. 281-292, 1971.
+
+   Asymptotically fast algorithms for the numerical multiplication and division
+   of polynomials with complex coefficients, by Arnold Schoenhage, Computer
+   Algebra, EUROCAM'82, LNCS 144, p. 3-15, 1982.
+
+   Tapes versus Pointers, a study in implementing fast algorithms, by Arnold
+   Schoenhage, Bulletin of the EATCS, 30, p. 23-32, 1986.
+
+   TODO:
+
+   Implement some of the tricks published at ISSAC'2007 by Gaudry, Kruppa, and
+   Zimmermann.
+
+   It might be possible to avoid a small number of MPN_COPYs by using a
+   rotating temporary or two.
+
+   Cleanup and simplify the code!
+*/
+
+#ifdef TRACE
+#undef TRACE
+#define TRACE(x) x
+#include <stdio.h>
+#else
+#define TRACE(x)
+#endif
+
+#include "gmp-impl.h"
+
+#ifdef WANT_ADDSUB
+#include "generic/add_n_sub_n.c"
+#define HAVE_NATIVE_mpn_add_n_sub_n 1
+#endif
+
+static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
+				       mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
+				       mp_size_t, mp_size_t, int **, mp_ptr, int);
+static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, mp_size_t, mp_size_t, mp_srcptr,
+				   mp_size_t, mp_size_t, mp_size_t, mp_ptr);
+
+
+/* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
+   We have sqr=0 if for a multiply, sqr=1 for a square.
+   There are three generations of this code; we keep the old ones as long as
+   some gmp-mparam.h is not updated.  */
+
+
+/*****************************************************************************/
+
+#if TUNE_PROGRAM_BUILD || (defined (MUL_FFT_TABLE3) && defined (SQR_FFT_TABLE3))
+
+#ifndef FFT_TABLE3_SIZE		/* When tuning this is defined in gmp-impl.h */
+#if defined (MUL_FFT_TABLE3_SIZE) && defined (SQR_FFT_TABLE3_SIZE)
+#if MUL_FFT_TABLE3_SIZE > SQR_FFT_TABLE3_SIZE
+#define FFT_TABLE3_SIZE MUL_FFT_TABLE3_SIZE
+#else
+#define FFT_TABLE3_SIZE SQR_FFT_TABLE3_SIZE
+#endif
+#endif
+#endif
+
+#ifndef FFT_TABLE3_SIZE
+#define FFT_TABLE3_SIZE 200
+#endif
+
+FFT_TABLE_ATTRS struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE] =
+{
+  MUL_FFT_TABLE3,
+  SQR_FFT_TABLE3
+};
+
+int
+mpn_fft_best_k (mp_size_t n, int sqr)
+{
+  const struct fft_table_nk *fft_tab, *tab;
+  mp_size_t tab_n, thres;
+  int last_k;
+
+  /* Walk the tuned (n,k) table for multiply (sqr=0) or square (sqr=1).
+     Each entry's n, scaled by the previously selected k, gives the size
+     threshold up to which that previous k remains the best choice.  */
+  fft_tab = mpn_fft_table3[sqr];
+  last_k = fft_tab->k;
+  for (tab = fft_tab + 1; ; tab++)
+    {
+      tab_n = tab->n;
+      thres = tab_n << last_k;
+      if (n <= thres)
+	break;
+      last_k = tab->k;
+    }
+  return last_k;
+}
+
+#define MPN_FFT_BEST_READY 1
+#endif
+
+/*****************************************************************************/
+
+#if ! defined (MPN_FFT_BEST_READY)
+FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] =
+{
+  MUL_FFT_TABLE,
+  SQR_FFT_TABLE
+};
+
+int
+mpn_fft_best_k (mp_size_t n, int sqr)
+{
+  int i = 0;
+
+  /* Scan the zero-terminated threshold table; entry i covers operand
+     sizes below mpn_fft_table[sqr][i].  */
+  while (mpn_fft_table[sqr][i] != 0)
+    {
+      if (n < mpn_fft_table[sqr][i])
+	return i + FFT_FIRST_K;
+      i++;
+    }
+
+  /* Past the end of the table: treat 4*last as one further entry.  */
+  if (i == 0 || n < 4 * mpn_fft_table[sqr][i - 1])
+    return i + FFT_FIRST_K;
+  return i + FFT_FIRST_K + 1;
+}
+#endif
+
+/*****************************************************************************/
+
+
+/* Returns smallest possible number of limbs >= pl for a fft of size 2^k,
+   i.e. smallest multiple of 2^k >= pl.
+
+   Don't declare static: needed by tuneup.
+*/
+
+mp_size_t
+mpn_fft_next_size (mp_size_t pl, int k)
+{
+  /* Round pl up to the next multiple of 2^k: take ceil(pl / 2^k) chunks
+     and scale back up.  */
+  mp_size_t chunks = ((pl - 1) >> k) + 1;
+  return chunks << k;
+}
+
+
+/* Initialize l[i][j] with bitrev(j) */
+static void
+mpn_fft_initl (int **l, int k)
+{
+  int level, idx, K;
+
+  /* Table l[level] holds the 2^level bit-reversed indices.  Each level is
+     built from the one below: doubling an entry appends a 0 bit, and the
+     upper half gets the same values with the low bit set.  */
+  l[0][0] = 0;
+  K = 1;
+  for (level = 1; level <= k; level++)
+    {
+      int *cur = l[level];
+      int *prev = l[level - 1];
+
+      for (idx = 0; idx < K; idx++)
+	{
+	  cur[idx] = 2 * prev[idx];
+	  cur[K + idx] = cur[idx] + 1;
+	}
+      K *= 2;
+    }
+}
+
+
+/* r <- a*2^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
+   Assumes a is semi-normalized, i.e. a[n] <= 1.
+   r and a must have n+1 limbs, and not overlap.
+*/
+static void
+mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t d, mp_size_t n)
+{
+  unsigned int sh;
+  mp_size_t m;
+  mp_limb_t cc, rd;
+
+  /* Split the shift count into whole limbs (m) and leftover bits (sh).  */
+  sh = d % GMP_NUMB_BITS;
+  m = d / GMP_NUMB_BITS;
+
+  /* Since 2^(n*GMP_NUMB_BITS) == -1 mod F, shifting by n or more limbs
+     wraps around with a sign flip.  */
+  if (m >= n)			/* negate */
+    {
+      /* r[0..m-1]  <-- lshift(a[n-m]..a[n-1], sh)
+	 r[m..n-1]  <-- -lshift(a[0]..a[n-m-1],  sh) */
+
+      m -= n;
+      if (sh != 0)
+	{
+	  /* no out shift below since a[n] <= 1 */
+	  mpn_lshift (r, a + n - m, m + 1, sh);
+	  rd = r[m];
+	  cc = mpn_lshiftc (r + m, a, n - m, sh);
+	}
+      else
+	{
+	  MPN_COPY (r, a + n - m, m);
+	  rd = a[n];
+	  mpn_com (r + m, a, n - m);
+	  cc = 0;
+	}
+
+      /* add cc to r[0], and add rd to r[m] */
+
+      /* now add 1 in r[m], subtract 1 in r[n], i.e. add 1 in r[0] */
+
+      r[n] = 0;
+      /* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
+      ++cc;
+      MPN_INCR_U (r, n + 1, cc);
+
+      ++rd;
+      /* rd might overflow when sh=GMP_NUMB_BITS-1 */
+      cc = rd + (rd == 0);
+      r = r + m + (rd == 0);
+      MPN_INCR_U (r, n + 1 - m - (rd == 0), cc);
+    }
+  else
+    {
+      /* r[0..m-1]  <-- -lshift(a[n-m]..a[n-1], sh)
+	 r[m..n-1]  <-- lshift(a[0]..a[n-m-1],  sh)  */
+      if (sh != 0)
+	{
+	  /* no out bits below since a[n] <= 1 */
+	  mpn_lshiftc (r, a + n - m, m + 1, sh);
+	  rd = ~r[m];
+	  /* {r, m+1} = {a+n-m, m+1} << sh */
+	  cc = mpn_lshift (r + m, a, n - m, sh); /* {r+m, n-m} = {a, n-m}<<sh */
+	}
+      else
+	{
+	  /* r[m] is not used below, but we save a test for m=0 */
+	  mpn_com (r, a + n - m, m + 1);
+	  rd = a[n];
+	  MPN_COPY (r + m, a, n - m);
+	  cc = 0;
+	}
+
+      /* now complement {r, m}, subtract cc from r[0], subtract rd from r[m] */
+
+      /* if m=0 we just have r[0]=a[n] << sh */
+      if (m != 0)
+	{
+	  /* now add 1 in r[0], subtract 1 in r[m] */
+	  if (cc-- == 0) /* then add 1 to r[0] */
+	    cc = mpn_add_1 (r, r, n, CNST_LIMB(1));
+	  cc = mpn_sub_1 (r, r, m, cc) + 1;
+	  /* add 1 to cc instead of rd since rd might overflow */
+	}
+
+      /* now subtract cc and rd from r[m..n] */
+
+      r[n] = 2; /* Add a value, to avoid borrow propagation */
+      MPN_DECR_U (r + m, n - m + 1, cc);
+      MPN_DECR_U (r + m, n - m + 1, rd);
+      /* Remove the added value, and check for a possible borrow. */
+      if (UNLIKELY ((r[n] -= 2) != 0))
+	{
+	  mp_limb_t cy = -r[n];
+	  /* cy should always be 1, except in the very unlikely case
+	     m=n-1, r[m]=0, cc+rd>GMP_NUMB_MAX+1. Never triggered.
+	     Is it actually possible? */
+	  r[n] = 0;
+	  MPN_INCR_U (r, n + 1, cy);
+	}
+    }
+}
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+static inline void
+mpn_fft_add_sub_modF (mp_ptr A0, mp_ptr Ai, mp_srcptr tp, mp_size_t n)
+{
+  mp_limb_t cyas, c, x;
+
+  /* One fused pass forms both A0+tp (into A0) and A0-tp (into Ai) over the
+     low n limbs; per mpn_add_n_sub_n's return convention the low bit of
+     cyas is used below as the subtraction borrow and the next bit as the
+     addition carry.  */
+  cyas = mpn_add_n_sub_n (A0, Ai, A0, tp, n);
+
+  /* Fold the difference's top limb/borrow back mod 2^(n*GMP_NUMB_BITS)+1:
+     if c is negative, add -c at limb 0 (2^(n*L) == -1 mod F).  */
+  c = A0[n] - tp[n] - (cyas & 1);
+  x = (-c) & -((c & GMP_LIMB_HIGHBIT) != 0);
+  Ai[n] = x + c;
+  MPN_INCR_U (Ai, n + 1, x);
+
+  /* Fold the sum's carry likewise: subtract the excess at limb 0, leaving
+     a semi-normalized result (top limb 0 or 1).  */
+  c = A0[n] + tp[n] + (cyas >> 1);
+  x = (c - 1) & -(c != 0);
+  A0[n] = c - x;
+  MPN_DECR_U (A0, n + 1, x);
+}
+
+#else /* ! HAVE_NATIVE_mpn_add_n_sub_n  */
+
+/* r <- a+b mod 2^(n*GMP_NUMB_BITS)+1.
+   Assumes a and b are semi-normalized.
+*/
+static inline void
+mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  mp_limb_t c, x;
+
+  /* c collects both top limbs (each at most 1) and the carry from the low
+     n limbs.  */
+  c = a[n] + b[n] + mpn_add_n (r, a, b, n);
+  /* 0 <= c <= 3 */
+
+#if 1
+  /* GCC 4.1 outsmarts most expressions here, and generates a 50% branch.  The
+     result is slower code, of course.  But the following outsmarts GCC.  */
+  /* x = c-1 when c > 0, else 0; storing c-x (0 or 1) at limb n and
+     subtracting x at limb 0 reduces mod F, since 2^(n*L) == -1.  */
+  x = (c - 1) & -(c != 0);
+  r[n] = c - x;
+  MPN_DECR_U (r, n + 1, x);
+#endif
+#if 0
+  if (c > 1)
+    {
+      r[n] = 1;                       /* r[n] - c = 1 */
+      MPN_DECR_U (r, n + 1, c - 1);
+    }
+  else
+    {
+      r[n] = c;
+    }
+#endif
+}
+
+/* r <- a-b mod 2^(n*GMP_NUMB_BITS)+1.
+   Assumes a and b are semi-normalized.
+*/
+static inline void
+mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  mp_limb_t c, x;
+
+  /* c is the signed top-limb difference including the low-limb borrow.  */
+  c = a[n] - b[n] - mpn_sub_n (r, a, b, n);
+  /* -2 <= c <= 1 */
+
+#if 1
+  /* GCC 4.1 outsmarts most expressions here, and generates a 50% branch.  The
+     result is slower code, of course.  But the following outsmarts GCC.  */
+  /* x = -c when c is negative, else 0; adding x at limb 0 and writing x+c
+     (then 0 or 1) at limb n reduces mod F, since 2^(n*L) == -1.  */
+  x = (-c) & -((c & GMP_LIMB_HIGHBIT) != 0);
+  r[n] = x + c;
+  MPN_INCR_U (r, n + 1, x);
+#endif
+#if 0
+  if ((c & GMP_LIMB_HIGHBIT) != 0)
+    {
+      r[n] = 0;
+      MPN_INCR_U (r, n + 1, -c);
+    }
+  else
+    {
+      r[n] = c;
+    }
+#endif
+}
+#endif /* HAVE_NATIVE_mpn_add_n_sub_n */
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+	  N=n*GMP_NUMB_BITS, and 2^omega is a primitive root mod 2^N+1
+   output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1 */
+
+static void
+mpn_fft_fft (mp_ptr *Ap, mp_size_t K, int **ll,
+	     mp_size_t omega, mp_size_t n, mp_size_t inc, mp_ptr tp)
+{
+  if (K == 2)
+    {
+      /* Size-2 butterfly: (a, b) -> (a+b, a-b), then bring both residues
+	 back to semi-normalized form (top limb 0 or 1).  */
+      mp_limb_t cy;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      cy = mpn_add_n_sub_n (Ap[0], Ap[inc], Ap[0], Ap[inc], n + 1) & 1;
+#else
+      MPN_COPY (tp, Ap[0], n + 1);
+      mpn_add_n (Ap[0], Ap[0], Ap[inc], n + 1);
+      cy = mpn_sub_n (Ap[inc], tp, Ap[inc], n + 1);
+#endif
+      if (Ap[0][n] > 1) /* can be 2 or 3 */
+	{ /* Ap[0][n] = 1 - mpn_sub_1 (Ap[0], Ap[0], n, Ap[0][n] - 1); */
+	  mp_limb_t cc = Ap[0][n] - 1;
+	  Ap[0][n] = 1;
+	  MPN_DECR_U (Ap[0], n + 1, cc);
+	}
+      if (cy) /* Ap[inc][n] can be -1 or -2 */
+	{ /* Ap[inc][n] = mpn_add_1 (Ap[inc], Ap[inc], n, ~Ap[inc][n] + 1); */
+	  mp_limb_t cc = ~Ap[inc][n] + 1;
+	  Ap[inc][n] = 0;
+	  MPN_INCR_U (Ap[inc], n + 1, cc);
+	}
+    }
+  else
+    {
+      mp_size_t j, K2 = K >> 1;
+      int *lk = *ll;
+
+      /* Radix-2 decimation: transform the even- and odd-strided halves,
+	 then combine pairs with twiddle factors 2^(lk[] * omega).  */
+      mpn_fft_fft (Ap,     K2, ll-1, 2 * omega, n, inc * 2, tp);
+      mpn_fft_fft (Ap+inc, K2, ll-1, 2 * omega, n, inc * 2, tp);
+      /* A[2*j*inc]   <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
+	 A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
+      for (j = 0; j < K2; j++, lk += 2, Ap += 2 * inc)
+	{
+	  /* Ap[inc] <- Ap[0] + Ap[inc] * 2^(lk[1] * omega)
+	     Ap[0]   <- Ap[0] + Ap[inc] * 2^(lk[0] * omega) */
+	  mpn_fft_mul_2exp_modF (tp, Ap[inc], lk[0] * omega, n);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+	  mpn_fft_add_sub_modF (Ap[0], Ap[inc], tp, n);
+#else
+	  mpn_fft_sub_modF (Ap[inc], Ap[0], tp, n);
+	  mpn_fft_add_modF (Ap[0],   Ap[0], tp, n);
+#endif
+	}
+    }
+}
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+	  N=n*GMP_NUMB_BITS, and 2^omega is a primitive root mod 2^N+1
+   output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1
+   tp must have space for 2*(n+1) limbs.
+*/
+
+
+/* Given ap[0..n] with ap[n]<=1, reduce it modulo 2^(n*GMP_NUMB_BITS)+1,
+   by subtracting that modulus if necessary.
+
+   If ap[0..n] is exactly 2^(n*GMP_NUMB_BITS) then mpn_sub_1 produces a
+   borrow and the limbs must be zeroed out again.  This will occur very
+   infrequently.  */
+
+static inline void
+mpn_fft_normalize (mp_ptr ap, mp_size_t n)
+{
+  if (ap[n] != 0)
+    {
+      /* ap[n] can only be 1 here (semi-normalized input); subtract the
+	 modulus 2^(n*GMP_NUMB_BITS)+1 once, i.e. decrement by 1 and then
+	 clear the top limb.  */
+      MPN_DECR_U (ap, n + 1, CNST_LIMB(1));
+      if (ap[n] == 0)
+	{
+	  /* This happens with very low probability; we have yet to trigger it,
+	     and thereby make sure this code is correct.  */
+	  MPN_ZERO (ap, n);
+	  ap[n] = 1;
+	}
+      else
+	ap[n] = 0;
+    }
+}
+
+/* a[i] <- a[i]*b[i] mod 2^(n*GMP_NUMB_BITS)+1 for 0 <= i < K */
+static void
+mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
+{
+  int i;
+  unsigned k;
+  /* Squaring is detected by pointer identity of the two coefficient
+     arrays.  */
+  int sqr = (ap == bp);
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* Large coefficients: recurse into a smaller FFT for each of the K
+     pointwise products mod 2^(n*GMP_NUMB_BITS)+1.  */
+  if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {
+      mp_size_t K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
+      int k;
+      int **fft_l, *tmp;
+      mp_ptr *Ap, *Bp, A, B, T;
+
+      k = mpn_fft_best_k (n, sqr);
+      K2 = (mp_size_t) 1 << k;
+      ASSERT_ALWAYS((n & (K2 - 1)) == 0);
+      maxLK = (K2 > GMP_NUMB_BITS) ? K2 : GMP_NUMB_BITS;
+      M2 = n * GMP_NUMB_BITS >> k;
+      l = n >> k;
+      Nprime2 = ((2 * M2 + k + 2 + maxLK) / maxLK) * maxLK;
+      /* Nprime2 = ceil((2*M2+k+3)/maxLK)*maxLK*/
+      nprime2 = Nprime2 / GMP_NUMB_BITS;
+
+      /* we should ensure that nprime2 is a multiple of the next K */
+      if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+	{
+	  mp_size_t K3;
+	  for (;;)
+	    {
+	      K3 = (mp_size_t) 1 << mpn_fft_best_k (nprime2, sqr);
+	      if ((nprime2 & (K3 - 1)) == 0)
+		break;
+	      nprime2 = (nprime2 + K3 - 1) & -K3;
+	      Nprime2 = nprime2 * GMP_LIMB_BITS;
+	      /* warning: since nprime2 changed, K3 may change too! */
+	    }
+	}
+      ASSERT_ALWAYS(nprime2 < n); /* otherwise we'll loop */
+
+      Mp2 = Nprime2 >> k;
+
+      Ap = TMP_BALLOC_MP_PTRS (K2);
+      Bp = TMP_BALLOC_MP_PTRS (K2);
+      A = TMP_BALLOC_LIMBS (2 * (nprime2 + 1) << k);
+      T = TMP_BALLOC_LIMBS (2 * (nprime2 + 1));
+      B = A + ((nprime2 + 1) << k);
+      fft_l = TMP_BALLOC_TYPE (k + 1, int *);
+      tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+      for (i = 0; i <= k; i++)
+	{
+	  fft_l[i] = tmp;
+	  tmp += (mp_size_t) 1 << i;
+	}
+
+      mpn_fft_initl (fft_l, k);
+
+      TRACE (printf ("recurse: %ldx%ld limbs -> %ld times %ldx%ld (%1.2f)\n", n,
+		    n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
+      for (i = 0; i < K; i++, ap++, bp++)
+	{
+	  mp_limb_t cy;
+	  mpn_fft_normalize (*ap, n);
+	  if (!sqr)
+	    mpn_fft_normalize (*bp, n);
+
+	  mpn_mul_fft_decompose (A, Ap, K2, nprime2, *ap, (l << k) + 1, l, Mp2, T);
+	  if (!sqr)
+	    mpn_mul_fft_decompose (B, Bp, K2, nprime2, *bp, (l << k) + 1, l, Mp2, T);
+
+	  cy = mpn_mul_fft_internal (*ap, n, k, Ap, Bp, A, B, nprime2,
+				     l, Mp2, fft_l, T, sqr);
+	  (*ap)[n] = cy;
+	}
+    }
+#if ! TUNE_PROGRAM_BUILD
+  /* NOTE(review): the outer `k` is presumably assigned inside the
+     MPN_MULMOD_BKNP1_USABLE macro — confirm against gmp-impl.h.  */
+  else if (MPN_MULMOD_BKNP1_USABLE (n, k, MUL_FFT_MODF_THRESHOLD))
+    {
+      mp_ptr a;
+      mp_size_t n_k = n / k;
+
+      if (sqr)
+       {
+	 mp_ptr tp = TMP_SALLOC_LIMBS (mpn_sqrmod_bknp1_itch (n));
+         for (i = 0; i < K; i++)
+           {
+             a = *ap++;
+             mpn_sqrmod_bknp1 (a, a, n_k, k, tp);
+           }
+       }
+      else
+       {
+	 mp_ptr b, tp = TMP_SALLOC_LIMBS (mpn_mulmod_bknp1_itch (n));
+         for (i = 0; i < K; i++)
+           {
+             a = *ap++;
+             b = *bp++;
+             mpn_mulmod_bknp1 (a, a, b, n_k, k, tp);
+           }
+       }
+    }
+#endif
+  else
+    {
+      /* Small coefficients: plain full products, then fold the upper half
+	 back using 2^(n*GMP_NUMB_BITS) == -1 mod F.  */
+      mp_ptr a, b, tp, tpn;
+      mp_limb_t cc;
+      mp_size_t n2 = 2 * n;
+      tp = TMP_BALLOC_LIMBS (n2);
+      tpn = tp + n;
+      TRACE (printf ("  mpn_mul_n %ld of %ld limbs\n", K, n));
+      for (i = 0; i < K; i++)
+	{
+	  a = *ap++;
+	  b = *bp++;
+	  if (sqr)
+	    mpn_sqr (tp, a, n);
+	  else
+	    mpn_mul_n (tp, b, a, n);
+	  if (a[n] != 0)
+	    cc = mpn_add_n (tpn, tpn, b, n);
+	  else
+	    cc = 0;
+	  if (b[n] != 0)
+	    cc += mpn_add_n (tpn, tpn, a, n) + a[n];
+	  if (cc != 0)
+	    {
+	      cc = mpn_add_1 (tp, tp, n2, cc);
+	      /* If mpn_add_1 gives a carry (cc != 0),
+		 the result (tp) is at most GMP_NUMB_MAX - 1,
+		 so the following addition can't overflow.
+	      */
+	      tp[0] += cc;
+	    }
+	  cc = mpn_sub_n (a, tp, tpn, n);
+	  a[n] = 0;
+	  MPN_INCR_U (a, n + 1, cc);
+	}
+    }
+  TMP_FREE;
+}
+
+
+/* input: A^[l[k][0]] A^[l[k][1]] ... A^[l[k][K-1]]
+   output: K*A[0] K*A[K-1] ... K*A[1].
+   Assumes the Ap[] are pseudo-normalized, i.e. 0 <= Ap[][n] <= 1.
+   This condition is also fulfilled at exit.
+*/
+static void
+mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp)
+{
+  if (K == 2)
+    {
+      /* Size-2 inverse butterfly: (a, b) -> (a+b, a-b), then renormalize
+	 both residues to top limb 0 or 1.  */
+      mp_limb_t cy;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      cy = mpn_add_n_sub_n (Ap[0], Ap[1], Ap[0], Ap[1], n + 1) & 1;
+#else
+      MPN_COPY (tp, Ap[0], n + 1);
+      mpn_add_n (Ap[0], Ap[0], Ap[1], n + 1);
+      cy = mpn_sub_n (Ap[1], tp, Ap[1], n + 1);
+#endif
+      if (Ap[0][n] > 1) /* can be 2 or 3 */
+	{ /* Ap[0][n] = 1 - mpn_sub_1 (Ap[0], Ap[0], n, Ap[0][n] - 1); */
+	  mp_limb_t cc = Ap[0][n] - 1;
+	  Ap[0][n] = 1;
+	  MPN_DECR_U (Ap[0], n + 1, cc);
+	}
+      if (cy) /* Ap[1][n] can be -1 or -2 */
+	{ /* Ap[1][n] = mpn_add_1 (Ap[1], Ap[1], n, ~Ap[1][n] + 1); */
+	  mp_limb_t cc = ~Ap[1][n] + 1;
+	  Ap[1][n] = 0;
+	  MPN_INCR_U (Ap[1], n + 1, cc);
+	}
+    }
+  else
+    {
+      mp_size_t j, K2 = K >> 1;
+
+      /* Inverse transform of the two contiguous halves, then combine pairs
+	 with twiddle factors 2^(j * omega).  */
+      mpn_fft_fftinv (Ap,      K2, 2 * omega, n, tp);
+      mpn_fft_fftinv (Ap + K2, K2, 2 * omega, n, tp);
+      /* A[j]     <- A[j] + omega^j A[j+K/2]
+	 A[j+K/2] <- A[j] + omega^(j+K/2) A[j+K/2] */
+      for (j = 0; j < K2; j++, Ap++)
+	{
+	  /* Ap[K2] <- Ap[0] + Ap[K2] * 2^((j + K2) * omega)
+	     Ap[0]  <- Ap[0] + Ap[K2] * 2^(j * omega) */
+	  mpn_fft_mul_2exp_modF (tp, Ap[K2], j * omega, n);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+	  mpn_fft_add_sub_modF (Ap[0], Ap[K2], tp, n);
+#else
+	  mpn_fft_sub_modF (Ap[K2], Ap[0], tp, n);
+	  mpn_fft_add_modF (Ap[0],  Ap[0], tp, n);
+#endif
+	}
+    }
+}
+
+
+/* R <- A/2^k mod 2^(n*GMP_NUMB_BITS)+1 */
+static void
+mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t k, mp_size_t n)
+{
+  mp_bitcnt_t i;
+
+  ASSERT (r != a);
+  /* Division by 2^k is multiplication by its inverse: 2^(2nL) == 1 mod F,
+     so 1/2^k == 2^(2nL - k) where L = GMP_NUMB_BITS.  */
+  i = (mp_bitcnt_t) 2 * n * GMP_NUMB_BITS - k;
+  mpn_fft_mul_2exp_modF (r, a, i, n);
+  /* 1/2^k = 2^(2nL-k) mod 2^(n*GMP_NUMB_BITS)+1 */
+  /* normalize so that R < 2^(n*GMP_NUMB_BITS)+1 */
+  mpn_fft_normalize (r, n);
+}
+
+
+/* {rp,n} <- {ap,an} mod 2^(n*GMP_NUMB_BITS)+1, n <= an <= 3*n.
+   Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1,
+   then {rp,n}=0.
+   Uses 2^(n*L) = -1 and 2^(2n*L) = 1 (L = GMP_NUMB_BITS): the limbs
+   {ap+2n, an-2n} contribute positively, {ap+n, min(n, an-n)} negatively.
+*/
+static mp_size_t
+mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
+{
+  mp_size_t l, m, rpn;
+  mp_limb_t cc;
+
+  ASSERT ((n <= an) && (an <= 3 * n));
+  m = an - 2 * n;
+  if (m > 0)
+    {
+      l = n;
+      /* add {ap, m} and {ap+2n, m} in {rp, m} */
+      cc = mpn_add_n (rp, ap, ap + 2 * n, m);
+      /* copy {ap+m, n-m} to {rp+m, n-m} */
+      rpn = mpn_add_1 (rp + m, ap + m, n - m, cc);
+    }
+  else
+    {
+      l = an - n; /* l <= n */
+      MPN_COPY (rp, ap, n);
+      rpn = 0;
+    }
+
+  /* remains to subtract {ap+n, l} from {rp, n+1} */
+  rpn -= mpn_sub (rp, rp, n, ap + n, l);
+  if (rpn < 0) /* necessarily rpn = -1 */
+    rpn = mpn_add_1 (rp, rp, n, CNST_LIMB(1));
+  return rpn;
+}
+
+/* store in A[0..nprime] the first M bits from {n, nl},
+   in A[nprime+1..] the following M bits, ...
+   Assumes M is a multiple of GMP_NUMB_BITS (M = l * GMP_NUMB_BITS).
+   T must have space for at least (nprime + 1) limbs.
+   We must have nl <= 2*K*l.
+   Chunk i is additionally multiplied by 2^(i*Mp) modulo
+   2^(nprime*GMP_NUMB_BITS)+1, and Ap[i] is set to point to chunk i
+   inside A (chunks are nprime+1 limbs apart).
+*/
+static void
+mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
+		       mp_srcptr n, mp_size_t nl, mp_size_t l, mp_size_t Mp,
+		       mp_ptr T)
+{
+  mp_size_t i, j;
+  mp_ptr tmp;
+  mp_size_t Kl = K * l;
+  TMP_DECL;
+  TMP_MARK;
+
+  if (nl > Kl) /* normalize {n, nl} mod 2^(Kl*GMP_NUMB_BITS)+1 */
+    {
+      mp_size_t dif = nl - Kl;
+
+      tmp = TMP_BALLOC_LIMBS(Kl + 1);
+      tmp[Kl] = 0;
+
+#if ! WANT_OLD_FFT_FULL
+      ASSERT_ALWAYS (dif <= Kl);
+#else
+      /* The comment "We must have nl <= 2*K*l." says that
+	 ((dif = nl - Kl) > Kl) should never happen. */
+      if (UNLIKELY (dif > Kl))
+	{
+	  mp_limb_signed_t cy;
+	  int subp = 0;
+
+	  cy = mpn_sub_n (tmp, n, n + Kl, Kl);
+	  n += 2 * Kl;
+	  dif -= Kl;
+
+	  /* now dif > 0 */
+	  while (dif > Kl)
+	    {
+	      /* successive Kl-limb blocks alternate in sign, since
+		 2^(Kl*GMP_NUMB_BITS) = -1 mod 2^(Kl*GMP_NUMB_BITS)+1 */
+	      if (subp)
+		cy += mpn_sub_n (tmp, tmp, n, Kl);
+	      else
+		cy -= mpn_add_n (tmp, tmp, n, Kl);
+	      subp ^= 1;
+	      n += Kl;
+	      dif -= Kl;
+	    }
+	  /* now dif <= Kl */
+	  if (subp)
+	    cy += mpn_sub (tmp, tmp, Kl, n, dif);
+	  else
+	    cy -= mpn_add (tmp, tmp, Kl, n, dif);
+	  if (cy >= 0)
+	    MPN_INCR_U (tmp, Kl + 1, cy);
+	  else
+	    {
+	      tmp[Kl] = 1;
+	      MPN_DECR_U (tmp, Kl + 1, -cy - 1);
+	    }
+	}
+      else /* dif <= Kl, i.e. nl <= 2 * Kl */
+#endif
+	{
+	  mp_limb_t cy;
+	  cy = mpn_sub (tmp, n, Kl, n + Kl, dif);
+	  MPN_INCR_U (tmp, Kl + 1, cy);
+	}
+      nl = Kl + 1;
+      n = tmp;
+    }
+  for (i = 0; i < K; i++)
+    {
+      Ap[i] = A;
+      /* store the next M bits of n into A[0..nprime] */
+      if (nl > 0) /* nl is the number of remaining limbs */
+	{
+	  /* the last chunk (i = K - 1) absorbs all remaining limbs */
+	  j = (l <= nl && i < K - 1) ? l : nl; /* store j next limbs */
+	  nl -= j;
+	  MPN_COPY (T, n, j);
+	  MPN_ZERO (T + j, nprime + 1 - j);
+	  n += l;
+	  mpn_fft_mul_2exp_modF (A, T, i * Mp, nprime);
+	}
+      else
+	MPN_ZERO (A, nprime + 1);
+      A += nprime + 1;
+    }
+  ASSERT_ALWAYS (nl == 0);
+  TMP_FREE;
+}
+
+/* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*GMP_NUMB_BITS
+   op is pl limbs, its high bit is returned.
+   One must have pl = mpn_fft_next_size (pl, k).
+   T must have space for 2 * (nprime + 1) limbs.
+   Ap[] and Bp[] point at the K transformed coefficients (stored in A
+   and B, each coefficient nprime+1 limbs); unusedA is kept only for
+   the sake of the interface and is not referenced.
+*/
+
+static mp_limb_t
+mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
+		      mp_ptr *Ap, mp_ptr *Bp, mp_ptr unusedA, mp_ptr B,
+		      mp_size_t nprime, mp_size_t l, mp_size_t Mp,
+		      int **fft_l, mp_ptr T, int sqr)
+{
+  mp_size_t K, i, pla, lo, sh, j;
+  mp_ptr p;
+  mp_limb_t cc;
+
+  K = (mp_size_t) 1 << k;
+
+  /* direct fft's */
+  mpn_fft_fft (Ap, K, fft_l + k, 2 * Mp, nprime, 1, T);
+  if (!sqr)
+    mpn_fft_fft (Bp, K, fft_l + k, 2 * Mp, nprime, 1, T);
+
+  /* term to term multiplications */
+  mpn_fft_mul_modF_K (Ap, sqr ? Ap : Bp, nprime, K);
+
+  /* inverse fft's */
+  mpn_fft_fftinv (Ap, K, 2 * Mp, nprime, T);
+
+  /* division of terms after inverse fft */
+  /* Bp[] is recycled to hold the divided terms: Bp[i] reuses the
+     buffer of Ap[i-1], which was consumed in the previous iteration. */
+  Bp[0] = T + nprime + 1;
+  mpn_fft_div_2exp_modF (Bp[0], Ap[0], k, nprime);
+  for (i = 1; i < K; i++)
+    {
+      Bp[i] = Ap[i - 1];
+      mpn_fft_div_2exp_modF (Bp[i], Ap[i], k + (K - i) * Mp, nprime);
+    }
+
+  /* addition of terms in result p */
+  MPN_ZERO (T, nprime + 1);
+  pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
+  p = B; /* B has K*(n' + 1) limbs, which is >= pla, i.e. enough */
+  MPN_ZERO (p, pla);
+  cc = 0; /* will accumulate the (signed) carry at p[pla] */
+  for (i = K - 1, lo = l * i + nprime,sh = l * i; i >= 0; i--,lo -= l,sh -= l)
+    {
+      mp_ptr n = p + sh;
+
+      /* j maps the output index to the (bit-reversed) inverse-fft slot */
+      j = (K - i) & (K - 1);
+
+      cc += mpn_add (n, n, pla - sh, Bp[j], nprime + 1);
+      T[2 * l] = i + 1; /* T = (i + 1)*2^(2*M) */
+      if (mpn_cmp (Bp[j], T, nprime + 1) > 0)
+	{ /* subtract 2^N'+1 */
+	  cc -= mpn_sub_1 (n, n, pla - sh, CNST_LIMB(1));
+	  cc -= mpn_sub_1 (p + lo, p + lo, pla - lo, CNST_LIMB(1));
+	}
+    }
+  if (cc == -CNST_LIMB(1))
+    {
+      if ((cc = mpn_add_1 (p + pla - pl, p + pla - pl, pl, CNST_LIMB(1))))
+	{
+	  /* p[pla-pl]...p[pla-1] are all zero */
+	  mpn_sub_1 (p + pla - pl - 1, p + pla - pl - 1, pl + 1, CNST_LIMB(1));
+	  mpn_sub_1 (p + pla - 1, p + pla - 1, 1, CNST_LIMB(1));
+	}
+    }
+  else if (cc == 1)
+    {
+      if (pla >= 2 * pl)
+	{
+	  while ((cc = mpn_add_1 (p + pla - 2 * pl, p + pla - 2 * pl, 2 * pl, cc)))
+	    ;
+	}
+      else
+	{
+	  MPN_DECR_U (p + pla - pl, pl, cc);
+	}
+    }
+  else
+    ASSERT (cc == 0);
+
+  /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ]
+     < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ]
+     < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */
+  return mpn_fft_norm_modF (op, pl, p, pla);
+}
+
+/* return the lcm of a and 2^k */
+static mp_bitcnt_t
+mpn_mul_fft_lcm (mp_bitcnt_t a, int k)
+{
+  mp_bitcnt_t l = k; /* remember the original k */
+
+  /* Cancel common factors of two between a and 2^k: the loop strips
+     min(k, v2(a)) low zero bits from a; shifting back by the original
+     k then gives a * 2^max(k - v2(a), 0) = lcm (a, 2^k). */
+  while (a % 2 == 0 && k > 0)
+    {
+      a >>= 1;
+      k --;
+    }
+  return a << l;
+}
+
+
+/* Put in {op, pl} the product {n, nl} * {m, ml} modulo
+   2^(pl*GMP_NUMB_BITS)+1, using an FFT of size 2^k, and return the
+   carry out (the high bit of op).
+   One must have pl = mpn_fft_next_size (pl, k). */
+mp_limb_t
+mpn_mul_fft (mp_ptr op, mp_size_t pl,
+	     mp_srcptr n, mp_size_t nl,
+	     mp_srcptr m, mp_size_t ml,
+	     int k)
+{
+  int i;
+  mp_size_t K, maxLK;
+  mp_size_t N, Nprime, nprime, M, Mp, l;
+  mp_ptr *Ap, *Bp, A, T, B;
+  int **fft_l, *tmp;
+  int sqr = (n == m && nl == ml); /* squaring: only one transform needed */
+  mp_limb_t h;
+  TMP_DECL;
+
+  TRACE (printf ("\nmpn_mul_fft pl=%ld nl=%ld ml=%ld k=%d\n", pl, nl, ml, k));
+  ASSERT_ALWAYS (mpn_fft_next_size (pl, k) == pl);
+
+  TMP_MARK;
+  N = pl * GMP_NUMB_BITS;
+  /* fft_l[i] points at a table of 2^i ints, filled by mpn_fft_initl */
+  fft_l = TMP_BALLOC_TYPE (k + 1, int *);
+  tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+  for (i = 0; i <= k; i++)
+    {
+      fft_l[i] = tmp;
+      tmp += (mp_size_t) 1 << i;
+    }
+
+  mpn_fft_initl (fft_l, k);
+  K = (mp_size_t) 1 << k;
+  M = N >> k;	/* N = 2^k M */
+  l = 1 + (M - 1) / GMP_NUMB_BITS;
+  maxLK = mpn_mul_fft_lcm (GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
+
+  Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
+  /* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */
+  nprime = Nprime / GMP_NUMB_BITS;
+  TRACE (printf ("N=%ld K=%ld, M=%ld, l=%ld, maxLK=%ld, Np=%ld, np=%ld\n",
+		 N, K, M, l, maxLK, Nprime, nprime));
+  /* we should ensure that recursively, nprime is a multiple of the next K */
+  if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {
+      mp_size_t K2;
+      for (;;)
+	{
+	  K2 = (mp_size_t) 1 << mpn_fft_best_k (nprime, sqr);
+	  if ((nprime & (K2 - 1)) == 0)
+	    break;
+	  nprime = (nprime + K2 - 1) & -K2; /* round up to a multiple of K2 */
+	  Nprime = nprime * GMP_LIMB_BITS;
+	  /* warning: since nprime changed, K2 may change too! */
+	}
+      TRACE (printf ("new maxLK=%ld, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
+    }
+  ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */
+
+  T = TMP_BALLOC_LIMBS (2 * (nprime + 1));
+  Mp = Nprime >> k;
+
+  TRACE (printf ("%ldx%ld limbs -> %ld times %ldx%ld limbs (%1.2f)\n",
+		pl, pl, K, nprime, nprime, 2.0 * (double) N / Nprime / K);
+	 printf ("   temp space %ld\n", 2 * K * (nprime + 1)));
+
+  A = TMP_BALLOC_LIMBS (K * (nprime + 1));
+  Ap = TMP_BALLOC_MP_PTRS (K);
+  Bp = TMP_BALLOC_MP_PTRS (K);
+  mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
+  if (sqr)
+    {
+      /* squaring: no second decomposition, B only serves as the
+	 recombination buffer inside mpn_mul_fft_internal */
+      mp_size_t pla;
+      pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
+      B = TMP_BALLOC_LIMBS (pla);
+    }
+  else
+    {
+      B = TMP_BALLOC_LIMBS (K * (nprime + 1));
+      mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
+    }
+  h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
+
+  TMP_FREE;
+  return h;
+}
+
+#if WANT_OLD_FFT_FULL
+/* multiply {n, nl} by {m, ml}, and put the result in {op, nl+ml} */
+/* The full product is reconstructed from two modular products: one
+   modulo 2^(pl2*GMP_NUMB_BITS)+1 (lambda) and one modulo
+   2^(pl3*GMP_NUMB_BITS)+1 (mu), with pl3 = 3*pl2/2 and pl2+pl3 >= pl. */
+void
+mpn_mul_fft_full (mp_ptr op,
+		  mp_srcptr n, mp_size_t nl,
+		  mp_srcptr m, mp_size_t ml)
+{
+  mp_ptr pad_op;
+  mp_size_t pl, pl2, pl3, l;
+  mp_size_t cc, c2, oldcc;
+  int k2, k3;
+  int sqr = (n == m && nl == ml);
+
+  pl = nl + ml; /* total number of limbs of the result */
+
+  /* perform a fft mod 2^(2N)+1 and one mod 2^(3N)+1.
+     We must have pl3 = 3/2 * pl2, with pl2 a multiple of 2^k2, and
+     pl3 a multiple of 2^k3. Since k3 >= k2, both are multiples of 2^k2,
+     and pl2 must be an even multiple of 2^k2. Thus (pl2,pl3) =
+     (2*j*2^k2,3*j*2^k2), which works for 3*j <= pl/2^k2 <= 5*j.
+     We need that consecutive intervals overlap, i.e. 5*j >= 3*(j+1),
+     which requires j>=2. Thus this scheme requires pl >= 6 * 2^FFT_FIRST_K. */
+
+  /*  ASSERT_ALWAYS(pl >= 6 * (1 << FFT_FIRST_K)); */
+
+  pl2 = (2 * pl - 1) / 5; /* ceil (2pl/5) - 1 */
+  do
+    {
+      pl2++;
+      k2 = mpn_fft_best_k (pl2, sqr); /* best fft size for pl2 limbs */
+      pl2 = mpn_fft_next_size (pl2, k2);
+      pl3 = 3 * pl2 / 2; /* since k>=FFT_FIRST_K=4, pl2 is a multiple of 2^4,
+			    thus pl2 / 2 is exact */
+      k3 = mpn_fft_best_k (pl3, sqr);
+    }
+  while (mpn_fft_next_size (pl3, k3) != pl3);
+
+  TRACE (printf ("mpn_mul_fft_full nl=%ld ml=%ld -> pl2=%ld pl3=%ld k=%d\n",
+		 nl, ml, pl2, pl3, k2));
+
+  ASSERT_ALWAYS(pl3 <= pl);
+  cc = mpn_mul_fft (op, pl3, n, nl, m, ml, k3);     /* mu */
+  ASSERT(cc == 0);
+  pad_op = __GMP_ALLOCATE_FUNC_LIMBS (pl2);
+  cc = mpn_mul_fft (pad_op, pl2, n, nl, m, ml, k2); /* lambda */
+  cc = -cc + mpn_sub_n (pad_op, pad_op, op, pl2);    /* lambda - low(mu) */
+  /* 0 <= cc <= 1 */
+  ASSERT(0 <= cc && cc <= 1);
+  l = pl3 - pl2; /* l = pl2 / 2 since pl3 = 3/2 * pl2 */
+  c2 = mpn_add_n (pad_op, pad_op, op + pl2, l);
+  cc = mpn_add_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2) - cc;
+  ASSERT(-1 <= cc && cc <= 1);
+  if (cc < 0)
+    cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
+  ASSERT(0 <= cc && cc <= 1);
+  /* now lambda-mu = {pad_op, pl2} - cc mod 2^(pl2*GMP_NUMB_BITS)+1 */
+  oldcc = cc;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  c2 = mpn_add_n_sub_n (pad_op + l, pad_op, pad_op, pad_op + l, l);
+  cc += c2 >> 1; /* carry out from high <- low + high */
+  c2 = c2 & 1; /* borrow out from low <- low - high */
+#else
+  {
+    mp_ptr tmp;
+    TMP_DECL;
+
+    TMP_MARK;
+    tmp = TMP_BALLOC_LIMBS (l);
+    MPN_COPY (tmp, pad_op, l);
+    c2 = mpn_sub_n (pad_op,      pad_op, pad_op + l, l);
+    cc += mpn_add_n (pad_op + l, tmp,    pad_op + l, l);
+    TMP_FREE;
+  }
+#endif
+  c2 += oldcc;
+  /* first normalize {pad_op, pl2} before dividing by 2: c2 is the borrow
+     at pad_op + l, cc is the carry at pad_op + pl2 */
+  /* 0 <= cc <= 2 */
+  cc -= mpn_sub_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2);
+  /* -1 <= cc <= 2 */
+  if (cc > 0)
+    cc = -mpn_sub_1 (pad_op, pad_op, pl2, (mp_limb_t) cc);
+  /* now -1 <= cc <= 0 */
+  if (cc < 0)
+    cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
+  /* now {pad_op, pl2} is normalized, with 0 <= cc <= 1 */
+  if (pad_op[0] & 1) /* if odd, add 2^(pl2*GMP_NUMB_BITS)+1 */
+    cc += 1 + mpn_add_1 (pad_op, pad_op, pl2, CNST_LIMB(1));
+  /* now 0 <= cc <= 2, but cc=2 cannot occur since it would give a carry
+     out below */
+  mpn_rshift (pad_op, pad_op, pl2, 1); /* divide by two */
+  if (cc) /* then cc=1 */
+    pad_op [pl2 - 1] |= (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
+  /* now {pad_op,pl2}-cc = (lambda-mu)/(1-2^(l*GMP_NUMB_BITS))
+     mod 2^(pl2*GMP_NUMB_BITS) + 1 */
+  c2 = mpn_add_n (op, op, pad_op, pl2); /* no need to add cc (is 0) */
+  /* since pl2+pl3 >= pl, necessary the extra limbs (including cc) are zero */
+  MPN_COPY (op + pl3, pad_op, pl - pl3);
+  ASSERT_MPN_ZERO_P (pad_op + pl - pl3, pl2 + pl3 - pl);
+  __GMP_FREE_FUNC_LIMBS (pad_op, pl2);
+  /* since the final result has at most pl limbs, no carry out below */
+  MPN_INCR_U (op + pl2, pl - pl2, (mp_limb_t) c2);
+}
+#endif
diff --git a/mpn/generic/mul_n.c b/mpn/generic/mul_n.c
new file mode 100644
index 0000000..36bd923
--- /dev/null
+++ b/mpn/generic/mul_n.c
@@ -0,0 +1,96 @@
+/* mpn_mul_n -- multiply natural numbers.
+
+Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Multiply {a, n} by {b, n} and write the full 2*n-limb product to
+   {p, 2*n}.  Dispatches on n from schoolbook multiplication through
+   the Toom variants up to FFT.  p must not overlap a or b. */
+void
+mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, b, n));
+
+  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+    {
+      mpn_mul_basecase (p, a, n, b, n);
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      /* The worst-case itch inside this range bounds the array size. */
+      mp_limb_t ws[mpn_toom22_mul_itch (MUL_TOOM33_THRESHOLD_LIMIT-1,
+					MUL_TOOM33_THRESHOLD_LIMIT-1)];
+      ASSERT (MUL_TOOM33_THRESHOLD <= MUL_TOOM33_THRESHOLD_LIMIT);
+      mpn_toom22_mul (p, a, n, b, n, ws);
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom33_mul_itch (n, n));
+      mpn_toom33_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (n, n));
+      mpn_toom44_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom6_mul_n_itch (n));
+      mpn_toom6h_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom8_mul_n_itch (n));
+      mpn_toom8h_mul (p, a, n, b, n, ws);
+      TMP_FREE;
+    }
+  else
+    {
+      /* The current FFT code allocates its own space.  That should probably
+	 change.  */
+      mpn_fft_mul (p, a, n, b, n);
+    }
+}
diff --git a/mpn/generic/mullo_basecase.c b/mpn/generic/mullo_basecase.c
new file mode 100644
index 0000000..9a4cd3d
--- /dev/null
+++ b/mpn/generic/mullo_basecase.c
@@ -0,0 +1,90 @@
+/* mpn_mullo_basecase -- Internal routine to multiply two natural
+   numbers of length n and return the low part.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 2000, 2002, 2004, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* FIXME: Should optionally use mpn_mul_2/mpn_addmul_2.  */
+
+#ifndef MULLO_VARIANT
+#define MULLO_VARIANT 2
+#endif
+
+
+#if MULLO_VARIANT == 1
+/* Low n limbs of {up,n} * {vp,n}: accumulate one partial-product row
+   per limb of vp, each row truncated to fit below limb n
+   (lengths n, n-1, ..., 1). */
+void
+mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_size_t i;
+
+  /* first row: full length n */
+  mpn_mul_1 (rp, up, n, vp[0]);
+
+  for (i = n - 1; i > 0; i--)
+    {
+      vp++;
+      rp++;
+      /* add up[0..i-1] * vp[0], shifted one limb further each time */
+      mpn_addmul_1 (rp, up, i, vp[0]);
+    }
+}
+#endif
+
+
+#if MULLO_VARIANT == 2
+/* Low n limbs of {up,n} * {vp,n}.  Rows are truncated just below the
+   most significant output limb; the contributions to that top limb
+   (index n-1) are collected separately in h, as scalar products plus
+   the carry out of each truncated row. */
+void
+mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t h;
+
+  /* top-limb contribution of the highest row */
+  h = up[0] * vp[n - 1];
+
+  if (n != 1)
+    {
+      mp_size_t i;
+      mp_limb_t v0;
+
+      v0 = *vp++;
+      /* first row: n-1 limbs into rp, top contribution folded into h */
+      h += up[n - 1] * v0 + mpn_mul_1 (rp, up, n - 1, v0);
+      rp++;
+
+      for (i = n - 2; i > 0; i--)
+	{
+	  v0 = *vp++;
+	  h += up[i] * v0 + mpn_addmul_1 (rp, up, i, v0);
+	  rp++;
+	}
+    }
+
+  rp[0] = h; /* rp now points at the top limb (offset n-1 overall) */
+}
+#endif
diff --git a/mpn/generic/mullo_n.c b/mpn/generic/mullo_n.c
new file mode 100644
index 0000000..6f4e7ae
--- /dev/null
+++ b/mpn/generic/mullo_n.c
@@ -0,0 +1,243 @@
+/* mpn_mullo_n -- multiply two n-limb numbers and return the low n limbs
+   of their products.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THIS IS (FOR NOW) AN INTERNAL FUNCTION.  IT IS ONLY SAFE TO REACH THIS
+   FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
+   THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005, 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_range_basecase 1
+#define MAYBE_range_toom22   1
+#else
+#define MAYBE_range_basecase                                           \
+  ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM22_THRESHOLD*36/(36-11))
+#define MAYBE_range_toom22                                             \
+  ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM33_THRESHOLD*36/(36-11) )
+#endif
+
+/*  THINK: The DC strategy uses different constants in different Toom's
+	 ranges. Something smoother?
+*/
+
+/*
+  Compute the least significant half of the product {xy,n}*{yp,n}, or
+  formally {rp,n} = {xy,n}*{yp,n} Mod (B^n).
+
+  Above the given threshold, the Divide and Conquer strategy is used.
+  The operands are split in two, and a full product plus two mullo
+  are used to obtain the final result. The more natural strategy is to
+  split in two halves, but this is far from optimal when a
+  sub-quadratic multiplication is used.
+
+  Mulders suggests an unbalanced split in favour of the full product,
+  split n = n1 + n2, where an = n1 <= n2 = (1-a)n; i.e. 0 < a <= 1/2.
+
+  To compute the value of a, we assume that the cost of mullo for a
+  given size ML(n) is a fraction of the cost of a full product with
+  same size M(n), and the cost M(n)=n^e for some exponent 1 < e <= 2;
+  then we can write:
+
+  ML(n) = 2*ML(an) + M((1-a)n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
+
+  Given a value for e, want to minimise the value of k, i.e. the
+  function k=(1-a)^e/(1-2*a^e).
+
+  With e=2, the exponent for schoolbook multiplication, the minimum is
+  given by the values a=1-a=1/2.
+
+  With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
+  Mulders compute (1-a) = 0.694... and we approximate a with 11/36.
+
+  Other possible approximations follow:
+  e=log(5)/log(3) [Toom-3] -> a ~= 9/40
+  e=log(7)/log(4) [Toom-4] -> a ~= 7/39
+  e=log(11)/log(6) [Toom-6] -> a ~= 1/8
+  e=log(15)/log(8) [Toom-8] -> a ~= 1/10
+
+  The values above where obtained with the following trivial commands
+  in the gp-pari shell:
+
+fun(e,a)=(1-a)^e/(1-2*a^e)
+mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
+contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
+contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
+contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
+contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
+contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
+
+  ,
+  |\
+  | \
+  +----,
+  |    |
+  |    |
+  |    |\
+  |    | \
+  +----+--`
+  ^ n2 ^n1^
+
+  For an actual implementation, the assumption that M(n)=n^e is
+  incorrect, as a consequence also the assumption that ML(n)=k*M(n)
+  with a constant k is wrong.
+
+  But theory suggest us two things:
+  - the best the multiplication product is (lower e), the more k
+    approaches 1, and a approaches 0.
+
+  - A value for a smaller than optimal is probably less bad than a
+    bigger one: e.g. let e=log(3)/log(2), a=0.3058_ the optimal
+    value, and k(a)=0.808_ the mul/mullo speed ratio. We get
+    k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
+*/
+
+/* Scratch space, in limbs, required by mpn_dc_mullo_n for size n. */
+static mp_size_t
+mpn_mullo_n_itch (mp_size_t n)
+{
+  return 2*n;
+}
+
+/*
+    mpn_dc_mullo_n requires a scratch space of 2*n limbs at tp.
+    It accepts tp == rp.
+*/
+static void
+mpn_dc_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n, mp_ptr tp)
+{
+  mp_size_t n2, n1;
+  ASSERT (n >= 2);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
+
+  /* Divide-and-conquer */
+
+  /* We need fractional approximation of the value 0 < a <= 1/2
+     giving the minimum in the function k=(1-a)^e/(1-2*a^e).
+     The threshold chain picks the split ratio matching the full
+     multiplication algorithm (Toom-2/3/4/8) in use at this size; see
+     the analysis in the comment at the top of this file. */
+  if (MAYBE_range_basecase && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD*36/(36-11)))
+    n1 = n >> 1;
+  else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD*36/(36-11)))
+    n1 = n * 11 / (size_t) 36;	/* n1 ~= n*(1-.694...) */
+  else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD*40/(40-9)))
+    n1 = n * 9 / (size_t) 40;	/* n1 ~= n*(1-.775...) */
+  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD*10/9))
+    n1 = n * 7 / (size_t) 39;	/* n1 ~= n*(1-.821...) */
+  /* n1 = n * 4 / (size_t) 31;	// n1 ~= n*(1-.871...) [TOOM66] */
+  else
+    n1 = n / (size_t) 10;		/* n1 ~= n*(1-.899...) [TOOM88] */
+
+  n2 = n - n1;
+
+  /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0,
+	      y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
+
+  /* x0 * y0 */
+  mpn_mul_n (tp, xp, yp, n2);
+  MPN_COPY (rp, tp, n2);
+
+  /* x1 * y0 * 2^(n2 GMP_NUMB_BITS) */
+  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+    mpn_mul_basecase (tp + n, xp + n2, n1, yp, n1);
+  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+    mpn_mullo_basecase (tp + n, xp + n2, yp, n1);
+  else
+    mpn_dc_mullo_n (tp + n, xp + n2, yp, n1, tp + n); /* tp == rp allowed */
+  mpn_add_n (rp + n2, tp + n2, tp + n, n1);
+
+  /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
+  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+    mpn_mul_basecase (tp + n, xp, n1, yp + n2, n1);
+  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+    mpn_mullo_basecase (tp + n, xp, yp + n2, n1);
+  else
+    mpn_dc_mullo_n (tp + n, xp, yp + n2, n1, tp + n);
+  mpn_add_n (rp + n2, rp + n2, tp + n, n1);
+}
+
+/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0.  */
+#define MUL_BASECASE_ALLOC \
+ (MULLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLO_BASECASE_THRESHOLD_LIMIT)
+
+/* FIXME: This function should accept a temporary area; dc_mullow_n
+   accepts a pointer tp, and handle the case tp == rp, do the same here.
+   Maybe recombine the two functions.
+   THINK: If mpn_mul_basecase is always faster than mpn_mullo_basecase
+	  (typically thanks to mpn_addmul_2) should we unconditionally use
+	  mpn_mul_n?
+*/
+
+/* {rp, n} <- low n limbs of the product {xp, n} * {yp, n}.
+   rp must not overlap the inputs; n >= 1. */
+void
+mpn_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+
+  if (BELOW_THRESHOLD (n, MULLO_BASECASE_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      /* full 2n-limb schoolbook product, keep the low half */
+      mp_limb_t tp[MUL_BASECASE_ALLOC];
+      mpn_mul_basecase (tp, xp, n, yp, n);
+      MPN_COPY (rp, tp, n);
+    }
+  else if (BELOW_THRESHOLD (n, MULLO_DC_THRESHOLD))
+    {
+      mpn_mullo_basecase (rp, xp, yp, n);
+    }
+  else
+    {
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+      tp = TMP_ALLOC_LIMBS (mpn_mullo_n_itch (n));
+      if (BELOW_THRESHOLD (n, MULLO_MUL_N_THRESHOLD))
+	{
+	  mpn_dc_mullo_n (rp, xp, yp, n, tp);
+	}
+      else
+	{
+	  /* For really large operands, use plain mpn_mul_n but throw away upper n
+	     limbs of result.  */
+#if !TUNE_PROGRAM_BUILD && (MULLO_MUL_N_THRESHOLD > MUL_FFT_THRESHOLD)
+	  mpn_fft_mul (tp, xp, n, yp, n);
+#else
+	  mpn_mul_n (tp, xp, yp, n);
+#endif
+	  MPN_COPY (rp, tp, n);
+	}
+      TMP_FREE;
+    }
+}
diff --git a/mpn/generic/mulmid.c b/mpn/generic/mulmid.c
new file mode 100644
index 0000000..f35c5fb
--- /dev/null
+++ b/mpn/generic/mulmid.c
@@ -0,0 +1,255 @@
+/* mpn_mulmid -- middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
+
+
+/* Middle product of {ap,an} and {bp,bn}: writes an-bn+3 limbs at rp
+   (cf. the size used in the overlap asserts and mulmid_basecase.c).
+   Requires an >= bn >= 1; rp must not overlap ap or bp.  The
+   (an-bn+1) x bn diagonal region is sliced into chunks for cache
+   locality, using mpn_mulmid_basecase for small/narrow parts and
+   mpn_toom42_mulmid above the threshold. */
+void
+mpn_mulmid (mp_ptr rp,
+            mp_srcptr ap, mp_size_t an,
+            mp_srcptr bp, mp_size_t bn)
+{
+  mp_size_t rn, k;
+  mp_ptr scratch, temp;
+
+  ASSERT (an >= bn);
+  ASSERT (bn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
+  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
+
+  if (bn < MULMID_TOOM42_THRESHOLD)
+    {
+      /* region not tall enough to make toom42 worthwhile for any portion */
+
+      if (an < CHUNK)
+	{
+	  /* region not too wide either, just call basecase directly */
+	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
+	  return;
+	}
+
+      /* Region quite wide. For better locality, use basecase on chunks:
+
+	 AAABBBCC..
+	 .AAABBBCC.
+	 ..AAABBBCC
+      */
+
+      k = CHUNK - bn + 1;    /* number of diagonals per chunk */
+
+      /* first chunk (marked A in the above diagram) */
+      mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+
+      /* remaining chunks (B, C, etc) */
+      an -= k;
+
+      while (an >= CHUNK)
+	{
+	  mp_limb_t t0, t1, cy;
+	  ap += k, rp += k;
+	  /* the two guard limbs of the previous chunk overlap the next
+	     chunk's output; save and re-add them */
+	  t0 = rp[0], t1 = rp[1];
+	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
+	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
+	  an -= k;
+	}
+
+      if (an >= bn)
+	{
+	  /* last remaining chunk */
+	  mp_limb_t t0, t1, cy;
+	  ap += k, rp += k;
+	  t0 = rp[0], t1 = rp[1];
+	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
+	  ADDC_LIMB (cy, rp[0], rp[0], t0);
+	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
+	}
+
+      return;
+    }
+
+  /* region is tall enough for toom42 */
+
+  rn = an - bn + 1;
+
+  if (rn < MULMID_TOOM42_THRESHOLD)
+    {
+      /* region not wide enough to make toom42 worthwhile for any portion */
+
+      TMP_DECL;
+
+      if (bn < CHUNK)
+	{
+	  /* region not too tall either, just call basecase directly */
+	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
+	  return;
+	}
+
+      /* Region quite tall. For better locality, use basecase on chunks:
+
+	 AAAAA....
+	 .AAAAA...
+	 ..BBBBB..
+	 ...BBBBB.
+	 ....CCCCC
+      */
+
+      TMP_MARK;
+
+      temp = TMP_ALLOC_LIMBS (rn + 2);
+
+      /* first chunk (marked A in the above diagram) */
+      bp += bn - CHUNK, an -= bn - CHUNK;
+      mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
+
+      /* remaining chunks (B, C, etc) */
+      bn -= CHUNK;
+
+      while (bn >= CHUNK)
+	{
+	  ap += CHUNK, bp -= CHUNK;
+	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
+	  mpn_add_n (rp, rp, temp, rn + 2);
+	  bn -= CHUNK;
+	}
+
+      if (bn)
+	{
+	  /* last remaining chunk */
+	  ap += CHUNK, bp -= bn;
+	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
+	  mpn_add_n (rp, rp, temp, rn + 2);
+	}
+
+      TMP_FREE;
+      return;
+    }
+
+  /* we're definitely going to use toom42 somewhere */
+
+  if (bn > rn)
+    {
+      /* slice region into chunks, use toom42 on all chunks except possibly
+	 the last:
+
+         AA....
+         .AA...
+         ..BB..
+         ...BB.
+         ....CC
+      */
+
+      TMP_DECL;
+      TMP_MARK;
+
+      temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
+      scratch = temp + rn + 2;
+
+      /* first chunk (marked A in the above diagram) */
+      bp += bn - rn;
+      mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
+
+      /* remaining chunks (B, C, etc) */
+      bn -= rn;
+
+      while (bn >= rn)
+        {
+          ap += rn, bp -= rn;
+	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
+          mpn_add_n (rp, rp, temp, rn + 2);
+          bn -= rn;
+        }
+
+      if (bn)
+        {
+          /* last remaining chunk, handled recursively (smaller bn) */
+          ap += rn, bp -= bn;
+	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
+          mpn_add_n (rp, rp, temp, rn + 2);
+        }
+
+      TMP_FREE;
+    }
+  else
+    {
+      /* slice region into chunks, use toom42 on all chunks except possibly
+	 the last:
+
+         AAABBBCC..
+         .AAABBBCC.
+         ..AAABBBCC
+      */
+
+      TMP_DECL;
+      TMP_MARK;
+
+      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
+
+      /* first chunk (marked A in the above diagram) */
+      mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+
+      /* remaining chunks (B, C, etc) */
+      rn -= bn;
+
+      while (rn >= bn)
+        {
+	  mp_limb_t t0, t1, cy;
+          ap += bn, rp += bn;
+          t0 = rp[0], t1 = rp[1];
+          mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
+	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
+	  rn -= bn;
+        }
+
+      TMP_FREE;
+
+      if (rn)
+        {
+          /* last remaining chunk, handled recursively (smaller width) */
+	  mp_limb_t t0, t1, cy;
+          ap += bn, rp += bn;
+          t0 = rp[0], t1 = rp[1];
+          mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
+	  ADDC_LIMB (cy, rp[0], rp[0], t0);
+	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
+        }
+    }
+}
diff --git a/mpn/generic/mulmid_basecase.c b/mpn/generic/mulmid_basecase.c
new file mode 100644
index 0000000..d5434ea
--- /dev/null
+++ b/mpn/generic/mulmid_basecase.c
@@ -0,0 +1,82 @@
+/* mpn_mulmid_basecase -- classical middle product algorithm
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Middle product of {up,un} and {vp,vn}, write result to {rp,un-vn+3}.
+   Must have un >= vn >= 1.
+
+   Neither input buffer may overlap with the output buffer. */
+
+void
+mpn_mulmid_basecase (mp_ptr rp,
+                     mp_srcptr up, mp_size_t un,
+                     mp_srcptr vp, mp_size_t vn)
+{
+  mp_limb_t lo, hi;  /* last two limbs of output */
+  mp_limb_t cy;
+
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, up, un));
+  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, vp, vn));
+
+  /* Point up at the first limb of the leading diagonal; un becomes the
+     length of each diagonal row, i.e. orig_un - vn + 1.  */
+  up += vn - 1;
+  un -= vn - 1;
+
+  /* multiply by first limb, store result */
+  lo = mpn_mul_1 (rp, up, un, vp[0]);
+  hi = 0;
+
+  /* accumulate remaining rows: each row steps up down one limb and vp
+     up one limb, so all rows land in the same un-limb window at rp.
+     Carries out of that window are collected in the (hi,lo) pair.  */
+  for (vn--; vn; vn--)
+    {
+      up--, vp++;
+      cy = mpn_addmul_1 (rp, up, un, vp[0]);
+      add_ssaaaa (hi, lo, hi, lo, CNST_LIMB(0), cy);
+    }
+
+  /* store final limbs */
+#if GMP_NAIL_BITS != 0
+  /* Re-split the accumulated double limb on the numb boundary.  */
+  hi = (hi << GMP_NAIL_BITS) + (lo >> GMP_NUMB_BITS);
+  lo &= GMP_NUMB_MASK;
+#endif
+
+  rp[un] = lo;
+  rp[un + 1] = hi;
+}
diff --git a/mpn/generic/mulmid_n.c b/mpn/generic/mulmid_n.c
new file mode 100644
index 0000000..ac7e8f1
--- /dev/null
+++ b/mpn/generic/mulmid_n.c
@@ -0,0 +1,61 @@
+/* mpn_mulmid_n -- balanced middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+/* Balanced middle product: {rp, n+2} gets the middle part of
+   {ap, 2n-1} times {bp, n}.  Dispatches between the classical
+   basecase and the Toom-42 middle-product algorithm.  */
+void
+mpn_mulmid_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+  if (n >= MULMID_TOOM42_THRESHOLD)
+    {
+      /* Large enough for toom42: grab its scratch space and go.  */
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (n));
+      mpn_toom42_mulmid (rp, ap, bp, n, ws);
+      TMP_FREE;
+    }
+  else
+    /* Below the threshold the O(n^2) schoolbook version wins.  */
+    mpn_mulmid_basecase (rp, ap, 2*n - 1, bp, n);
+}
diff --git a/mpn/generic/mulmod_bknp1.c b/mpn/generic/mulmod_bknp1.c
new file mode 100644
index 0000000..feb10eb
--- /dev/null
+++ b/mpn/generic/mulmod_bknp1.c
@@ -0,0 +1,502 @@
+/* Multiplication mod B^n+1, for small operands.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2020-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#ifndef MOD_BKNP1_USE11
+#define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0))
+#endif
+#ifndef MOD_BKNP1_ONLY3
+#define MOD_BKNP1_ONLY3 0
+#endif
+
+/* {rp, (k - 1) * n} = {op, k * n + 1} % (B^{k*n}+1) / (B^n+1) */
+static void
+_mpn_modbknp1dbnp1_n (mp_ptr rp, mp_srcptr op, mp_size_t n, unsigned k)
+{
+  mp_limb_t hl;
+  mp_srcptr hp;
+  unsigned i;
+
+#if MOD_BKNP1_ONLY3
+  ASSERT (k == 3);
+  k = 3;   /* let the compiler specialise everything for k == 3 */
+#endif
+  ASSERT (k > 2);
+  ASSERT (k % 2 == 1);
+
+  --k;   /* k is now even: the number of n-limb chunks below the top one */
+
+  /* hp points at the highest n-limb chunk of op; hl is the single limb
+     above it.  That top chunk gets folded into the lower chunks with
+     alternating signs (the modulus divides B^{kn}+1).  */
+  rp += k * n;
+  op += k * n;
+  hp = op;
+  hl = hp[n]; /* initial op[k*n]. */
+  ASSERT (hl < GMP_NUMB_MAX - 1);
+
+#if MOD_BKNP1_ONLY3 == 0
+  /* The first MPN_INCR_U (rp + n, 1, cy); in the loop should be
+     rp[n] = cy;						*/
+  *rp = 0;
+#endif
+
+  i = k >> 1;
+  do
+   {
+     mp_limb_t cy, bw;
+     /* Add the top chunk into this chunk; the carry propagates through
+        the part of rp already produced above.  */
+     rp -= n;
+     op -= n;
+     cy = hl + mpn_add_n (rp, op, hp, n);
+#if MOD_BKNP1_ONLY3
+     rp[n] = cy;
+#else
+     MPN_INCR_U (rp + n, (k - i * 2) * n + 1, cy);
+#endif
+     /* Subtract the top chunk from the next chunk down; a borrow is
+        propagated the same way.  */
+     rp -= n;
+     op -= n;
+     bw = hl + mpn_sub_n (rp, op, hp, n);
+     MPN_DECR_U (rp + n, (k - i * 2 + 1) * n + 1, bw);
+   }
+  while (--i != 0);
+
+  /* If a limb leaked above the result, fold it back in the same
+     alternating-sign fashion until the top limb is zero.  */
+  for (; (hl = *(rp += k * n)) != 0; ) /* Should run only once... */
+    {
+      *rp = 0;
+      i = k >> 1;
+      do
+	{
+	  rp -= n;
+	  MPN_INCR_U (rp, (k - i * 2 + 1) * n + 1, hl);
+	  rp -= n;
+	  MPN_DECR_U (rp, (k - i * 2 + 2) * n + 1, hl);
+	}
+      while (--i != 0);
+    }
+}
+
+/* In-place renormalisation of {r, n+1} modulo B^n+1, where h == r[n]
+   is the excess above B^n.  Uses B^n = -1 (mod B^n+1), so subtracting
+   h from the low part removes h*(B^n+1).  On exit r[n] <= 1.  */
+static void
+_mpn_modbnp1_pn_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+  ASSERT (r[n] == h);
+
+  /* Fully normalise */
+  MPN_DECR_U (r, n + 1, h);
+  /* A borrow may have decremented r[n]; compensate by adding the
+     difference back at the bottom (again using B^n = -1).  */
+  h -= r[n];
+  r[n] = 0;
+  MPN_INCR_U (r, n + 1, h);
+}
+
+/* In-place reduction modulo B^n+1 when the top limb h represents a
+   negative contribution (two's complement): add -h (= |h|) to the low
+   part, then renormalise if that overflowed into r[n].  */
+static void
+_mpn_modbnp1_neg_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+  r[n] = 0;
+  MPN_INCR_U (r, n + 1, -h);
+  if (UNLIKELY (r[n] != 0))
+    _mpn_modbnp1_pn_ip (r, n, 1);
+}
+
+/* In-place reduction modulo B^n+1 with a signed top limb h: dispatch
+   to the negative or non-negative normalisation helper.  */
+static void
+_mpn_modbnp1_nc_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
+{
+  if (h & GMP_NUMB_HIGHBIT) /* This means h < 0 */
+    {
+      _mpn_modbnp1_neg_ip (r, n, h);
+    }
+  else
+    {
+      r[n] = h;
+      /* h == 0 means already normalised; skip the extra pass.  */
+      if (h)
+	_mpn_modbnp1_pn_ip(r, n, h);
+    }
+}
+
+/* {rp, rn + 1} = {op, on} mod (B^{rn}+1) */
+/* Used when rn < on < 2*rn.  The high limbs {op + rn, on - rn} are
+   subtracted from the low limbs, since B^rn = -1 (mod B^rn+1); a
+   borrow means the result went negative by B^rn, which is compensated
+   by adding 1 (because -B^rn = 1 mod B^rn+1).  */
+static void
+_mpn_modbnp1 (mp_ptr rp, mp_size_t rn, mp_srcptr op, mp_size_t on)
+{
+  mp_limb_t bw;
+
+#if 0
+  /* Disabled: callers guarantee on > rn, so the copy path is dead.  */
+  if (UNLIKELY (on <= rn))
+    {
+      MPN_COPY (rp, op, on);
+      MPN_ZERO (rp + on, rn - on);
+      return;
+    }
+#endif
+
+  ASSERT (on > rn);
+  ASSERT (on <= 2 * rn);
+
+  bw = mpn_sub (rp, op, rn, op + rn, on - rn);
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn + 1, bw);
+}
+
+/* {rp, rn + 1} = {op, k * rn + 1} % (B^{rn}+1) */
+/* With odd k >= 3.  Since B^{j*rn} = (-1)^j (mod B^rn+1), the rn-limb
+   chunks of op are combined with alternating signs; the signed excess
+   is accumulated in a single limb cy and folded in at the end.  */
+static void
+_mpn_modbnp1_kn (mp_ptr rp, mp_srcptr op, mp_size_t rn, unsigned k)
+{
+  mp_limb_t cy;
+
+#if MOD_BKNP1_ONLY3
+  ASSERT (k == 3);
+  k = 3;   /* let the compiler specialise for k == 3 */
+#endif
+  ASSERT (k & 1);
+  k >>= 1;   /* number of (add, subtract) chunk pairs */
+  ASSERT (0 < k && k < GMP_NUMB_HIGHBIT - 3);
+  /* The top limb must be small enough that cy cannot overflow.  */
+  ASSERT (op[(1 + 2 * k) * rn] < GMP_NUMB_HIGHBIT - 2 - k);
+
+  cy = - mpn_sub_n (rp, op, op + rn, rn);
+  for (;;) {
+    op += 2 * rn;
+    cy += mpn_add_n (rp, rp, op, rn);
+    if (--k == 0)
+      break;
+    cy -= mpn_sub_n (rp, rp, op + rn, rn);
+  };
+
+  /* Add the original top limb, then normalise with the signed cy.  */
+  cy += op[rn];
+  _mpn_modbnp1_nc_ip (rp, rn, cy);
+}
+
+/* For the various mpn_divexact_byN here, fall back to using either
+   mpn_pi1_bdiv_q_1 or mpn_divexact_1.  The former has less overhead and is
+   faster if it is native.  For now, since mpn_divexact_1 is native on
+   platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
+   mpn_pi1_bdiv_q_1 unconditionally.  FIXME.  */
+
+#ifndef mpn_divexact_by5
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_5 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 5 * 3 << 3) + 5) & GMP_NUMB_MAX)
+#define mpn_divexact_by5(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,5,BINVERT_5,0)
+#else
+#define mpn_divexact_by5(dst,src,size) mpn_divexact_1(dst,src,size,5)
+#endif
+#endif
+
+#ifndef mpn_divexact_by7
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_7 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 3)) / 7 * 3 << 4) + 7) & GMP_NUMB_MAX)
+#define mpn_divexact_by7(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,7,BINVERT_7,0)
+#else
+#define mpn_divexact_by7(dst,src,size) mpn_divexact_1(dst,src,size,7)
+#endif
+#endif
+
+#ifndef mpn_divexact_by11
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_11 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 10)) / 11 << 5) + 3) & GMP_NUMB_MAX)
+#define mpn_divexact_by11(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,11,BINVERT_11,0)
+#else
+#define mpn_divexact_by11(dst,src,size) mpn_divexact_1(dst,src,size,11)
+#endif
+#endif
+
+#ifndef mpn_divexact_by13
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_13 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 12)) / 13 * 3 << 14) + 3781) & GMP_NUMB_MAX)
+#define mpn_divexact_by13(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,13,BINVERT_13,0)
+#else
+#define mpn_divexact_by13(dst,src,size) mpn_divexact_1(dst,src,size,13)
+#endif
+#endif
+
+#ifndef mpn_divexact_by17
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define BINVERT_17 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 8)) / 17 * 15 << 7) + 113) & GMP_NUMB_MAX)
+#define mpn_divexact_by17(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,17,BINVERT_17,0)
+#else
+#define mpn_divexact_by17(dst,src,size) mpn_divexact_1(dst,src,size,17)
+#endif
+#endif
+
+/* Thanks to Chinese remainder theorem, store
+   in {rp, k*n+1} the value mod (B^(k*n)+1), given
+   {ap, k*n+1} mod ((B^(k*n)+1)/(B^n+1)) and
+   {bp, n+1} mod (B^n+1) .
+   {tp, n+1} is a scratch area.
+   tp == rp or rp == ap are possible.
+*/
+static void
+_mpn_crt (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
+	  mp_size_t n, unsigned k, mp_ptr tp)
+{
+  mp_limb_t mod;
+  unsigned i;
+
+#if MOD_BKNP1_ONLY3
+  ASSERT (k == 3);
+  k = 3;   /* let the compiler specialise for k == 3 */
+#endif
+  /* tp <- a mod (B^n+1); then tp <- b - tp, renormalising mod (B^n+1)
+     if the subtraction borrowed.  */
+  _mpn_modbnp1_kn (tp, ap, n, k);
+  if (mpn_sub_n (tp, bp, tp, n + 1))
+    _mpn_modbnp1_neg_ip (tp, n, tp[n]);
+
+  /* Compute mod = (b - a) * (-(B^n+1)^-1) mod k via a cheap one-limb
+     residue of tp; the exact formula depends on GMP_NUMB_BITS and k.  */
+#if MOD_BKNP1_USE11
+  if (UNLIKELY (k == 11))
+    {
+      ASSERT (GMP_NUMB_BITS % 2 == 0);
+      /* mod <- -Mod(B^n+1,11)^-1 */
+      mod = n * (GMP_NUMB_BITS % 5) % 5;
+      if ((mod > 2) || UNLIKELY (mod == 0))
+	mod += 5;
+
+      mod *= mpn_mod_1 (tp, n + 1, 11);
+    }
+  else
+#endif
+    {
+#if GMP_NUMB_BITS % 8 == 0
+  /* (2^6 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1)	*/
+  /* (2^6 - 1) = 3^2 * 7			*/
+  mod = mpn_mod_34lsub1 (tp, n + 1);
+  ASSERT ((GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2)) % k == 0);
+  /* (2^12 - 1) = 3^2 * 5 * 7 * 13		*/
+  /* (2^24 - 1) = 3^2 * 5 * 7 * 13 * 17 * 241	*/
+  ASSERT (k == 3 || k == 5 || k == 7 || k == 13 || k == 17);
+
+#if GMP_NUMB_BITS % 3 != 0
+  if (UNLIKELY (k != 3))
+    {
+      ASSERT ((GMP_NUMB_MAX % k == 0) || (n % 3 != 0));
+      if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
+	mod <<= 1; /* k >> 1 = 1 << 1 */
+      else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7))
+	mod <<= (n << (GMP_NUMB_BITS % 3 >> 1)) % 3;
+      else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
+	mod *= ((n << (GMP_NUMB_BITS % 3 >> 1)) % 3 == 1) ? 3 : 9;
+      else /* k == 17 */
+	mod <<= 3; /* k >> 1 = 1 << 3 */
+#if 0
+      if ((GMP_NUMB_BITS == 8) /* && (k == 7) */ ||
+	  (GMP_NUMB_BITS == 16) && (k == 13))
+	mod = ((mod & (GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2))) +
+	       (mod >> (3 * GMP_NUMB_BITS >> 2)));
+#endif
+    }
+#else
+  ASSERT (GMP_NUMB_MAX % k == 0);
+  /* 2^{GMP_NUMB_BITS} - 1	= 0 (mod k) */
+  /* 2^{GMP_NUMB_BITS}		= 1 (mod k) */
+  /* 2^{n*GMP_NUMB_BITS} + 1	= 2 (mod k) */
+  /* -2^{-1}	= k >> 1 (mod k) */
+  mod *= k >> 1;
+#endif
+#else
+  ASSERT_ALWAYS (k == 0); /* Not implemented, should not be used. */
+#endif
+    }
+
+  /* Add mod to the low limb and mod to the top limb, i.e. add
+     mod*(B^n+1), making {tp, n+1} exactly divisible by k.  */
+  MPN_INCR_U (tp, n + 1, mod);
+  tp[n] += mod;
+
+  /* Exact division by k.  */
+  if (LIKELY (k == 3))
+    ASSERT_NOCARRY (mpn_divexact_by3 (tp, tp, n + 1));
+  else if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
+    mpn_divexact_by5 (tp, tp, n + 1);
+  else if (((! MOD_BKNP1_USE11) && (GMP_NUMB_BITS % 16 != 0))
+	   || LIKELY (k == 7))
+    mpn_divexact_by7 (tp, tp, n + 1);
+#if MOD_BKNP1_USE11
+  else if (k == 11)
+    mpn_divexact_by11 (tp, tp, n + 1);
+#endif
+  else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
+    mpn_divexact_by13 (tp, tp, n + 1);
+  else /* (k == 17) */
+    mpn_divexact_by17 (tp, tp, n + 1);
+
+  /* Recombine: add tp times (B^{kn}+1)/(B^n+1) to a.  The cofactor's
+     chunks alternate in sign, so tp is alternately added to and
+     subtracted from the n-limb chunks of a, from the top down.  */
+  rp += k * n;
+  ap += k * n; /* tp - 1 */
+
+  rp -= n;
+  ap -= n;
+  ASSERT_NOCARRY (mpn_add_n (rp, ap, tp, n + 1));
+
+  i = k >> 1;
+  do
+   {
+      mp_limb_t cy, bw;
+      rp -= n;
+      ap -= n;
+      bw = mpn_sub_n (rp, ap, tp, n) + tp[n];
+      MPN_DECR_U (rp + n, (k - i * 2) * n + 1, bw);
+      rp -= n;
+      ap -= n;
+      cy = mpn_add_n (rp, ap, tp, n) + tp[n];
+      MPN_INCR_U (rp + n, (k - i * 2 + 1) * n + 1, cy);
+    }
+  while (--i != 0);
+
+  /* Final normalisation of the top limb (done unconditionally).  */
+  /* if (LIKELY (rp[k * n])) */
+    _mpn_modbnp1_pn_ip (rp, k * n, rp[k * n]);
+}
+
+
+/* {rp, rn+1} = {ap, rn+1} * {bp, rn+1} mod (B^rn + 1), operands
+   semi-normalised (top limb at most 1).  tp is scratch for the full
+   2*rn-limb product (or for the recursive bknp1 call).  */
+static void
+_mpn_mulmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+		    mp_ptr tp)
+{
+  mp_limb_t cy;
+  unsigned k;
+
+  ASSERT (0 < rn);
+  ASSERT ((ap[rn] | bp[rn]) <= 1);
+
+  if (UNLIKELY (ap[rn] | bp[rn]))
+    {
+      /* One operand equals B^rn = -1 (mod B^rn+1): the product is the
+	 negation of the other operand.  */
+      if (ap[rn])
+	cy = bp[rn] + mpn_neg (rp, bp, rn);
+      else /* ap[rn] == 0 */
+	cy = mpn_neg (rp, ap, rn);
+    }
+  else if (MPN_MULMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
+    {
+      /* Recurse through the CRT-based algorithm when rn has a usable
+	 odd factor k.  */
+      rn /= k;
+      mpn_mulmod_bknp1 (rp, ap, bp, rn, k, tp);
+      return;
+    }
+  else
+    {
+      /* Full product, then subtract the high half from the low half,
+	 since B^rn = -1 (mod B^rn+1).  */
+      mpn_mul_n (tp, ap, bp, rn);
+      cy = mpn_sub_n (rp, tp, tp + rn, rn);
+    }
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn + 1, cy);
+}
+
+/* {rp, kn + 1} = {ap, kn + 1} * {bp, kn + 1} % (B^kn + 1) */
+/* tp must point to at least 4*(k-1)*n+1 limbs.  The product is formed
+   via the CRT: once modulo (B^{kn}+1)/(B^n+1) and once modulo
+   (B^n+1), then the two residues are recombined by _mpn_crt.  */
+void
+mpn_mulmod_bknp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
+		  mp_size_t n, unsigned k, mp_ptr tp)
+{
+  mp_ptr hp;
+
+#if MOD_BKNP1_ONLY3
+  ASSERT (k == 3);
+  k = 3;   /* let the compiler specialise for k == 3 */
+#endif
+  ASSERT (k > 2);
+  ASSERT (k % 2 == 1);
+
+  /* a % (B^{nn}+1)/(B^{nn/k}+1) */
+  _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
+  /* b % (B^{nn}+1)/(B^{nn/k}+1) */
+  _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 3, bp, n, k);
+  /* Multiply the two (k-1)*n-limb residues, reduce mod (B^{kn}+1).  */
+  mpn_mul_n (tp, tp + (k - 1) * n * 2, tp + (k - 1) * n * 3, (k - 1) * n);
+  _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
+
+  hp = tp + k * n + 1;
+  /* a % (B^{nn/k}+1) */
+  ASSERT (ap[k * n] <= 1);
+  _mpn_modbnp1_kn (hp, ap, n, k);
+  /* b % (B^{nn/k}+1) */
+  ASSERT (bp[k * n] <= 1);
+  _mpn_modbnp1_kn (hp + n + 1, bp, n, k);
+  _mpn_mulmod_bnp1_tp (hp + (n + 1) * 2, hp, hp + n + 1, n, hp + (n + 1) * 2);
+
+  /* Recombine the two residues into the final result.  */
+  _mpn_crt (rp, tp, hp + (n + 1) * 2, n, k, hp);
+}
+
+
+/* Squaring variant of _mpn_mulmod_bnp1_tp:
+   {rp, rn+1} = {ap, rn+1}^2 mod (B^rn + 1), ap semi-normalised.  */
+static void
+_mpn_sqrmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_size_t rn,
+		    mp_ptr tp)
+{
+  mp_limb_t cy;
+  unsigned k;
+
+  ASSERT (0 < rn);
+
+  if (UNLIKELY (ap[rn]))
+    {
+      /* a = B^rn = -1 (mod B^rn+1), so a^2 = 1.  */
+      ASSERT (ap[rn] == 1);
+      *rp = 1;
+      MPN_FILL (rp + 1, rn, 0);
+      return;
+    }
+  else if (MPN_SQRMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
+    {
+      /* Recurse through the CRT-based squaring when rn has a usable
+	 odd factor k.  */
+      rn /= k;
+      mpn_sqrmod_bknp1 (rp, ap, rn, k, tp);
+      return;
+    }
+  else
+    {
+      /* Full square, then fold the high half with B^rn = -1.  */
+      mpn_sqr (tp, ap, rn);
+      cy = mpn_sub_n (rp, tp, tp + rn, rn);
+    }
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn + 1, cy);
+}
+
+/* {rp, kn + 1} = {ap, kn + 1}^2 % (B^kn + 1) */
+/* tp must point to at least 3*(k-1)*n+1 limbs.  Squaring analogue of
+   mpn_mulmod_bknp1: square modulo the two coprime factors of
+   B^{kn}+1, then recombine with _mpn_crt.  */
+void
+mpn_sqrmod_bknp1 (mp_ptr rp, mp_srcptr ap,
+		  mp_size_t n, unsigned k, mp_ptr tp)
+{
+  mp_ptr hp;
+
+#if MOD_BKNP1_ONLY3
+  ASSERT (k == 3);
+  k = 3;   /* let the compiler specialise for k == 3 */
+#endif
+  ASSERT (k > 2);
+  ASSERT (k % 2 == 1);
+
+  /* a % (B^{nn}+1)/(B^{nn/k}+1) */
+  _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
+  mpn_sqr (tp, tp + (k - 1) * n * 2, (k - 1) * n);
+  _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
+
+  hp = tp + k * n + 1;
+  /* a % (B^{nn/k}+1) */
+  ASSERT (ap[k * n] <= 1);
+  _mpn_modbnp1_kn (hp, ap, n, k);
+  _mpn_sqrmod_bnp1_tp (hp + (n + 1), hp, n, hp + (n + 1));
+
+  /* Recombine the two residues into the final result.  */
+  _mpn_crt (rp, tp, hp + (n + 1), n, k, hp);
+}
diff --git a/mpn/generic/mulmod_bnm1.c b/mpn/generic/mulmod_bnm1.c
new file mode 100644
index 0000000..8229ede
--- /dev/null
+++ b/mpn/generic/mulmod_bnm1.c
@@ -0,0 +1,374 @@
+/* mulmod_bnm1.c -- multiplication mod B^n-1.
+
+   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+   Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2013, 2020, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
+   mod B^rn - 1, and values are semi-normalised; zero is represented
+   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
+   tp==rp is allowed. */
+void
+mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+		    mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+
+  /* Full 2*rn-limb product, then fold the high half onto the low
+     half, using B^rn = 1 (mod B^rn - 1).  */
+  mpn_mul_n (tp, ap, bp, rn);
+  cy = mpn_add_n (rp, tp, tp + rn, rn);
+  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+   * be no overflow when adding in the carry. */
+  MPN_INCR_U (rp, rn, cy);
+}
+
+
+/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
+   normalised representation, computation is mod B^rn + 1. Needs
+   a scratch area of 2rn limbs at tp; tp == rp is allowed.
+   Output is normalised. */
+static void
+mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+		    mp_ptr tp)
+{
+  mp_limb_t cy;
+  unsigned k;
+
+  ASSERT (0 < rn);
+
+  if (UNLIKELY (ap[rn] | bp [rn]))
+    {
+      /* One operand equals B^rn = -1 (mod B^rn+1): the product is the
+	 negation of the other operand.  */
+      if (ap[rn])
+	cy = bp [rn] + mpn_neg (rp, bp, rn);
+      else /* ap[rn] == 0 */
+	cy = mpn_neg (rp, ap, rn);
+    }
+  else if (MPN_MULMOD_BKNP1_USABLE (rn, k, MUL_FFT_MODF_THRESHOLD))
+    {
+      /* rn has a usable odd factor k: use the CRT-based algorithm.  */
+      mp_size_t n_k = rn / k;
+      TMP_DECL;
+
+      TMP_MARK;
+      mpn_mulmod_bknp1 (rp, ap, bp, n_k, k,
+                       TMP_ALLOC_LIMBS (mpn_mulmod_bknp1_itch (rn)));
+      TMP_FREE;
+      return;
+    }
+  else
+    {
+      /* Full product, then subtract the high half from the low half,
+	 since B^rn = -1 (mod B^rn+1).  */
+      mpn_mul_n (tp, ap, bp, rn);
+      cy = mpn_sub_n (rp, tp, tp + rn, rn);
+    }
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn + 1, cy);
+}
+
+
+/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if one of the operand
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
+ * combine results and obtain a natural number when one knows in
+ * advance that the final value is less than (B^rn-1).
+ * Moreover it should not be a problem if mulmod_bnm1 is used to
+ * compute the full product with an+bn <= rn, because this condition
+ * implies (B^an-1)(B^bn-1) < (B^rn-1) .
+ *
+ * Requires 0 < bn <= an <= rn and an + bn > rn/2
+ * Scratch need: rn + (need for recursive call OR rn + 4). This gives
+ *
+ * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
+ */
+void
+mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
+{
+  ASSERT (0 < bn);
+  ASSERT (bn <= an);
+  ASSERT (an <= rn);
+
+  /* Odd rn cannot be split in half; small rn is not worth it.  Either
+     way fall back to a plain product plus wraparound.  */
+  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
+    {
+      if (UNLIKELY (bn < rn))
+	{
+	  if (UNLIKELY (an + bn <= rn))
+	    {
+	      /* Product fits: no reduction needed at all.  */
+	      mpn_mul (rp, ap, an, bp, bn);
+	    }
+	  else
+	    {
+	      /* Wrap the part above B^rn back onto the low limbs,
+		 using B^rn = 1 (mod B^rn - 1).  */
+	      mp_limb_t cy;
+	      mpn_mul (tp, ap, an, bp, bn);
+	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
+	      MPN_INCR_U (rp, rn, cy);
+	    }
+	}
+      else
+	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
+    }
+  else
+    {
+      mp_size_t n;
+      mp_limb_t cy;
+      mp_limb_t hi;
+
+      n = rn >> 1;
+
+      /* We need at least an + bn >= n, to be able to fit one of the
+	 recursive products at rp. Requiring strict inequality makes
+	 the code slightly simpler. If desired, we could avoid this
+	 restriction by initially halving rn as long as rn is even and
+	 an + bn <= rn/2. */
+
+      ASSERT (an + bn > n);
+
+      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
+	 and crt together as
+
+	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
+      */
+
+#define a0 ap
+#define a1 (ap + n)
+#define b0 bp
+#define b1 (bp + n)
+
+#define xp  tp	/* 2n + 2 */
+      /* am1  maybe in {xp, n} */
+      /* bm1  maybe in {xp + n, n} */
+#define sp1 (tp + 2*n + 2)
+      /* ap1  maybe in {sp1, n + 1} */
+      /* bp1  maybe in {sp1 + n + 1, n + 1} */
+
+      /* First half of the CRT: fold the high halves of a and b in with
+	 B^n = 1 to get am1, bm1, then recurse for xm = a*b mod
+	 (B^n - 1), stored at rp.  */
+      {
+	mp_srcptr am1, bm1;
+	mp_size_t anm, bnm;
+	mp_ptr so;
+
+	bm1 = b0;
+	bnm = bn;
+	if (LIKELY (an > n))
+	  {
+	    am1 = xp;
+	    cy = mpn_add (xp, a0, n, a1, an - n);
+	    MPN_INCR_U (xp, n, cy);
+	    anm = n;
+	    so = xp + n;
+	    if (LIKELY (bn > n))
+	      {
+		bm1 = so;
+		cy = mpn_add (so, b0, n, b1, bn - n);
+		MPN_INCR_U (so, n, cy);
+		bnm = n;
+		so += n;
+	      }
+	  }
+	else
+	  {
+	    so = xp;
+	    am1 = a0;
+	    anm = an;
+	  }
+
+	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
+      }
+
+      /* Second half of the CRT: fold the high halves in with
+	 B^n = -1 to get ap1, bp1, then compute xp = a*b mod (B^n + 1)
+	 at xp (== tp), via FFT, plain product, or the basecase.  */
+      {
+	int       k;
+	mp_srcptr ap1, bp1;
+	mp_size_t anp, bnp;
+
+	bp1 = b0;
+	bnp = bn;
+	if (LIKELY (an > n)) {
+	  ap1 = sp1;
+	  cy = mpn_sub (sp1, a0, n, a1, an - n);
+	  sp1[n] = 0;
+	  MPN_INCR_U (sp1, n + 1, cy);
+	  anp = n + ap1[n];
+	  if (LIKELY (bn > n)) {
+	    bp1 = sp1 + n + 1;
+	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
+	    sp1[2*n+1] = 0;
+	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
+	    bnp = n + bp1[n];
+	  }
+	} else {
+	  ap1 = a0;
+	  anp = an;
+	}
+
+	/* Pick an FFT size parameter k such that 2^k divides n; k == 0
+	   forces the non-FFT paths below.  */
+	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+	  k=0;
+	else
+	  {
+	    int mask;
+	    k = mpn_fft_best_k (n, 0);
+	    mask = (1<<k) - 1;
+	    while (n & mask) {k--; mask >>=1;};
+	  }
+	if (k >= FFT_FIRST_K)
+	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
+	else if (UNLIKELY (bp1 == b0))
+	  {
+	    /* b was short (bn <= n): plain product, then manual
+	       reduction mod B^n + 1.  */
+	    ASSERT (anp + bnp <= 2*n+1);
+	    ASSERT (anp + bnp > n);
+	    ASSERT (anp >= bnp);
+	    mpn_mul (xp, ap1, anp, bp1, bnp);
+	    anp = anp + bnp - n;
+	    ASSERT (anp <= n || xp[2*n]==0);
+	    anp-= anp > n;
+	    cy = mpn_sub (xp, xp, n, xp + n, anp);
+	    xp[n] = 0;
+	    MPN_INCR_U (xp, n+1, cy);
+	  }
+	else
+	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
+      }
+
+      /* Here the CRT recomposition begins.
+
+	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+	 Division by 2 is a bitwise rotation.
+
+	 Assumes xp normalised mod (B^n+1).
+
+	 The residue class [0] is represented by [B^n-1]; except when
+	 both input are ZERO.
+      */
+
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+      hi = cy << (GMP_NUMB_BITS - 1);
+      cy = 0;
+      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+	 overflows, i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
+	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
+#endif
+#if GMP_NAIL_BITS == 0
+      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+      rp[n-1] ^= hi;
+#endif
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+      cy += (rp[0]&1);
+      mpn_rshift(rp, rp, n, 1);
+      ASSERT (cy <= 2);
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* We can have cy != 0 only if hi = 0... */
+      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+      rp[n-1] |= hi;
+      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
+#endif
+      ASSERT (cy <= 1);
+      /* Next increment can not overflow, read the previous comments about cy. */
+      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+      MPN_INCR_U(rp, n, cy);
+
+      /* Compute the highest half:
+	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
+       */
+      if (UNLIKELY (an + bn < rn))
+	{
+	  /* Note that in this case, the only way the result can equal
+	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
+	     then the output of both the recursive calls and this CRT
+	     reconstruction is zero, not B^{rn} - 1. Which is good,
+	     since the latter representation doesn't fit in the output
+	     area.*/
+	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
+
+	  /* FIXME: This subtraction of the high parts is not really
+	     necessary, we do it to get the carry out, and for sanity
+	     checking. */
+	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
+				   xp + an + bn - n, rn - (an + bn), cy);
+	  ASSERT (an + bn == rn - 1 ||
+		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
+	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
+	  ASSERT (cy == (xp + an + bn - n)[0]);
+	}
+      else
+	{
+	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+	     DECR will affect _at most_ the lowest n limbs. */
+	  MPN_DECR_U (rp, 2*n, cy);
+	}
+#undef a0
+#undef a1
+#undef b0
+#undef b1
+#undef xp
+#undef sp1
+    }
+}
+
+/* Return the smallest operand size >= n that mpn_mulmod_bnm1 handles
+   efficiently: below the threshold any size is fine; above it n is
+   rounded up to a multiple of 2, 4 or 8 (so it can be halved enough
+   times), and in the FFT range to a size the FFT supports.  */
+mp_size_t
+mpn_mulmod_bnm1_next_size (mp_size_t n)
+{
+  mp_size_t nh;
+
+  if (BELOW_THRESHOLD (n,     MULMOD_BNM1_THRESHOLD))
+    return n;
+  if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + (2-1)) & (-2);   /* round up to even: one halving */
+  if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + (4-1)) & (-4);   /* round up to 4: two halvings */
+
+  nh = (n + 1) >> 1;
+
+  if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
+    return (n + (8-1)) & (-8);   /* round up to 8: three halvings */
+
+  /* FFT range: the half size must be an FFT-supported size.  */
+  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
+}
diff --git a/mpn/generic/neg.c b/mpn/generic/neg.c
new file mode 100644
index 0000000..bec2a32
--- /dev/null
+++ b/mpn/generic/neg.c
@@ -0,0 +1,33 @@
+/* mpn_neg - negate an mpn.
+
+Copyright 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_neg 1
+
+#include "gmp-impl.h"
diff --git a/mpn/generic/nussbaumer_mul.c b/mpn/generic/nussbaumer_mul.c
new file mode 100644
index 0000000..3e0cf27
--- /dev/null
+++ b/mpn/generic/nussbaumer_mul.c
@@ -0,0 +1,70 @@
+/* mpn_nussbaumer_mul -- Multiply {ap,an} and {bp,bn} using
+   Nussbaumer's negacyclic convolution.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Multiply {ap,an} by {bp,bn} and store the an+bn limb product at pp,
+   by computing it modulo B^rn-1 for an rn large enough to hold the
+   full product.  Dispatches to the squaring variant when the two
+   operands are the same region.  */
+void
+mpn_nussbaumer_mul (mp_ptr pp,
+		    mp_srcptr ap, mp_size_t an,
+		    mp_srcptr bp, mp_size_t bn)
+{
+  mp_size_t modn;
+  mp_ptr scratch;
+  TMP_DECL;
+
+  ASSERT (an >= bn);
+  ASSERT (bn > 0);
+
+  TMP_MARK;
+
+  if (ap != bp || an != bn)
+    {
+      /* General product: pick a modulus size covering an + bn limbs.  */
+      modn = mpn_mulmod_bnm1_next_size (an + bn);
+      scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, an, bn));
+      mpn_mulmod_bnm1 (pp, modn, ap, an, bp, bn, scratch);
+    }
+  else
+    {
+      /* Identical operands: use the cheaper squaring code path.  */
+      modn = mpn_sqrmod_bnm1_next_size (2*an);
+      scratch = TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (modn, an));
+      mpn_sqrmod_bnm1 (pp, modn, ap, an, scratch);
+    }
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/perfpow.c b/mpn/generic/perfpow.c
new file mode 100644
index 0000000..9d46477
--- /dev/null
+++ b/mpn/generic/perfpow.c
@@ -0,0 +1,342 @@
+/* mpn_perfect_power_p -- mpn perfect power detection.
+
+   Contributed to the GNU project by Martin Boij.
+
+Copyright 2009, 2010, 2012, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Operand size classes (in limbs) used to choose the trial division
+   depth from nrtrial[] below.  */
+#define SMALL 20
+#define MEDIUM 100
+
+/* Return non-zero if {np,nn} == {xp,xn} ^ k.
+   Algorithm:
+       For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
+       {xp,xn}^k. Stop if they don't match the s least significant limbs of
+       {np,nn}.
+
+   FIXME: Low xn limbs can be expected to always match, if computed as a mod
+   B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
+   most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
+   compare to {np, nn}. Or use an even cruder approximation based on fix-point
+   base 2 logarithm.  */
+static int
+pow_equals (mp_srcptr np, mp_size_t n,
+	    mp_srcptr xp,mp_size_t xn,
+	    mp_limb_t k, mp_bitcnt_t f,
+	    mp_ptr tp)
+{
+  mp_bitcnt_t y, z;
+  mp_size_t bn;
+  mp_limb_t h, l;
+
+  ASSERT (n > 1 || (n == 1 && np[0] > 1));
+  ASSERT (np[n - 1] > 0);
+  ASSERT (xn > 0);
+
+  /* x == 1 can never match, since {np,n} > 1.  */
+  if (xn == 1 && xp[0] == 1)
+    return 0;
+
+  /* Compare the low limbs of x^k (mod B^bn) against {np,n}, with
+     doubling precision bn = 1, 2, 4, ..., up to about n/2 limbs.  */
+  z = 1 + (n >> 1);
+  for (bn = 1; bn < z; bn <<= 1)
+    {
+      mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
+      if (mpn_cmp (tp, np, bn) != 0)
+	return 0;
+    }
+
+  /* Final check. Estimate the size of {xp,xn}^k before computing the power
+     with full precision.  Optimization: It might pay off to make a more
+     accurate estimation of the logarithm of {xp,xn}, rather than using the
+     index of the MSB.  */
+
+  MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
+  y -= 1;  /* msb_index (xp, xn) */
+
+  /* h:l = k * msb_index(x), then decremented by one.  */
+  umul_ppmm (h, l, k, y);
+  h -= l == 0;  --l;	/* two-limb decrement */
+
+  z = f - 1; /* msb_index (np, n) */
+  /* x^k can only equal {np,n} if k*msb_index(x) - 1 <= msb_index(np);
+     otherwise the power is certainly too large.  */
+  if (h == 0 && l <= z)
+    {
+      mp_limb_t *tp2;
+      mp_size_t i;
+      int ans;
+      mp_limb_t size;
+      TMP_DECL;
+
+      /* Upper bound (in bits) on x^k; the assert guards against limb
+	 wraparound in the addition.  */
+      size = l + k;
+      ASSERT_ALWAYS (size >= k);
+
+      TMP_MARK;
+      y = 2 + size / GMP_LIMB_BITS;
+      tp2 = TMP_ALLOC_LIMBS (y);
+
+      /* Full-precision power; equal iff both size and limbs match.  */
+      i = mpn_pow_1 (tp, xp, xn, k, tp2);
+      if (i == n && mpn_cmp (tp, np, n) == 0)
+	ans = 1;
+      else
+	ans = 0;
+      TMP_FREE;
+      return ans;
+    }
+
+  return 0;
+}
+
+
+/* Return non-zero if N = {np,n} is a kth power.
+   I = {ip,n} = N^(-1) mod B^n.  */
+static int
+is_kth_power (mp_ptr rp, mp_srcptr np,
+	      mp_limb_t k, mp_srcptr ip,
+	      mp_size_t n, mp_bitcnt_t f,
+	      mp_ptr tp)
+{
+  mp_bitcnt_t b;
+  mp_size_t rn, xn;
+
+  ASSERT (n > 0);
+  ASSERT ((k & 1) != 0 || k == 2);
+  ASSERT ((np[0] & 1) != 0);
+
+  if (k == 2)
+    {
+      /* b = ceil(f/2): bit count of a square root candidate.  */
+      b = (f + 1) >> 1;
+      rn = 1 + b / GMP_LIMB_BITS;
+      /* Compute a root candidate from the inverse I mod 2^b (roots are
+	 obtained via the inverse; see the FIXME in perfpow).  Non-zero
+	 return means an inverse square root exists.  */
+      if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
+	{
+	  /* Truncate to b bits; mod 2^b there are two candidates to
+	     try, r and 2^b - r.  */
+	  rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+	  xn = rn;
+	  MPN_NORMALIZE (rp, xn);
+	  if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
+	    return 1;
+
+	  /* Check if (2^b - r)^2 == n */
+	  mpn_neg (rp, rp, rn);
+	  rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+	  MPN_NORMALIZE (rp, rn);
+	  if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
+	    return 1;
+	}
+    }
+  else
+    {
+      /* Odd k: b = ceil(f/k) bits; the k-th root mod 2^b is unique.  */
+      b = 1 + (f - 1) / k;
+      rn = 1 + (b - 1) / GMP_LIMB_BITS;
+      mpn_brootinv (rp, ip, rn, k, tp);
+      if ((b % GMP_LIMB_BITS) != 0)
+	rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+      MPN_NORMALIZE (rp, rn);
+      if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
+	return 1;
+    }
+  MPN_ZERO (rp, rn); /* Untrash rp */
+  return 0;
+}
+
+/* Decide whether the odd value {np,n}, of f significant bits, is a
+   perfect power.  ub is an exclusive upper bound on the prime
+   exponents to try.  g, when non-zero, is a common divisor of any
+   possible exponent (derived from factors the caller already removed),
+   so only primes dividing g need testing.  neg flags a negative
+   original operand, which rules out the exponent 2.  */
+static int
+perfpow (mp_srcptr np, mp_size_t n,
+	 mp_limb_t ub, mp_limb_t g,
+	 mp_bitcnt_t f, int neg)
+{
+  mp_ptr ip, tp, rp;
+  mp_limb_t k;
+  int ans;
+  mp_bitcnt_t b;
+  gmp_primesieve_t ps;
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT ((np[0] & 1) != 0);
+  ASSERT (ub > 0);
+
+  TMP_MARK;
+  gmp_init_primesieve (&ps);
+  /* Working bit precision for the mod 2^b inverse computed below.  */
+  b = (f + 3) >> 1;
+
+  TMP_ALLOC_LIMBS_3 (ip, n, rp, n, tp, 5 * n);
+
+  MPN_ZERO (rp, n);
+
+  /* FIXME: It seems the inverse in ninv is needed only to get non-inverted
+     roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
+     similarly for nth roots. It should be more efficient to compute n^{1/2} as
+     n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
+     similar for kth roots if we switch to an iteration converging to n^{1/k -
+     1}, and we can then eliminate this binvert call. */
+  mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
+  /* Truncate the inverse to exactly b bits.  */
+  if (b % GMP_LIMB_BITS)
+    ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
+
+  /* A negative value cannot be an even power: skip the prime 2.  */
+  if (neg)
+    gmp_nextprime (&ps);
+
+  ans = 0;
+  if (g > 0)
+    {
+      /* Any exponent must divide g, so no prime above g can succeed.  */
+      ub = MIN (ub, g + 1);
+      while ((k = gmp_nextprime (&ps)) < ub)
+	{
+	  if ((g % k) == 0)
+	    {
+	      if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
+		{
+		  ans = 1;
+		  goto ret;
+		}
+	    }
+	}
+    }
+  else
+    {
+      /* No constraint on the exponent: try every prime below ub.  */
+      while ((k = gmp_nextprime (&ps)) < ub)
+	{
+	  if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
+	    {
+	      ans = 1;
+	      goto ret;
+	    }
+	}
+    }
+ ret:
+  TMP_FREE;
+  return ans;
+}
+
+/* Number of primes to use for trial division, indexed by the operand
+   size class picked in mpn_perfect_power_p (see SMALL and MEDIUM).  */
+static const unsigned short nrtrial[] = { 100, 500, 1000 };
+
+/* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
+   number.  */
+static const double logs[] =
+  { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
+
+int
+mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
+{
+  mp_limb_t *nc, factor, g;
+  mp_limb_t exp, d;
+  mp_bitcnt_t twos, count;
+  int ans, where, neg, trial;
+  TMP_DECL;
+
+  /* A negative size encodes a negative operand of -n limbs.  */
+  neg = n < 0;
+  if (neg)
+    {
+      n = -n;
+    }
+
+  if (n == 0 || (n == 1 && np[0] == 1)) /* Valgrind doesn't like
+					   (n <= (np[0] == 1)) */
+    return 1;
+
+  TMP_MARK;
+
+  count = 0;
+
+  /* Strip factors of 2; their multiplicity constrains the exponent.  */
+  twos = mpn_scan1 (np, 0);
+  if (twos != 0)
+    {
+      mp_size_t s;
+      if (twos == 1)
+	{
+	  /* 2 divides the operand exactly once, so no exponent >= 2
+	     is possible.  */
+	  return 0;
+	}
+      s = twos / GMP_LIMB_BITS;
+      if (s + 1 == n && POW2_P (np[s]))
+	{
+	  /* The operand is exactly 2^twos with twos >= 2.  A positive
+	     power of two is then always a perfect power; a negative one
+	     needs an odd exponent, i.e. twos must have an odd factor.  */
+	  return ! (neg && POW2_P (twos));
+	}
+      count = twos % GMP_LIMB_BITS;
+      n -= s;
+      np += s;
+      if (count > 0)
+	{
+	  nc = TMP_ALLOC_LIMBS (n);
+	  mpn_rshift (nc, np, n, count);
+	  n -= (nc[n - 1] == 0);
+	  np = nc;
+	}
+    }
+  /* g accumulates the gcd of all prime multiplicities seen so far; any
+     perfect-power exponent must divide it (0 = none seen yet).  */
+  g = twos;
+
+  /* Trial division depth by operand size class.  */
+  trial = (n > SMALL) + (n > MEDIUM);
+
+  where = 0;
+  factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
+
+  if (factor != 0)
+    {
+      if (count == 0) /* We did not allocate nc yet. */
+	{
+	  nc = TMP_ALLOC_LIMBS (n);
+	}
+
+      /* Remove factors found by trialdiv.  Optimization: If remove
+	 define _itch, we can allocate its scratch just once */
+
+      do
+	{
+	  binvert_limb (d, factor);
+
+	  /* After the first round we always have nc == np */
+	  exp = mpn_remove (nc, &n, np, n, &d, 1, ~(mp_bitcnt_t)0);
+
+	  if (g == 0)
+	    g = exp;
+	  else
+	    g = mpn_gcd_1 (&g, 1, exp);
+
+	  if (g == 1)
+	    {
+	      /* Coprime multiplicities: no common exponent exists.  */
+	      ans = 0;
+	      goto ret;
+	    }
+
+	  if ((n == 1) & (nc[0] == 1))
+	    {
+	      /* Fully factored, g > 1: the magnitude is a perfect
+		 power.  A negative operand additionally needs an odd
+		 exponent, i.e. g must not be a power of 2.  */
+	      ans = ! (neg && POW2_P (g));
+	      goto ret;
+	    }
+
+	  np = nc;
+	  factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
+	}
+      while (factor != 0);
+    }
+
+  MPN_SIZEINBASE_2EXP(count, np, n, 1);   /* log (np) + 1 */
+  /* Exponent bound: any k-th root of the cofactor would be at least
+     the smallest untried prime p, so k <= log_p (cofactor); logs[trial]
+     is log_p 2 for that prime.  */
+  d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
+  ans = perfpow (np, n, d, g, count, neg);
+
+ ret:
+  TMP_FREE;
+  return ans;
+}
diff --git a/mpn/generic/perfsqr.c b/mpn/generic/perfsqr.c
new file mode 100644
index 0000000..1ea5c84
--- /dev/null
+++ b/mpn/generic/perfsqr.c
@@ -0,0 +1,238 @@
+/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
+   zero otherwise.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "perfsqr.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+
+/* PERFSQR_MOD_* detects non-squares using residue tests.
+
+   A macro PERFSQR_MOD_TEST is setup by gen-psqr.c in perfsqr.h.  It takes
+   {up,usize} modulo a selected modulus to get a remainder r.  For 32-bit or
+   64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,
+   or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP
+   used.  PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this
+   case too.
+
+   PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or
+   PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table
+   data indicating residues and non-residues modulo those divisors.  The
+   table data is in 1 or 2 limbs worth of bits respectively, per the size of
+   each d.
+
+   A "modexact" style remainder is taken to reduce r modulo d.
+   PERFSQR_MOD_IDX implements this, producing an index "idx" for use with
+   the table data.  Notice there's just one multiplication by a constant
+   "inv", for each d.
+
+   The modexact doesn't produce a true r%d remainder, instead idx satisfies
+   "-(idx<<PERFSQR_MOD_BITS) == r mod d".  Because d is odd, this factor
+   -2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is
+   accounted for by having the table data suitably permuted.
+
+   The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.
+   In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit
+   each divisor d meaning the modexact multiply can take place entirely
+   within one limb, giving the compiler the chance to optimize it, in a way
+   that say umul_ppmm would not give.
+
+   There's no need for the divisors d to be prime, in fact gen-psqr.c makes
+   a deliberate effort to combine factors so as to reduce the number of
+   separate tests done on r.  But such combining is limited to d <=
+   2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.
+
+   Alternatives:
+
+   It'd be possible to use bigger divisors d, and more than 2 limbs of table
+   data, but this doesn't look like it would be of much help to the prime
+   factors in the usual moduli 2^24-1 or 2^48-1.
+
+   The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're
+   just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime
+   factors.  2^32-1 and 2^64-1 would be equally easy to calculate, but have
+   fewer prime factors.
+
+   The nails case usually ends up using mpn_mod_1, which is a lot slower
+   than mpn_mod_34lsub1.  Perhaps other such special moduli could be found
+   for the nails case.  Two-term things like 2^30-2^15-1 might be
+   candidates.  Or at worst some on-the-fly de-nailing would allow the plain
+   2^24-1 to be used.  Currently nails are too preliminary to be worried
+   about.
+
+*/
+
+/* Mask of the PERFSQR_MOD_BITS low bits; the remainder r computed by
+   the macros below always fits in this range.  */
+#define PERFSQR_MOD_MASK       ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
+
+#define MOD34_BITS  (GMP_NUMB_BITS / 4 * 3)
+#define MOD34_MASK  ((CNST_LIMB(1) << MOD34_BITS) - 1)
+
+/* r = {up,usize} mod 2^MOD34_BITS - 1, folded once so r fits the
+   PERFSQR_MOD_BITS range.  */
+#define PERFSQR_MOD_34(r, up, usize)				\
+  do {								\
+    (r) = mpn_mod_34lsub1 (up, usize);				\
+    (r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS);		\
+  } while (0)
+
+/* FIXME: The %= here isn't good, and might destroy any savings from keeping
+   the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
+   Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
+   and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our
+   normal case, so let's not worry too much about mod_1.  */
+#define PERFSQR_MOD_PP(r, up, usize)					\
+  do {									\
+    if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD))	\
+      {									\
+	(r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,		\
+				PERFSQR_PP_INVERTED);			\
+	(r) %= PERFSQR_PP;						\
+      }									\
+    else								\
+      {									\
+	(r) = mpn_mod_1 (up, usize, PERFSQR_PP);			\
+      }									\
+  } while (0)
+
+/* Reduce r modulo the divisor d via the "modexact" multiply by inv
+   described in the comment above, leaving a table index in idx.  */
+#define PERFSQR_MOD_IDX(idx, r, d, inv)				\
+  do {								\
+    mp_limb_t  q;						\
+    ASSERT ((r) <= PERFSQR_MOD_MASK);				\
+    ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1);		\
+    ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK);		\
+								\
+    q = ((r) * (inv)) & PERFSQR_MOD_MASK;			\
+    ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK));		\
+    (idx) = (q * (d)) >> PERFSQR_MOD_BITS;			\
+  } while (0)
+
+/* One-limb residue table: reject (return 0, non-square) unless bit idx
+   of mask is set.  */
+#define PERFSQR_MOD_1(r, d, inv, mask)				\
+  do {								\
+    unsigned   idx;						\
+    ASSERT ((d) <= GMP_LIMB_BITS);				\
+    PERFSQR_MOD_IDX(idx, r, d, inv);				\
+    TRACE (printf ("  PERFSQR_MOD_1 d=%u r=%lu idx=%u\n",	\
+		   d, r%d, idx));				\
+    if ((((mask) >> idx) & 1) == 0)				\
+      {								\
+	TRACE (printf ("  non-square\n"));			\
+	return 0;						\
+      }								\
+  } while (0)
+
+/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
+   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch.  */
+#define PERFSQR_MOD_2(r, d, inv, mhi, mlo)			\
+  do {								\
+    mp_limb_t  m;						\
+    unsigned   idx;						\
+    ASSERT ((d) <= 2*GMP_LIMB_BITS);				\
+								\
+    PERFSQR_MOD_IDX (idx, r, d, inv);				\
+    TRACE (printf ("  PERFSQR_MOD_2 d=%u r=%lu idx=%u\n",	\
+		   d, r%d, idx));				\
+    m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi));	\
+    idx %= GMP_LIMB_BITS;					\
+    if (((m >> idx) & 1) == 0)					\
+      {								\
+	TRACE (printf ("  non-square\n"));			\
+	return 0;						\
+      }								\
+  } while (0)
+
+
+int
+mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
+{
+  ASSERT (usize >= 1);
+
+  TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));
+
+  /* The first test excludes 212/256 (82.8%) of the perfect square candidates
+     in O(1) time.  */
+  {
+    /* sq_res_0x100[] (from perfsqr.h) is a bitmap of the quadratic
+       residues mod 256; test the bit selected by the low byte.  */
+    unsigned  idx = up[0] % 0x100;
+    if (((sq_res_0x100[idx / GMP_LIMB_BITS]
+	  >> (idx % GMP_LIMB_BITS)) & 1) == 0)
+      return 0;
+  }
+
+#if 0
+  /* Check that we have even multiplicity of 2, and then check that the rest is
+     a possible perfect square.  Leave disabled until we can determine this
+     really is an improvement.  If it is, it could completely replace the
+     simple probe above, since this should throw out more non-squares, but at
+     the expense of somewhat more cycles.  */
+  {
+    mp_limb_t lo;
+    int cnt;
+    lo = up[0];
+    while (lo == 0)
+      up++, lo = up[0], usize--;
+    count_trailing_zeros (cnt, lo);
+    if ((cnt & 1) != 0)
+      return 0;			/* return if not even multiplicity of 2 */
+    lo >>= cnt;			/* shift down to align lowest non-zero bit */
+    if ((lo & 6) != 0)
+      return 0;
+  }
+#endif
+
+
+  /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares
+     according to their residues modulo small primes (or powers of
+     primes).  See perfsqr.h.  */
+  PERFSQR_MOD_TEST (up, usize);
+
+
+  /* For the third and last test, we finally compute the square root,
+     to make sure we've really got a perfect square.  */
+  {
+    mp_ptr root_ptr;
+    int res;
+    TMP_DECL;
+
+    TMP_MARK;
+    /* The root of a usize-limb number needs ceil(usize/2) limbs.  */
+    root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);
+
+    /* Iff mpn_sqrtrem returns zero, the square is perfect.  */
+    res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
+    TMP_FREE;
+
+    return res;
+  }
+}
diff --git a/mpn/generic/popham.c b/mpn/generic/popham.c
new file mode 100644
index 0000000..87974d7
--- /dev/null
+++ b/mpn/generic/popham.c
@@ -0,0 +1,125 @@
+/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
+
+Copyright 1994, 1996, 2000-2002, 2005, 2011, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* This file is compiled once per operation: with OPERATION_popcount
+   defined it yields mpn_popcount, with OPERATION_hamdist it yields
+   mpn_hamdist.  POPHAM selects the per-limb word whose set bits are
+   counted: u itself, or u ^ v for the hamming distance.  */
+#if OPERATION_popcount
+#define FNAME mpn_popcount
+#define POPHAM(u,v) u
+#endif
+
+#if OPERATION_hamdist
+#define FNAME mpn_hamdist
+#define POPHAM(u,v) u ^ v
+#endif
+
+/* Count set bits of {up,n} (popcount) or of {up,n} ^ {vp,n} (hamdist).
+   In popcount builds the vp parameter does not exist and the vp[]
+   references below vanish by macro expansion of POPHAM.  */
+mp_bitcnt_t
+FNAME (mp_srcptr up,
+#if OPERATION_hamdist
+       mp_srcptr vp,
+#endif
+       mp_size_t n) __GMP_NOTHROW
+{
+  mp_bitcnt_t result = 0;
+  mp_limb_t p0, p1, p2, p3, x, p01, p23;
+  mp_size_t i;
+
+  ASSERT (n >= 1);		/* Actually, this code handles any n, but some
+				   assembly implementations do not.  */
+
+  /* Main loop: four limbs per iteration, counted with SWAR (parallel
+     bit-field) arithmetic.  The constants are repeating masks:
+     MP_LIMB_T_MAX/3 = 0x55..55, /5 = 0x33..33, /17 = 0x0f..0f.  The
+     trailing "w a-b" comments give the packed counter field width w in
+     bits and its value range a-b at that point.  */
+  for (i = n >> 2; i != 0; i--)
+    {
+      p0 = POPHAM (up[0], vp[0]);
+      p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+      p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+
+      p1 = POPHAM (up[1], vp[1]);
+      p1 -= (p1 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+      p1 = ((p1 >> 2) & MP_LIMB_T_MAX/5) + (p1 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+
+      p01 = p0 + p1;							/* 8 0-8 */
+      p01 = ((p01 >> 4) & MP_LIMB_T_MAX/17) + (p01 & MP_LIMB_T_MAX/17);	/* 8 0-16 */
+
+      p2 = POPHAM (up[2], vp[2]);
+      p2 -= (p2 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+      p2 = ((p2 >> 2) & MP_LIMB_T_MAX/5) + (p2 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+
+      p3 = POPHAM (up[3], vp[3]);
+      p3 -= (p3 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+      p3 = ((p3 >> 2) & MP_LIMB_T_MAX/5) + (p3 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+
+      p23 = p2 + p3;							/* 8 0-8 */
+      p23 = ((p23 >> 4) & MP_LIMB_T_MAX/17) + (p23 & MP_LIMB_T_MAX/17);	/* 8 0-16 */
+
+      x = p01 + p23;							/* 8 0-32 */
+      x = (x >> 8) + x;							/* 8 0-64 */
+      x = (x >> 16) + x;						/* 8 0-128 */
+#if GMP_LIMB_BITS > 32
+      x = ((x >> 32) & 0xff) + (x & 0xff);				/* 8 0-256 */
+      result += x;
+#else
+      result += x & 0xff;
+#endif
+      up += 4;
+#if OPERATION_hamdist
+      vp += 4;
+#endif
+    }
+
+  /* Handle the remaining 0-3 limbs one at a time.  */
+  n &= 3;
+  if (n != 0)
+    {
+      x = 0;
+      do
+	{
+	  p0 = POPHAM (up[0], vp[0]);
+	  p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
+	  p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
+	  p0 = ((p0 >> 4) + p0) & MP_LIMB_T_MAX/17;			/* 8 0-8 */
+
+	  x += p0;
+	  up += 1;
+#if OPERATION_hamdist
+	  vp += 1;
+#endif
+	}
+      while (--n);
+
+      /* Fold the per-byte counters into the low byte.  */
+      x = (x >> 8) + x;
+      x = (x >> 16) + x;
+#if GMP_LIMB_BITS > 32
+      x = (x >> 32) + x;
+#endif
+      result += x & 0xff;
+    }
+
+  return result;
+}
diff --git a/mpn/generic/pow_1.c b/mpn/generic/pow_1.c
new file mode 100644
index 0000000..de11cd2
--- /dev/null
+++ b/mpn/generic/pow_1.c
@@ -0,0 +1,135 @@
+/* mpn_pow_1 -- Compute powers R = U^exp.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2002, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Compute {rp,rn} = {bp,bn} ^ exp and return the result size rn.  The
+   computation ping-pongs between rp and tp, with the starting buffer
+   chosen so the final result lands in the caller's rp; both areas must
+   therefore be large enough for the full result.  */
+mp_size_t
+mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
+{
+  mp_limb_t x;
+  int cnt, i;
+  mp_size_t rn;
+  int par;
+
+  ASSERT (bn >= 1);
+  /* FIXME: Add operand overlap criteria */
+
+  /* Trivial exponents: b^0 = 1, b^1 = b.  */
+  if (exp <= 1)
+    {
+      if (exp == 0)
+	{
+	  rp[0] = 1;
+	  return 1;
+	}
+      else
+	{
+	  MPN_COPY (rp, bp, bn);
+	  return bn;
+	}
+    }
+
+  /* Count number of bits in exp, and compute where to put initial square in
+     order to magically get results in the entry rp.  Use simple code,
+     optimized for small exp.  For large exp, the bignum operations will take
+     so much time that the slowness of this code will be negligible.  */
+  par = 0;
+  cnt = GMP_LIMB_BITS;
+  x = exp;
+  /* After this loop, cnt is the number of leading zero bits of exp and
+     bit 0 of par is the parity of popcount(exp).  */
+  do
+    {
+      par ^= x;
+      cnt--;
+      x >>= 1;
+    } while (x != 0);
+  exp <<= cnt;		/* Shift the MSB of exp to the top of the limb.  */
+
+  if (bn == 1)
+    {
+      mp_limb_t rl, rh, bl = bp[0];
+
+      /* Pre-swap so the squarings' buffer swaps below leave the final
+	 result in the caller's rp (see comment above).  */
+      if ((cnt & 1) != 0)
+	MP_PTR_SWAP (rp, tp);
+
+      /* Initial b^2; the GMP_NAIL_BITS shifts keep the product in
+	 numb form.  */
+      umul_ppmm (rh, rl, bl, bl << GMP_NAIL_BITS);
+      rp[0] = rl >> GMP_NAIL_BITS;
+      rp[1] = rh;
+      rn = 1 + (rh != 0);
+
+      /* Left-to-right binary exponentiation: a squaring per remaining
+	 exponent bit, plus a mul_1 for each 1 bit.  */
+      for (i = GMP_LIMB_BITS - cnt - 1;;)
+	{
+	  exp <<= 1;
+	  if ((exp & GMP_LIMB_HIGHBIT) != 0)
+	    {
+	      rp[rn] = rh = mpn_mul_1 (rp, rp, rn, bl);
+	      rn += rh != 0;
+	    }
+
+	  if (--i == 0)
+	    break;
+
+	  mpn_sqr (tp, rp, rn);
+	  rn = 2 * rn; rn -= tp[rn - 1] == 0;	/* Normalize the size.  */
+	  MP_PTR_SWAP (rp, tp);
+	}
+    }
+  else
+    {
+      /* Multi-limb base: each 1 bit also costs a full mpn_mul with its
+	 own buffer swap, so the pre-swap depends on both the bit count
+	 (via cnt) and the popcount parity (par).  */
+      if (((par ^ cnt) & 1) == 0)
+	MP_PTR_SWAP (rp, tp);
+
+      mpn_sqr (rp, bp, bn);
+      rn = 2 * bn; rn -= rp[rn - 1] == 0;
+
+      for (i = GMP_LIMB_BITS - cnt - 1;;)
+	{
+	  exp <<= 1;
+	  if ((exp & GMP_LIMB_HIGHBIT) != 0)
+	    {
+	      rn = rn + bn - (mpn_mul (tp, rp, rn, bp, bn) == 0);
+	      MP_PTR_SWAP (rp, tp);
+	    }
+
+	  if (--i == 0)
+	    break;
+
+	  mpn_sqr (tp, rp, rn);
+	  rn = 2 * rn; rn -= tp[rn - 1] == 0;
+	  MP_PTR_SWAP (rp, tp);
+	}
+    }
+
+  return rn;
+}
diff --git a/mpn/generic/powlo.c b/mpn/generic/powlo.c
new file mode 100644
index 0000000..c109512
--- /dev/null
+++ b/mpn/generic/powlo.c
@@ -0,0 +1,188 @@
+/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
+
+Copyright 2007-2009, 2012, 2015, 2016, 2018, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Bit bi of {p,..}, with the least significant bit numbered bi == 1
+   (not 0) -- hence the "bi - 1" offsets.  */
+#define getbit(p,bi) \
+  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+
+/* Return the nbits bits of {p,..} just below bit index bi, i.e. bits
+   [bi-nbits, bi).  When bi <= nbits, return the low bi bits instead.  */
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, unsigned nbits)
+{
+  mp_size_t limb_idx;
+  mp_limb_t bits;
+  unsigned got;
+
+  if (bi <= nbits)
+    return p[0] & (((mp_limb_t) 1 << bi) - 1);
+
+  bi -= nbits;			/* lowest bit index to extract */
+  limb_idx = bi / GMP_NUMB_BITS;	/* limb holding that bit */
+  bi %= GMP_NUMB_BITS;		/* its position within the limb */
+  bits = p[limb_idx] >> bi;	/* low part of the field */
+  got = GMP_NUMB_BITS - bi;	/* bits obtained from the first limb */
+  if (got < nbits)		/* not the whole field yet? */
+    bits += p[limb_idx + 1] << got;	/* fill in from the next limb */
+  return bits & (((mp_limb_t) 1 << nbits) - 1);
+}
+
+/* Select the sliding-window width (in bits) for an eb-bit exponent:
+   returns i+1 for the first table entry with eb <= x[i].  The +1 comes
+   from the post-increment in the loop condition, and the ~0 sentinel
+   guarantees termination.  */
+static inline unsigned
+win_size (mp_bitcnt_t eb)
+{
+  unsigned k;
+  static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+  ASSERT (eb > 1);
+  for (k = 0; eb > x[k++];)
+    ;
+  return k;
+}
+
+/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
+   Requires that ep[en-1] is non-zero.
+   Uses scratch space tp[3n-1..0], i.e., 3n words.  */
+/* We only use n words in the scratch space, we should pass tp + n to
+   mullo/sqrlo as a temporary area, it is needed. */
+void
+mpn_powlo (mp_ptr rp, mp_srcptr bp,
+	   mp_srcptr ep, mp_size_t en,
+	   mp_size_t n, mp_ptr tp)
+{
+  unsigned cnt;
+  mp_bitcnt_t ebi;
+  unsigned windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_limb_t *pp;
+  long i;
+  int flipflop;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+
+  TMP_MARK;
+
+  /* ebi = bit count of the exponent {ep,en}.  */
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+
+  windowsize = win_size (ebi);
+  if (windowsize > 1)
+    {
+      mp_limb_t *this_pp, *last_pp;
+      ASSERT (windowsize < ebi);
+
+      /* Table of odd powers b^1, b^3, ..., b^(2^windowsize - 1) mod
+	 B^n; entry j holds b^(2j+1).  */
+      pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)));
+
+      this_pp = pp;
+
+      MPN_COPY (this_pp, bp, n);
+
+      /* Store b^2 in tp.  */
+      mpn_sqrlo (tp, bp, n);
+
+      /* Precompute odd powers of b and put them in the temporary area at pp.  */
+      i = (1 << (windowsize - 1)) - 1;
+      do
+	{
+	  last_pp = this_pp;
+	  this_pp += n;
+	  mpn_mullo_n (this_pp, last_pp, tp, n);
+	} while (--i != 0);
+
+      /* Peel off the top window; its trailing zero bits are pushed back
+	 into the exponent so the odd-power table applies.  */
+      expbits = getbits (ep, ebi, windowsize);
+      ebi -= windowsize;
+
+      /* THINK: Should we initialise the case expbits % 4 == 0 with a mullo? */
+      count_trailing_zeros (cnt, expbits);
+      ebi += cnt;
+      expbits >>= cnt;
+
+      MPN_COPY (rp, pp + n * (expbits >> 1), n);
+    }
+  else
+    {
+      /* One-bit window: plain square-and-multiply, "table" is b.  */
+      pp = tp + n;
+      MPN_COPY (pp, bp, n);
+      MPN_COPY (rp, bp, n);
+      --ebi;
+    }
+
+  /* flipflop tracks whether rp and tp have been exchanged an odd number
+     of times, i.e. whether the running result currently lives in the
+     caller's tp area.  */
+  flipflop = 0;
+
+  do
+    {
+      /* One squaring per 0 bit of the exponent.  */
+      while (getbit (ep, ebi) == 0)
+	{
+	  mpn_sqrlo (tp, rp, n);
+	  MP_PTR_SWAP (rp, tp);
+	  flipflop = ! flipflop;
+	  if (--ebi == 0)
+	    goto done;
+	}
+
+      /* The next bit of the exponent is 1.  Now extract the largest block of
+	 bits <= windowsize, and such that the least significant bit is 1.  */
+
+      expbits = getbits (ep, ebi, windowsize);
+      this_windowsize = MIN (windowsize, ebi);
+
+      count_trailing_zeros (cnt, expbits);
+      this_windowsize -= cnt;
+      ebi -= this_windowsize;
+      expbits >>= cnt;
+
+      /* Square this_windowsize times, arranging for the value to be
+	 multiplied by the table entry to end up in tp (an extra pointer
+	 swap when the count is even).  */
+      while (this_windowsize > 1)
+	{
+	  mpn_sqrlo (tp, rp, n);
+	  mpn_sqrlo (rp, tp, n);
+	  this_windowsize -= 2;
+	}
+
+      if (this_windowsize != 0)
+	mpn_sqrlo (tp, rp, n);
+      else
+	{
+	  MP_PTR_SWAP (rp, tp);
+	  flipflop = ! flipflop;
+	}
+
+      /* Multiply in the odd power selected by the window bits.  */
+      mpn_mullo_n (rp, tp, pp + n * (expbits >> 1), n);
+    } while (ebi != 0);
+
+ done:
+  /* After an odd number of swaps the result sits in the caller's tp
+     area; copy it to the caller's rp, which is our local tp.  */
+  if (flipflop)
+    MPN_COPY (tp, rp, n);
+  TMP_FREE;
+}
diff --git a/mpn/generic/powm.c b/mpn/generic/powm.c
new file mode 100644
index 0000000..1e30f2f
--- /dev/null
+++ b/mpn/generic/powm.c
@@ -0,0 +1,1003 @@
+/* mpn_powm -- Compute R = U^E mod M.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2007-2012, 2019-2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+
+  1. W <- U
+
+  2. T <- (B^n * U) mod M                Convert to REDC form
+
+  3. Compute table U^1, U^3, U^5... of E-dependent size
+
+  4. While there are more bits in E
+       W <- power left-to-right base-k
+
+
+  TODO:
+
+   * Make getbits a macro, thereby allowing it to update the index operand.
+     That will simplify the code using getbits.  (Perhaps make getbits' sibling
+     getbit then have similar form, for symmetry.)
+
+   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
+     pp area is allocated locally anyway?
+
+   * Choose window size without looping.  (Superoptimize or think(tm).)
+
+   * Handle small bases with initial, reduction-free exponentiation.
+
+   * Call new division functions, not mpn_tdiv_qr.
+
+   * Consider special code for one-limb M.
+
+   * How should we handle the redc1/redc2/redc_n choice?
+     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
+     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
+     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
+     This disregards the addmul_N constant term, but we could think of
+     that as part of the respective mullo.
+
+   * When U (the base) is small, we should start the exponentiation with plain
+     operations, then convert that partial result to REDC form.
+
+   * When U is just one limb, should it be handled without the k-ary tricks?
+     We could keep a factor of B^n in W, but use U' = BU as base.  After
+     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
+     mod M.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* One-limb Montgomery reduction: given the two-limb value {u1,u0} and an odd
+   one-limb modulus m0 with invm = 1/m0 mod B (cf. binvert_limb; callers that
+   store a negated inverse pass it re-negated), compute r0 congruent to
+   {u1,u0} / B mod m0, with 0 <= r0 < m0.  */
+#undef MPN_REDC_0
+#define MPN_REDC_0(r0, u1, u0, m0, invm)				\
+  do {									\
+    mp_limb_t _p1, _u1, _u0, _m0, _r0, _dummy;				\
+    _u0 = (u0);								\
+    _m0 = (m0);								\
+    umul_ppmm (_p1, _dummy, _m0, (_u0 * (invm)) & GMP_NUMB_MASK);	\
+    ASSERT (((_u0 - _dummy) & GMP_NUMB_MASK) == 0);			\
+    _u1 = (u1);								\
+    _r0 = _u1 - _p1;							\
+    _r0 = _u1 < _p1 ? _r0 + _m0 : _r0; /* _u1 < _r0 */			\
+    (r0) = _r0 & GMP_NUMB_MASK;						\
+  } while (0)
+
+/* n-limb Montgomery reduction driven by a one-limb inverse invm: reduce the
+   2n-limb number at up modulo {mp,n}, leaving n limbs at rp congruent to
+   up/B^n mod m.  A reduction carry is folded back by one subtraction of m;
+   the result may still be >= m, callers do a final compare-and-subtract
+   where a canonical value is needed.  */
+#undef MPN_REDC_1
+#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
+#define MPN_REDC_1(rp, up, mp, n, invm)					\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);			\
+    if (cy != 0)							\
+      mpn_sub_n (rp, up + n, mp, n);					\
+    else								\
+      MPN_COPY (rp, up + n, n);						\
+  } while (0)
+#else
+#define MPN_REDC_1(rp, up, mp, n, invm)					\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
+    if (cy != 0)							\
+      mpn_sub_n (rp, rp, mp, n);					\
+  } while (0)
+#endif
+
+/* As MPN_REDC_1, but using a two-limb inverse at mip (mpn_redc_2),
+   which is profitable where a fast addmul_2 exists.  */
+#undef MPN_REDC_2
+#define MPN_REDC_2(rp, up, mp, n, mip)					\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
+    if (cy != 0)							\
+      mpn_sub_n (rp, rp, mp, n);					\
+  } while (0)
+
+/* Enable the redc_2 code paths only where they can win: a native
+   mpn_addmul_2 or a native mpn_redc_2.  */
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
+/* Extract exponent bit number bi of p; bi is 1-based counting from the
+   least significant bit.  */
+#define getbit(p,bi) \
+  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+
+/* Return the nbits exponent bits of p ending at 1-based bit position bi,
+   i.e. bits [bi-nbits, bi).  When fewer than nbits bits lie below bi, the
+   bi low bits of the least significant limb are returned instead.  */
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+  mp_limb_t acc;
+  mp_size_t limb_idx;
+  int got;
+
+  if (bi <= nbits)
+    /* Not enough bits below bi for a full window; take what is there.  */
+    return p[0] & (((mp_limb_t) 1 << bi) - 1);
+
+  bi -= nbits;				/* bit index of lowest bit wanted */
+  limb_idx = bi / GMP_NUMB_BITS;	/* limb holding that bit */
+  bi %= GMP_NUMB_BITS;			/* its offset within the limb */
+
+  acc = p[limb_idx] >> bi;		/* low part of the field */
+  got = GMP_NUMB_BITS - bi;		/* bits gathered so far */
+  if (got < nbits)			/* field straddles a limb boundary */
+    acc += p[limb_idx + 1] << got;	/* splice in bits of the next limb */
+
+  return acc & (((mp_limb_t) 1 << nbits) - 1);
+}
+
+/* Choose the k-ary exponentiation window size for an eb-bit exponent:
+   return the smallest k such that eb <= x[k-1].  The sentinel ~0 entry
+   guarantees termination for any exponent size.  */
+static inline int
+win_size (mp_bitcnt_t eb)
+{
+  static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+  int k = 0;
+
+  do
+    k++;
+  while (eb > x[k - 1]);
+
+  return k;
+}
+
+/* Convert U to REDC form, U_r = B^n * U mod M */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
+{
+  mp_ptr tp, qp;
+  TMP_DECL;
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
+
+  /* tp <- B^n * U: {up,un} with n zero limbs appended below.  */
+  MPN_ZERO (tp, n);
+  MPN_COPY (tp + n, up, un);
+  /* The division remainder at rp is the REDC residue; quotient qp is
+     scratch and discarded.  */
+  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
+  TMP_FREE;
+}
+
+/* mpn_rsblsh1_n_ip2 (a, b, n) computes {a,n} <- 2*{a,n} - {b,n} in place,
+   via a native routine when available, else a shift plus a subtraction.  */
+#if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2
+#undef mpn_rsblsh1_n_ip2
+#if HAVE_NATIVE_mpn_rsblsh1_n
+#define mpn_rsblsh1_n_ip2(a,b,n)	mpn_rsblsh1_n(a,b,a,n)
+#else
+#define mpn_rsblsh1_n_ip2(a,b,n)				\
+  do								\
+    {								\
+      mpn_lshift (a, a, n, 1);					\
+      mpn_sub_n (a, a, b, n);					\
+    } while (0)
+#endif
+#endif
+
+/* Square-and-double inner loop for mpn_2powm: each step squares rp (mod mp,
+   REDC form), makes it canonical, and doubles it when the current exponent
+   bit is 1, using 2*rp - m when plain doubling would exceed the bit length
+   of m.  MPN_SQR and MPN_REDUCE are rebound to size-appropriate routines
+   before each expansion.  Consumes ebi down to 0.  */
+#define INNERLOOP2						\
+  do								\
+    {								\
+      MPN_SQR (tp, rp, n);					\
+      MPN_REDUCE (rp, tp, mp, n, mip);				\
+      if (mpn_cmp (rp, mp, n) >= 0)				\
+	ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n));		\
+      if (getbit (ep, ebi) != 0)				\
+	{							\
+	  if (rp[n - 1] >> (mbi - 1) % GMP_LIMB_BITS == 0)	\
+	    ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1));		\
+	  else							\
+	    mpn_rsblsh1_n_ip2 (rp, mp, n);			\
+	}							\
+    } while (--ebi != 0)
+
+/* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0]
+   Requires that mp[n-1..0] is odd and > 1.
+   Requires that ep[en-1..0] is > 1.
+   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
+static void
+mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en,
+	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ip[2], *mip;
+  mp_bitcnt_t ebi, mbi, tbi;
+  mp_size_t tn;
+  int count;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+  ASSERT (n > 0 && (mp[0] & 1) != 0);
+
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+  MPN_SIZEINBASE_2EXP(mbi, mp, n, 1);
+
+  /* count <- bit length of mbi, i.e. the largest number of leading exponent
+     bits whose value can still denote a bit position below m's bit count.  */
+  if (LIKELY (mbi <= GMP_NUMB_MAX))
+    {
+      count_leading_zeros(count, (mp_limb_t) mbi);
+      count = GMP_NUMB_BITS - (count - GMP_NAIL_BITS);
+    }
+  else
+    {
+      mp_bitcnt_t tc = mbi;
+
+      count = 0;
+      do { ++count; } while ((tc >>= 1) != 0);
+    }
+
+  /* Consume the leading exponent bits directly: 2^tbi needs no modular
+     reduction as long as tbi < mbi.  */
+  tbi = getbits (ep, ebi, count);
+  if (tbi >= mbi)
+    {
+      --count;
+      ASSERT ((tbi >> count) == 1);
+      tbi >>= 1;
+      ASSERT (tbi < mbi);
+      ASSERT (ebi > count);
+    }
+  else if (ebi <= count)
+    {
+      /* The whole exponent fit in one chunk: the result is the plain
+	 power of two 2^tbi, no reduction needed.  */
+      MPN_FILL (rp, n, 0);
+      rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
+      return;
+    }
+  ebi -= count;
+
+  /* Single-limb modulus: dedicated path on plain limbs with REDC_0.  */
+  if (n == 1)
+    {
+      mp_limb_t r0, m0, invm;
+      m0 = *mp;
+
+      /* redcify (rp, tp, tn + 1, mp, n); */
+      /* TODO: test direct use of udiv_qrnnd */
+      ASSERT (tbi < GMP_LIMB_BITS);
+      tp[1] = CNST_LIMB (1) << tbi;
+      tp[0] = CNST_LIMB (0);
+      r0 = mpn_mod_1 (tp, 2, m0);
+
+      binvert_limb (invm, m0);
+      do
+	{
+	  mp_limb_t t0, t1, t2;
+	  /* MPN_SQR (tp, rp, n);			*/
+	  umul_ppmm (t1, t0, r0, r0);
+	  /* MPN_REDUCE (rp, tp, mp, n, mip);		*/
+	  MPN_REDC_0(r0, t1, t0, m0, invm);
+
+	  t2 = r0 << 1;
+	  t2 = r0 > (m0 >> 1) ? t2 - m0 : t2;
+	  r0 = getbit (ep, ebi) != 0 ? t2 : r0;
+	} while (--ebi != 0);
+
+      /* tp[1] = 0; tp[0] = r0;	*/
+      /* MPN_REDUCE (rp, tp, mp, n, mip);	*/
+      MPN_REDC_0(*rp, 0, r0, m0, invm);
+
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Choose the REDC flavor for this size and precompute its inverse of m.  */
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (ip[0], mp[0]);
+      ip[0] = -ip[0];
+    }
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      mpn_binvert (ip, mp, 2, tp);
+      ip[0] = -ip[0]; ip[1] = ~ip[1];
+    }
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (ip[0], mp[0]);
+      ip[0] = -ip[0];
+    }
+#endif
+  else
+    {
+      mip = TMP_ALLOC_LIMBS (n);
+      mpn_binvert (mip, mp, n, tp);
+    }
+
+  /* rp <- 2^tbi converted to REDC form.  */
+  tn = tbi / GMP_LIMB_BITS;
+  MPN_ZERO (tp, tn);
+  tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
+
+  redcify (rp, tp, tn + 1, mp, n);
+
+  /* Bind MPN_SQR/MPN_REDUCE to routines matching the operand size; the
+     threshold comparisons are compile-time constants, so dead branches are
+     discarded and each INNERLOOP2 expansion is fully specialized.  */
+#if WANT_REDC_2
+  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
+	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+      else
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	  INNERLOOP2;
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+      else
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+    }
+
+#else  /* WANT_REDC_2 */
+
+  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
+	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	      INNERLOOP2;
+	    }
+	}
+      else
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	  INNERLOOP2;
+	}
+      else
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+    }
+#endif  /* WANT_REDC_2 */
+
+  /* Convert out of REDC form: reduce {rp,n} zero-padded to 2n limbs,
+     i.e. multiply by B^-n mod m.  */
+  MPN_COPY (tp, rp, n);
+  MPN_FILL (tp + n, n, 0);
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, ip[0]);
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, ip[0]);
+#endif
+  else
+    mpn_redc_n (rp, tp, mp, n, mip);
+
+  if (mpn_cmp (rp, mp, n) >= 0)
+    mpn_sub_n (rp, rp, mp, n);
+
+  TMP_FREE;
+}
+
+/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
+   Requires that mp[n-1..0] is odd.
+   Requires that ep[en-1..0] is > 1.
+   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
+void
+mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+	  mp_srcptr ep, mp_size_t en,
+	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ip[2], *mip;
+  int cnt;
+  mp_bitcnt_t ebi;
+  int windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_ptr pp, this_pp;
+  long i;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
+
+  /* Base 2 has a dedicated, faster routine.  */
+  if (bn == 1 && bp[0] == 2)
+    {
+      mpn_2powm (rp, ep, en, mp, n, tp);
+      return;
+    }
+
+  TMP_MARK;
+
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+
+#if 0
+  if (bn < n)
+    {
+      /* Do the first few exponent bits without mod reductions,
+	 until the result is greater than the mod argument.  */
+      for (;;)
+	{
+	  mpn_sqr (tp, this_pp, tn);
+	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
+	  if (getbit (ep, ebi) != 0)
+	    mpn_mul (..., tp, tn, bp, bn);
+	  ebi--;
+	}
+    }
+#endif
+
+  windowsize = win_size (ebi);
+
+  /* Choose the REDC flavor for this size and precompute its inverse of m.  */
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      mpn_binvert (mip, mp, 2, tp);
+      mip[0] = -mip[0]; mip[1] = ~mip[1];
+    }
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+#endif
+  else
+    {
+      mip = TMP_ALLOC_LIMBS (n);
+      mpn_binvert (mip, mp, n, tp);
+    }
+
+  /* Table of the 2^(windowsize-1) odd powers b^1, b^3, ..., in REDC form.  */
+  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
+
+  this_pp = pp;
+  redcify (this_pp, bp, bn, mp, n);
+
+  /* Store b^2 at rp.  */
+  mpn_sqr (tp, this_pp, n);
+#if 0
+  if (n == 1) {
+    MPN_REDC_0 (rp[0], tp[1], tp[0], mp[0], -mip[0]);
+  } else
+#endif
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+#endif
+  else
+    mpn_redc_n (rp, tp, mp, n, mip);
+
+  /* Precompute odd powers of b and put them in the temporary area at pp.  */
+  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
+#if 1
+    if (n == 1) {
+      umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
+      ++this_pp ;
+      MPN_REDC_0 (*this_pp, tp[1], tp[0], *mp, -mip[0]);
+    } else
+#endif
+    {
+      mpn_mul_n (tp, this_pp, rp, n);
+      this_pp += n;
+#if WANT_REDC_2
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	MPN_REDC_2 (this_pp, tp, mp, n, mip);
+#else
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
+#endif
+      else
+	mpn_redc_n (this_pp, tp, mp, n, mip);
+    }
+
+  /* Handle the leading bit window; dropping trailing zero bits from the
+     window keeps the table index odd.  */
+  expbits = getbits (ep, ebi, windowsize);
+  ebi -= windowsize;
+
+  /* THINK: Should we initialise the case expbits % 4 == 0 with a mul? */
+  count_trailing_zeros (cnt, expbits);
+  ebi += cnt;
+  expbits >>= cnt;
+
+  MPN_COPY (rp, pp + n * (expbits >> 1), n);
+
+/* Left-to-right sliding-window loop: square through zero exponent bits,
+   then extract a window of <= windowsize bits ending in a 1, square once
+   per window bit, and multiply by the matching odd-power table entry.
+   MPN_SQR/MPN_MUL_N/MPN_REDUCE are rebound before each expansion.  */
+#define INNERLOOP							\
+  while (ebi != 0)							\
+    {									\
+      while (getbit (ep, ebi) == 0)					\
+	{								\
+	  MPN_SQR (tp, rp, n);						\
+	  MPN_REDUCE (rp, tp, mp, n, mip);				\
+	  if (--ebi == 0)						\
+	    goto done;							\
+	}								\
+									\
+      /* The next bit of the exponent is 1.  Now extract the largest	\
+	 block of bits <= windowsize, and such that the least		\
+	 significant bit is 1.  */					\
+									\
+      expbits = getbits (ep, ebi, windowsize);				\
+      this_windowsize = MIN (ebi, windowsize);				\
+									\
+      count_trailing_zeros (cnt, expbits);				\
+      this_windowsize -= cnt;						\
+      ebi -= this_windowsize;						\
+      expbits >>= cnt;							\
+									\
+      do								\
+	{								\
+	  MPN_SQR (tp, rp, n);						\
+	  MPN_REDUCE (rp, tp, mp, n, mip);				\
+	}								\
+      while (--this_windowsize != 0);					\
+									\
+      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
+      MPN_REDUCE (rp, tp, mp, n, mip);					\
+    }
+
+
+  if (n == 1)
+    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		umul_ppmm((r)[1], *(r), *(a), *(b))
+#define MPN_SQR(r,a,n)			umul_ppmm((r)[1], *(r), *(a), *(a))
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_0(*(rp), (tp)[1], (tp)[0], *(mp), - *(mip))
+      INNERLOOP;
+    }
+  else
+#if WANT_REDC_2
+  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
+	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	  INNERLOOP;
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+    }
+
+#else  /* WANT_REDC_2 */
+
+  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
+	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	      INNERLOOP;
+	    }
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	  else
+	    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	  INNERLOOP;
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+    }
+#endif  /* WANT_REDC_2 */
+
+ done:
+
+  /* Convert out of REDC form: reduce {rp,n} zero-padded to 2n limbs,
+     i.e. multiply by B^-n mod m, then canonicalize.  */
+  MPN_COPY (tp, rp, n);
+  MPN_ZERO (tp + n, n);
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+#endif
+  else
+    mpn_redc_n (rp, tp, mp, n, mip);
+
+  if (mpn_cmp (rp, mp, n) >= 0)
+    mpn_sub_n (rp, rp, mp, n);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/pre_divrem_1.c b/mpn/generic/pre_divrem_1.c
new file mode 100644
index 0000000..3b29d77
--- /dev/null
+++ b/mpn/generic/pre_divrem_1.c
@@ -0,0 +1,145 @@
+/* mpn_preinv_divrem_1 -- mpn by limb division with pre-inverted divisor.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Don't bloat a shared library with unused code. */
+#if USE_PREINV_DIVREM_1
+
+/* Same test here for skipping one divide step as in mpn_divrem_1.
+
+   The main reason for a separate shift==0 case is that not all CPUs give
+   zero for "n0 >> GMP_LIMB_BITS" which would arise in the general case
+   code used on shift==0.  shift==0 is also reasonably common in mp_bases
+   big_base, for instance base==10 on a 64-bit limb.
+
+   Under shift!=0 it would be possible to call mpn_lshift to adjust the
+   dividend all in one go (into the quotient space say), rather than
+   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
+   than what the compiler can generate for EXTRACT.  But this is left to CPU
+   specific implementations to consider, especially since EXTRACT isn't on
+   the dependent chain.
+
+   If size==0 then the result is simply xsize limbs of zeros, but nothing
+   special is done for that, since it wouldn't be a usual call, and
+   certainly never arises from mpn_get_str which is our main caller.  */
+
+/* Divide {ap,size} (with xsize extra zero "fraction" limbs appended below)
+   by d_unnorm, storing size+xsize quotient limbs at qp, high limb first,
+   and returning the remainder.  dinv is the invert_limb() inverse of the
+   normalized divisor and shift its count_leading_zeros normalization.  */
+mp_limb_t
+mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t xsize,
+		     mp_srcptr ap, mp_size_t size, mp_limb_t d_unnorm,
+		     mp_limb_t dinv, int shift)
+{
+  mp_limb_t  ahigh, qhigh, r;
+  mp_size_t  i;
+  mp_limb_t  n1, n0;
+  mp_limb_t  d;
+
+  ASSERT (xsize >= 0);
+  ASSERT (size >= 1);
+  ASSERT (d_unnorm != 0);
+#if WANT_ASSERT
+  {
+    /* Cross-check that the caller's precomputed shift/dinv really match
+       d_unnorm.  */
+    int        want_shift;
+    mp_limb_t  want_dinv;
+    count_leading_zeros (want_shift, d_unnorm);
+    ASSERT (shift == want_shift);
+    invert_limb (want_dinv, d_unnorm << shift);
+    ASSERT (dinv == want_dinv);
+  }
+#endif
+  /* FIXME: What's the correct overlap rule when xsize!=0? */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+xsize, ap, size));
+
+  ahigh = ap[size-1];
+  d = d_unnorm << shift;
+  qp += (size + xsize - 1);   /* dest high limb */
+
+  if (shift == 0)
+    {
+      /* High quotient limb is 0 or 1, and skip a divide step. */
+      r = ahigh;
+      qhigh = (r >= d);
+      r = (qhigh ? r-d : r);
+      *qp-- = qhigh;
+      size--;
+
+      /* One preinverted division step per remaining limb, high to low.  */
+      for (i = size-1; i >= 0; i--)
+	{
+	  n0 = ap[i];
+	  udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+	  qp--;
+	}
+    }
+  else
+    {
+      /* Normalized case: shift the dividend left on the fly, gluing each
+	 limb's low bits to the next limb's high bits.  */
+      r = 0;
+      if (ahigh < d_unnorm)
+	{
+	  /* High quotient limb is zero; fold ahigh into the running
+	     remainder instead of dividing.  */
+	  r = ahigh << shift;
+	  *qp-- = 0;
+	  size--;
+	  if (size == 0)
+	    goto done_integer;
+	}
+
+      n1 = ap[size-1];
+      r |= n1 >> (GMP_LIMB_BITS - shift);
+
+      for (i = size-2; i >= 0; i--)
+	{
+	  ASSERT (r < d);
+	  n0 = ap[i];
+	  udiv_qrnnd_preinv (*qp, r, r,
+			     ((n1 << shift) | (n0 >> (GMP_LIMB_BITS - shift))),
+			     d, dinv);
+	  qp--;
+	  n1 = n0;
+	}
+      /* Lowest dividend limb: no following limb to borrow bits from.  */
+      udiv_qrnnd_preinv (*qp, r, r, n1 << shift, d, dinv);
+      qp--;
+    }
+
+ done_integer:
+  /* Produce the xsize fraction limbs from zero dividend limbs.  */
+  for (i = 0; i < xsize; i++)
+    {
+      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+      qp--;
+    }
+
+  /* Undo the normalization of the remainder.  */
+  return r >> shift;
+}
+
+#endif /* USE_PREINV_DIVREM_1 */
diff --git a/mpn/generic/pre_mod_1.c b/mpn/generic/pre_mod_1.c
new file mode 100644
index 0000000..78ae308
--- /dev/null
+++ b/mpn/generic/pre_mod_1.c
@@ -0,0 +1,61 @@
+/* mpn_preinv_mod_1 (up, un, d, dinv) -- Divide (UP,,UN) by the normalized D.
+   DINV should be 2^(2*GMP_LIMB_BITS) / D - 2^GMP_LIMB_BITS.
+   Return the single-limb remainder.
+
+Copyright 1991, 1993, 1994, 2000-2002, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This function used to be documented, but is now considered obsolete.  It
+   continues to exist for binary compatibility, even when not required
+   internally.  */
+
+/* Return {up,un} mod d, where d is a normalized limb (high bit set) and
+   dinv is its precomputed inverse as described in the file header.  */
+mp_limb_t
+mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t n0, r;
+
+  ASSERT (un >= 1);
+  ASSERT (d & GMP_LIMB_HIGHBIT);	/* d must be normalized */
+
+  /* Since d >= B/2, at most one subtraction reduces the high limb below d. */
+  r = up[un - 1];
+  if (r >= d)
+    r -= d;
+
+  /* Fold in the remaining limbs, most significant first; each step reduces
+     the two-limb value (r,n0) mod d using the precomputed inverse instead
+     of a hardware division.  */
+  for (i = un - 2; i >= 0; i--)
+    {
+      n0 = up[i];
+      udiv_rnnd_preinv (r, r, n0, d, dinv);
+    }
+  return r;
+}
diff --git a/mpn/generic/random.c b/mpn/generic/random.c
new file mode 100644
index 0000000..485f9eb
--- /dev/null
+++ b/mpn/generic/random.c
@@ -0,0 +1,50 @@
+/* mpn_random -- Generate random numbers.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Fill {ptr,size} with random limbs from the global default random state,
+   forcing the most significant limb non-zero so the result has exactly
+   `size' significant limbs.  */
+void
+mpn_random (mp_ptr ptr, mp_size_t size)
+{
+  gmp_randstate_ptr  rands;
+
+  /* FIXME: Is size==0 supposed to be allowed? */
+  ASSERT (size >= 0);
+
+  if (size == 0)
+    return;
+
+  /* RANDS is the library's global default random state. */
+  rands = RANDS;
+  _gmp_rand (ptr, rands, size * GMP_NUMB_BITS);
+
+  /* Make sure the most significant limb is non-zero.  */
+  while (ptr[size-1] == 0)
+    _gmp_rand (&ptr[size-1], rands, GMP_NUMB_BITS);
+}
diff --git a/mpn/generic/random2.c b/mpn/generic/random2.c
new file mode 100644
index 0000000..1eede67
--- /dev/null
+++ b/mpn/generic/random2.c
@@ -0,0 +1,105 @@
+/* mpn_random2 -- Generate random numbers with relatively long strings
+   of ones and zeroes.  Suitable for border testing.
+
+Copyright 1992-1994, 1996, 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
+
+/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
+   Thus, we get the same random number sequence in the common cases.
+   FIXME: We should always generate the same random number sequence!  */
+#if GMP_NUMB_BITS < 32
+#define BITS_PER_RANDCALL GMP_NUMB_BITS
+#else
+#define BITS_PER_RANDCALL 32
+#endif
+
+/* Generate an n-limb random number whose bit pattern consists of long runs
+   of consecutive ones and zeroes, suitable for border/carry testing.  The
+   effective bit length is itself randomized by discarding a random number
+   of bits from the most significant limb.  */
+void
+mpn_random2 (mp_ptr rp, mp_size_t n)
+{
+  gmp_randstate_ptr rstate = RANDS;
+  int bit_pos;			/* bit number of least significant bit where
+				   next bit field to be inserted */
+  mp_limb_t ran, ranm;		/* buffer for random bits */
+
+  /* FIXME: Is n==0 supposed to be allowed? */
+  ASSERT (n >= 0);
+
+  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+  ran = ranm;
+
+  /* Start off at a random bit position in the most significant limb.  */
+  bit_pos = ran % GMP_NUMB_BITS;
+
+  /* Delegate the actual run-of-bits generation to the helper below.  */
+  gmp_rrandomb (rp, rstate, n * GMP_NUMB_BITS - bit_pos);
+}
+
+/* Fill the low `nbits' bits of rp with alternating runs of ones and zeroes:
+   start from all ones, then walk downward from the top, carving runs of
+   randomly chosen lengths.  Run lengths are bounded by a cap drawn between
+   nbits/4 and nbits.  */
+static void
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+{
+  mp_bitcnt_t bi;
+  mp_limb_t ranm;		/* buffer for random bits */
+  unsigned cap_chunksize, chunksize;
+  mp_size_t i;
+
+  /* Set entire result to 111..1  */
+  i = BITS_TO_LIMBS (nbits) - 1;
+  /* Mask the partial top limb down to nbits % GMP_NUMB_BITS bits (a full
+     limb when nbits is a multiple of the limb size).  */
+  rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;
+  for (i = i - 1; i >= 0; i--)
+    rp[i] = GMP_NUMB_MAX;
+
+  /* Pick the maximum run length: nbits divided by 1..4, at least 1.  */
+  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+  cap_chunksize = nbits / (ranm % 4 + 1);
+  cap_chunksize += cap_chunksize == 0; /* make it at least 1 */
+
+  /* bi scans from the top bit downward; each loop pass carves one run of
+     ones followed by one run of zeroes.  */
+  bi = nbits;
+
+  for (;;)
+    {
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      if (bi == 0)
+	break;			/* low chunk is ...1 */
+
+      /* Clear bit bi.  The mpn_incr_u below then ripples a carry up through
+	 the all-ones bits beneath it, zeroing them and setting bit bi back
+	 to one -- i.e. it turns [new bi, old bi) into a run of zeroes.  */
+      rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;
+
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);
+
+      if (bi == 0)
+	break;			/* low chunk is ...0 */
+    }
+}
diff --git a/mpn/generic/redc_1.c b/mpn/generic/redc_1.c
new file mode 100644
index 0000000..eab128f
--- /dev/null
+++ b/mpn/generic/redc_1.c
@@ -0,0 +1,56 @@
+/* mpn_redc_1.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000-2002, 2004, 2008, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Montgomery reduction, one limb at a time: rp[] <- up[] / B^n mod mp[],
+   where B = 2^GMP_NUMB_BITS and invm satisfies (mp[0] * invm) mod B = -1
+   (so each addmul zeroes the current low limb, as the ASSERT checks).
+   Clobbers up[].  Returns the carry out of the final n-limb addition.  */
+mp_limb_t
+mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+{
+  mp_size_t j;
+  mp_limb_t cy;
+
+  ASSERT (n > 0);
+  ASSERT_MPN (up, 2*n);
+
+  /* Each pass adds a multiple of mp[] chosen to zero the current low limb.
+     The carry out of the addmul is parked in that just-zeroed limb, so all
+     n carries can be folded in with a single mpn_add_n at the end.  */
+  for (j = n - 1; j >= 0; j--)
+    {
+      cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
+      ASSERT (up[0] == 0);
+      up[0] = cy;
+      up++;
+    }
+
+  /* Result = high half of up[] plus the n parked carry limbs.  */
+  cy = mpn_add_n (rp, up, up - n, n);
+  return cy;
+}
diff --git a/mpn/generic/redc_2.c b/mpn/generic/redc_2.c
new file mode 100644
index 0000000..8d15589
--- /dev/null
+++ b/mpn/generic/redc_2.c
@@ -0,0 +1,110 @@
+/* mpn_redc_2.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000-2002, 2004, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS != 0
+you lose
+#endif
+
+/* For testing purposes, define our own mpn_addmul_2 if there is none already
+   available.  */
+#ifndef HAVE_NATIVE_mpn_addmul_2
+#undef mpn_addmul_2
+/* Add {up,n} * {vp,2} into {rp,n+1} and return the out-going carry limb.
+   Implemented as two mpn_addmul_1 passes, the second shifted up one limb
+   (its carry limb from the first pass is stored at rp[n]).  */
+static mp_limb_t
+mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
+{
+  rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
+  return mpn_addmul_1 (rp + 1, up, n, vp[1]);
+}
+#endif
+
+/* umul2low (ph, pl, uh, ul, vh, vl): set the two-limb value {ph,pl} to the
+   product (uh*B + ul) * (vh*B + vl) reduced mod B^2 (its low two limbs),
+   where B = 2^GMP_LIMB_BITS.  */
+#if defined (__GNUC__) && ! defined (NO_ASM) \
+  && defined (__ia64) && W_TYPE_SIZE == 64
+/* ia64 variant built on the xma fused multiply-add instructions.
+   NOTE(review): the locals _ph/_pl are declared but never used here -- the
+   asm outputs bind directly to ph/pl.  Harmless, but confirm intended.  */
+#define umul2low(ph, pl, uh, ul, vh, vl) \
+  do {									\
+    mp_limb_t _ph, _pl;							\
+    __asm__ ("xma.hu %0 = %3, %5, f0\n\t"				\
+	     "xma.l %1 = %3, %5, f0\n\t"				\
+	     ";;\n\t"							\
+	     "xma.l %0 = %3, %4, %0\n\t"				\
+	     ";;\n\t"							\
+	     "xma.l %0 = %2, %5, %0"					\
+	     : "=&f" (ph), "=&f" (pl)					\
+	     : "f" (uh), "f" (ul), "f" (vh), "f" (vl));			\
+  } while (0)
+#endif
+
+/* Generic variant: one full umul_ppmm for ul*vl, then the two cross
+   products, which only affect the high limb mod B^2, are added in.  */
+#ifndef umul2low
+#define umul2low(ph, pl, uh, ul, vh, vl) \
+  do {									\
+    mp_limb_t _ph, _pl;							\
+    umul_ppmm (_ph, _pl, ul, vl);					\
+    (ph) = _ph + (ul) * (vh) + (uh) * (vl);				\
+    (pl) = _pl;								\
+  } while (0)
+#endif
+
+/* Montgomery reduction, two limbs per pass: rp[] <- up[] / B^n mod mp[].
+   mip[] is a two-limb inverse of mp[] (presumably -1/mp mod B^2, by analogy
+   with invm in mpn_redc_1 -- confirm against the callers).  Clobbers up[].
+   Returns the carry out of the final n-limb addition.  */
+mp_limb_t
+mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
+{
+  mp_limb_t q[2];
+  mp_size_t j;
+  mp_limb_t upn;
+  mp_limb_t cy;
+
+  ASSERT (n > 0);
+  ASSERT_MPN (up, 2*n);
+
+  /* Odd n: do one single-limb reduction step first (as in mpn_redc_1) so
+     the main loop can work two limbs at a time; the addmul carry is parked
+     in the just-zeroed low limb.  */
+  if ((n & 1) != 0)
+    {
+      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);
+      up++;
+    }
+
+  for (j = n - 2; j >= 0; j -= 2)
+    {
+      /* q = two quotient limbs that make up[0..1] vanish mod B^2.  */
+      umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);
+      upn = up[n];		/* mpn_addmul_2 overwrites this */
+      /* Park the two-limb carry {up[n], return value} in the two zeroed
+	 low limbs, and restore the saved up[n].  */
+      up[1] = mpn_addmul_2 (up, mp, n, q);
+      up[0] = up[n];
+      up[n] = upn;
+      up += 2;
+    }
+
+  /* Result = high half of up[] plus the parked carry limbs.  */
+  cy = mpn_add_n (rp, up, up - n, n);
+  return cy;
+}
diff --git a/mpn/generic/redc_n.c b/mpn/generic/redc_n.c
new file mode 100644
index 0000000..0c94b7c
--- /dev/null
+++ b/mpn/generic/redc_n.c
@@ -0,0 +1,80 @@
+/* mpn_redc_n.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  TODO
+
+  * We assume mpn_mulmod_bnm1 is always faster than plain mpn_mul_n (or a
+    future mpn_mulhi) for the range we will be called.  Follow up that
+    assumption.
+
+  * Decrease scratch usage.
+
+  * Consider removing the residue canonicalisation.
+*/
+
+/* Montgomery reduction for large n via a wraparound (mod B^rn - 1)
+   multiply: rp[] <- up[] / B^n mod mp[].  ip[] is an n-limb inverse of
+   mp[] (presumably -1/mp mod B^n -- confirm against the callers).
+   Clobbers up[].  Only used for large operands (note the ASSERT n > 8),
+   where mpn_mulmod_bnm1 beats a plain product.  */
+void
+mpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)
+{
+  mp_ptr xp, yp, scratch;
+  mp_limb_t cy;
+  mp_size_t rn;
+  TMP_DECL;
+  TMP_MARK;
+
+  ASSERT (n > 8);
+
+  rn = mpn_mulmod_bnm1_next_size (n);
+
+  scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));
+
+  /* xp = low n limbs of up * ip: the quotient q used for the reduction.  */
+  xp = scratch;
+  mpn_mullo_n (xp, up, ip, n);
+
+  /* yp = q * mp, computed mod B^rn - 1 (wrapped).  */
+  yp = scratch + n;
+  mpn_mulmod_bnm1 (yp, rn, xp, n, mp, n, scratch + n + rn);
+
+  ASSERT_ALWAYS (2 * n > rn);				/* could handle this */
+
+  cy = mpn_sub_n (yp + rn, yp, up, 2*n - rn);		/* undo wrap around */
+  MPN_DECR_U (yp + 2*n - rn, rn, cy);
+
+  /* Take the high halves' difference; on borrow, add mp[] back once to
+     canonicalise the residue.  */
+  cy = mpn_sub_n (rp, up + n, yp + n, n);
+  if (cy != 0)
+    mpn_add_n (rp, rp, mp, n);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/remove.c b/mpn/generic/remove.c
new file mode 100644
index 0000000..cbb0742
--- /dev/null
+++ b/mpn/generic/remove.c
@@ -0,0 +1,182 @@
+/* mpn_remove -- divide out all multiples of odd mpn number from another mpn
+   number.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009, 2012-2014, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if GMP_LIMB_BITS > 50
+#define LOG 50
+#else
+#define LOG GMP_LIMB_BITS
+#endif
+
+
+/* Input: U = {up,un}, V = {vp,vn} must be odd, cap
+   Output W = {wp,*wn}; the allocation needed is exactly *wn limbs
+
+   Set W = U / V^k, where k is the largest integer <= cap such that the
+   division yields an integer.
+
+   FIXME: We currently allow any operand overlap.  This is quite non mpn-ish
+   and might be changed, since it costs significant temporary space.
+   * If we require W to have space for un + 1 limbs, we could save qp or qp2
+     (but we will still need to copy things into wp 50% of the time).
+   * If we allow ourselves to clobber U, we could save the other of qp and qp2,
+     and the initial COPY (but also here we would need un + 1 limbs).
+*/
+
+/* FIXME: We need to wrap mpn_bdiv_qr due to the itch interface.  This need
+   indicates a flaw in the current itch mechanism: Which operands not greater
+   than un,un will incur the worst itch?  We need a parallel foo_maxitch set
+   of functions.  */
+/* Thin wrapper around mpn_bdiv_qr that allocates the scratch space it
+   requires, since mpn_remove cannot know the worst-case itch up front.  */
+static void
+mpn_bdiv_qr_wrap (mp_ptr qp, mp_ptr rp,
+		  mp_srcptr np, mp_size_t nn,
+		  mp_srcptr dp, mp_size_t dn)
+{
+  mp_ptr scratch_out;
+  TMP_DECL;
+
+  TMP_MARK;
+  scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (nn, dn));
+  mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch_out);
+
+  TMP_FREE;
+}
+
+/* Divide out of U = {up,un} the largest power V^pwr (pwr <= cap) of the odd
+   number V = {vp,vn}; store the quotient in {wp,*wn} and return pwr.
+   See the interface comment earlier in this file for overlap rules.  */
+mp_bitcnt_t
+mpn_remove (mp_ptr wp, mp_size_t *wn,
+	    mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn,
+	    mp_bitcnt_t cap)
+{
+  mp_srcptr pwpsp[LOG];		/* recorded powers V^(2^i) */
+  mp_size_t pwpsn[LOG];		/* ... and their sizes */
+  mp_size_t npowers;
+  mp_ptr tp, qp, np, qp2;
+  mp_srcptr pp;
+  mp_size_t pn, nn, qn, i;
+  mp_bitcnt_t pwr;
+  TMP_DECL;
+
+  ASSERT (un > 0);
+  ASSERT (vn > 0);
+  ASSERT (vp[0] % 2 != 0);	/* 2-adic division wants odd numbers */
+  ASSERT (vn > 1 || vp[0] > 1);	/* else we would loop indefinitely */
+
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_3 (qp, un + 1,	/* quotient, alternating */
+		     qp2, un + 1,	/* quotient, alternating */
+		     tp, (un + 1 + vn) / 2); /* remainder */
+  pp = vp;
+  pn = vn;
+
+  MPN_COPY (qp, up, un);
+  qn = un;
+
+  /* Phase 1: divide by V, V^2, V^4, ... (repeated squaring), recording each
+     power so the binary-descent phase below can reuse them.  */
+  npowers = 0;
+  while (qn >= pn)
+    {
+      qp[qn] = 0;
+      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
+      /* Division was exact iff the 2-adic remainder is 0 or equals the
+	 divisor (the latter pairs with the negation below).
+	 NOTE(review): relies on mpn_bdiv_qr's remainder/sign convention --
+	 confirm against its documentation.  */
+      if (!mpn_zero_p (tp, pn))
+	{
+	  if (mpn_cmp (tp, pp, pn) != 0)
+	    break;		/* could not divide by V^npowers */
+	}
+
+      MP_PTR_SWAP (qp, qp2);
+      qn = qn - pn;
+      /* The 2-adic quotient comes out negated; flip its sign.  */
+      mpn_neg (qp, qp, qn+1);
+
+      qn += qp[qn] != 0;
+
+      pwpsp[npowers] = pp;
+      pwpsn[npowers] = pn;
+      ++npowers;
+
+      /* Stop squaring once even using every recorded power would exceed
+	 cap: 1 + 2 + ... + 2^npowers = 2^(npowers+1) - 1.  */
+      if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
+	break;
+
+      nn = 2 * pn - 1;		/* next power will be at least this large */
+      if (nn > qn)
+	break;			/* next power would be overlarge */
+
+      if (npowers == 1)		/* Alloc once, but only if it's needed */
+	np = TMP_ALLOC_LIMBS (qn + LOG);	/* powers of V */
+      else
+	np += pn;
+
+      mpn_sqr (np, pp, pn);
+      pn = nn + (np[nn] != 0);
+      pp = np;
+    }
+
+  /* Exponent removed so far: one of each power V^(2^i), i < npowers.  */
+  pwr = ((mp_bitcnt_t) 1 << npowers) - 1;
+
+  /* Phase 2: binary descent over the recorded powers, largest first,
+     dividing where possible while keeping the total exponent <= cap.  */
+  for (i = npowers; --i >= 0;)
+    {
+      pn = pwpsn[i];
+      if (qn < pn)
+	continue;
+
+      if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
+	continue;		/* V^i would bring us past cap */
+
+      qp[qn] = 0;
+      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pwpsp[i], pn);
+      if (!mpn_zero_p (tp, pn))
+	{
+	  if (mpn_cmp (tp, pwpsp[i], pn) != 0)
+	    continue;		/* could not divide by V^i */
+	}
+
+      MP_PTR_SWAP (qp, qp2);
+      qn = qn - pn;
+      mpn_neg (qp, qp, qn+1);
+
+      qn += qp[qn] != 0;
+
+      pwr += (mp_bitcnt_t) 1 << i;
+    }
+
+  MPN_COPY (wp, qp, qn);
+  *wn = qn;
+
+  TMP_FREE;
+
+  return pwr;
+}
diff --git a/mpn/generic/rootrem.c b/mpn/generic/rootrem.c
new file mode 100644
index 0000000..a79099e
--- /dev/null
+++ b/mpn/generic/rootrem.c
@@ -0,0 +1,515 @@
+/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
+   store the truncated integer part at rootp and the remainder at remp.
+
+   Contributed by Paul Zimmermann (algorithm) and
+   Paul Zimmermann and Torbjorn Granlund (implementation).
+   Marco Bodrato wrote logbased_root to seed the loop.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* FIXME:
+     This implementation is not optimal when remp == NULL, since the complexity
+     is M(n), whereas it should be M(n/k) on average.
+*/
+
+#include <stdio.h>		/* for NULL */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
+				       mp_limb_t, int);
+
+#define MPN_RSHIFT(rp,up,un,cnt) \
+  do {									\
+    if ((cnt) != 0)							\
+      mpn_rshift (rp, up, un, cnt);					\
+    else								\
+      {									\
+	MPN_COPY_INCR (rp, up, un);					\
+      }									\
+  } while (0)
+
+#define MPN_LSHIFT(cy,rp,up,un,cnt) \
+  do {									\
+    if ((cnt) != 0)							\
+      cy = mpn_lshift (rp, up, un, cnt);				\
+    else								\
+      {									\
+	MPN_COPY_DECR (rp, up, un);					\
+	cy = 0;								\
+      }									\
+  } while (0)
+
+
+/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
+   If remp <> NULL, put in {remp, un} the remainder.
+   Return the size (in limbs) of the remainder if remp <> NULL,
+	  or a non-zero value iff the remainder is non-zero when remp = NULL.
+   Assumes:
+   (a) up[un-1] is not zero
+   (b) rootp has at least space for ceil(un/k) limbs
+   (c) remp has at least space for un limbs (in case remp <> NULL)
+   (d) the operands do not overlap.
+
+   The auxiliary memory usage is 3*un+2 if remp = NULL,
+   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
+*/
+mp_size_t
+mpn_rootrem (mp_ptr rootp, mp_ptr remp,
+	     mp_srcptr up, mp_size_t un, mp_limb_t k)
+{
+  ASSERT (un > 0);
+  ASSERT (up[un - 1] != 0);
+  ASSERT (k > 1);
+
+  /* Square root has a dedicated, faster implementation.  */
+  if (UNLIKELY (k == 2))
+    return mpn_sqrtrem (rootp, remp, up, un);
+  /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */
+  if (remp == NULL && (un + 2) / 3 > k)
+    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
+       with one more limb, allowing us to compute the exact integral result. */
+    {
+      mp_ptr sp, wp;
+      mp_size_t rn, sn, wn;
+      TMP_DECL;
+      TMP_MARK;
+      wn = un + k;
+      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
+      TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
+			 sp, sn); /* approximate root of padded input */
+      /* Padded input = U * B^k, whose kth root is root(U) * B.  */
+      MPN_COPY (wp + k, up, un);
+      MPN_FILL (wp, k, 0);
+      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
+      /* The approximate root S = {sp,sn} is either the correct root of
+	 {sp,sn}, or 1 too large.  Thus unless the least significant limb of
+	 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
+	 limb.  (In case sp[0]=1, we can deduce the root, but not decide
+	 whether it is exact or not.) */
+      MPN_COPY (rootp, sp + 1, sn - 1);
+      TMP_FREE;
+      return rn;
+    }
+  else
+    {
+      /* General case: compute root and (optionally) remainder directly.  */
+      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
+    }
+}
+
+#define LOGROOT_USED_BITS 8
+#define LOGROOT_NEEDS_TWO_CORRECTIONS 1
+#define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
+/* Puts in *rootp some bits of the kth root of the number
+   2^bitn * 1.op ; where op represents the "fractional" bits.
+
+   The returned value is the number of bits of the root minus one;
+   i.e. an approximation of the root will be
+   (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).
+
+   Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
+   one is not counted).
+ */
+static unsigned
+logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
+{
+  /* vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
+  static const
+  unsigned char vlog[] = {1,   2,   4,   5,   7,   8,   9,  11,  12,  14,  15,  16,  18,  19,  21,  22,
+			 23,  25,  26,  27,  29,  30,  31,  33,  34,  35,  37,  38,  39,  40,  42,  43,
+			 44,  46,  47,  48,  49,  51,  52,  53,  54,  56,  57,  58,  59,  61,  62,  63,
+			 64,  65,  67,  68,  69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,
+			 83,  84,  85,  87,  88,  89,  90,  91,  92,  93,  94,  96,  97,  98,  99, 100,
+			101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+			118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
+			135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+			150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
+			165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
+			180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
+			194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
+			207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
+			220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
+			232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
+			245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};
+
+  /* vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
+  static const
+  unsigned char vexp[] = {0,   1,   2,   2,   3,   4,   4,   5,   6,   7,   7,   8,   9,   9,  10,  11,
+			 12,  12,  13,  14,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  22,  23,
+			 23,  24,  25,  26,  26,  27,  28,  29,  30,  30,  31,  32,  33,  33,  34,  35,
+			 36,  37,  37,  38,  39,  40,  41,  41,  42,  43,  44,  45,  45,  46,  47,  48,
+			 49,  50,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  59,  60,  61,  61,
+			 62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  73,  74,  75,  75,
+			 76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  86,  87,  88,  89,  90,
+			 91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
+			107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
+			123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
+			139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
+			157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
+			175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
+			194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
+			214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
+			235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
+  mp_bitcnt_t retval;
+
+  /* Divide the (fixed-point) base-2 logarithm of the input by k: the
+     integer part becomes retval, the LOGROOT_USED_BITS fraction bits index
+     the exponential table.  The unlikely branch avoids overflowing the
+     shift when bitn is huge.  */
+  if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
+    {
+      /* In the unlikely case, we use two divisions and a modulo. */
+      retval = bitn / k;
+      bitn %= k;
+      bitn = (bitn << LOGROOT_USED_BITS |
+	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
+    }
+  else
+    {
+      bitn = (bitn << LOGROOT_USED_BITS |
+	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
+      retval = bitn >> LOGROOT_USED_BITS;
+      bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
+    }
+  ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
+  /* Root approximation: an implicit leading one bit or'ed with fraction
+     bits taken from the exp table.  */
+  *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
+    | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
+  return retval;
+}
+
+/* if approx is non-zero, does not compute the final remainder */
+static mp_size_t
+mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
+		      mp_limb_t k, int approx)
+{
+  mp_ptr qp, rp, sp, wp, scratch;
+  mp_size_t qn, rn, sn, wn, nl, bn;
+  mp_limb_t save, save2, cy, uh;
+  mp_bitcnt_t unb; /* number of significant bits of {up,un} */
+  mp_bitcnt_t xnb; /* number of significant bits of the result */
+  mp_bitcnt_t b, kk;
+  mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
+  int ni;
+  int perf_pow;
+  unsigned ulz, snb, c, logk;
+  TMP_DECL;
+
+  /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
+  uh = up[un - 1];
+  count_leading_zeros (ulz, uh);
+  ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
+  unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
+  /* unb is the (truncated) logarithm of the input U in base 2*/
+
+  if (unb < k) /* root is 1 */
+    {
+      rootp[0] = 1;
+      if (remp == NULL)
+	un -= (*up == CNST_LIMB (1)); /* Non-zero iif {up,un} > 1 */
+      else
+	{
+	  mpn_sub_1 (remp, up, un, CNST_LIMB (1));
+	  un -= (remp [un - 1] == 0);	/* There should be at most one zero limb,
+				   if we demand u to be normalized  */
+	}
+      return un;
+    }
+  /* if (unb - k < k/2 + k/16) // root is 2 */
+
+  if (ulz == GMP_NUMB_BITS)
+    uh = up[un - 2];
+  else
+    uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
+  ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);
+
+  xnb = logbased_root (rootp, uh, unb, k);
+  snb = LOGROOT_RETURNED_BITS - 1;
+  /* xnb+1 is the number of bits of the root R */
+  /* snb+1 is the number of bits of the current approximation S */
+
+  kk = k * xnb;		/* number of truncated bits in the input */
+
+  /* FIXME: Should we skip the next two loops when xnb <= snb ? */
+  for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk )
+    ;
+  /* logk = ceil(log(k)/log(2)) + 1 */
+
+  /* xnb is the number of remaining bits to determine in the kth root */
+  for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
+    {
+      /* invariant: here we want xnb+1 total bits for the kth root */
+
+      /* if c is the new value of xnb, this means that we'll go from a
+	 root of c+1 bits (say s') to a root of xnb+1 bits.
+	 It is proved in the book "Modern Computer Arithmetic" by Brent
+	 and Zimmermann, Chapter 1, that
+	 if s' >= k*beta, then at most one correction is necessary.
+	 Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
+	 c >= ceil((xnb + log2(k))/2). */
+      if (xnb > logk)
+	xnb = (xnb + logk) / 2;
+      else
+	--xnb;	/* add just one bit at a time */
+    }
+
+  *rootp >>= snb - xnb;
+  kk -= xnb;
+
+  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
+  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
+     sizes[i] <= 2 * sizes[i+1].
+     Newton iteration will first compute sizes[ni-1] extra bits,
+     then sizes[ni-2], ..., then sizes[0] = b. */
+
+  TMP_MARK;
+  /* qp and wp need enough space to store S'^k where S' is an approximate
+     root. Since S' can be as large as S+2, the worst case is when S=2 and
+     S'=4. But then since we know the number of bits of S in advance, S'
+     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+     fits in un limbs, the number of extra limbs needed is bounded by
+     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+  /* THINK: with the use of logbased_root, maybe the constant is
+     258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+  TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
+		     qp, un + EXTRA,  /* will contain quotient and remainder
+					 of R/(k*S^(k-1)), and S^k */
+		     wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+					 and temporary for mpn_pow_1 */
+
+  if (remp == NULL)
+    rp = scratch;	/* will contain the remainder */
+  else
+    rp = remp;
+  sp = rootp;
+
+  sn = 1;		/* Initial approximation has one limb */
+
+  for (b = xnb; ni != 0; --ni)
+    {
+      /* 1: loop invariant:
+	 {sp, sn} is the current approximation of the root, which has
+		  exactly 1 + sizes[ni] bits.
+	 {rp, rn} is the current remainder
+	 {wp, wn} = {sp, sn}^(k-1)
+	 kk = number of truncated bits of the input
+      */
+
+      /* Since each iteration treats b bits from the root and thus k*b bits
+	 from the input, and we already considered b bits from the input,
+	 we now have to take another (k-1)*b bits from the input. */
+      kk -= (k - 1) * b; /* remaining input bits */
+      /* {rp, rn} = floor({up, un} / 2^kk) */
+      rn = un - kk / GMP_NUMB_BITS;
+      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
+      rn -= rp[rn - 1] == 0;
+
+      /* 9: current buffers: {sp,sn}, {rp,rn} */
+
+      for (c = 0;; c++)
+	{
+	  /* Compute S^k in {qp,qn}. */
+	  /* W <- S^(k-1) for the next iteration,
+	     and S^k = W * S. */
+	  wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
+	  mpn_mul (qp, wp, wn, sp, sn);
+	  qn = wn + sn;
+	  qn -= qp[qn - 1] == 0;
+
+	  perf_pow = 1;
+	  /* if S^k > floor(U/2^kk), the root approximation was too large */
+	  if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
+	    MPN_DECR_U (sp, sn, 1);
+	  else
+	    break;
+	}
+
+      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
+
+      /* sometimes two corrections are needed with logbased_root*/
+      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
+      ASSERT_ALWAYS (rn >= qn);
+
+      b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
+				      next iteration */
+      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */
+
+      kk = kk - b;
+      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
+      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
+      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
+		 - floor(kk / GMP_NUMB_BITS)
+	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
+		  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
+	     = 2 + (b - 2) / GMP_NUMB_BITS
+	 thus since nl is an integer:
+	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
+
+      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* R = R - Q = floor(U/2^kk) - S^k */
+      if (perf_pow != 0)
+	{
+	  mpn_sub (rp, rp, rn, qp, qn);
+	  MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+	  /* first multiply the remainder by 2^b */
+	  MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
+	  rn = rn + bn;
+	  if (cy != 0)
+	    {
+	      rp[rn] = cy;
+	      rn++;
+	    }
+
+	  save = rp[bn];
+	  /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
+	  if (nl - 1 > bn)
+	    save2 = rp[bn + 1];
+	}
+      else
+	{
+	  rn = bn;
+	  save2 = save = 0;
+	}
+      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* Now insert bits [kk,kk+b-1] from the input U */
+      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
+      /* set to zero high bits of rp[bn] */
+      rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
+      /* restore corresponding bits */
+      rp[bn] |= save;
+      if (nl - 1 > bn)
+	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
+			       they start by bit 0 in rp[0], so they use
+			       at most ceil(b/GMP_NUMB_BITS) limbs */
+      /* FIXME: Should we normalise {rp,rn} here ?*/
+
+      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
+      cy = mpn_mul_1 (wp, wp, wn, k);
+      wp[wn] = cy;
+      wn += cy != 0;
+
+      /* 6: current buffers: {sp,sn}, {qp,qn} */
+
+      /* multiply the root approximation by 2^b */
+      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
+      sn = sn + b / GMP_NUMB_BITS;
+      if (cy != 0)
+	{
+	  sp[sn] = cy;
+	  sn++;
+	}
+
+      save = sp[b / GMP_NUMB_BITS];
+
+      /* Number of limbs used by b bits, when least significant bit is
+	 aligned to least limb */
+      bn = (b - 1) / GMP_NUMB_BITS + 1;
+
+      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
+      if (UNLIKELY (rn < wn))
+	{
+	  MPN_FILL (sp, bn, 0);
+	}
+      else
+	{
+	  qn = rn - wn; /* expected quotient size */
+	  if (qn <= bn) { /* Divide only if result is not too big. */
+	    mpn_div_q (qp, rp, rn, wp, wn, scratch);
+	    qn += qp[qn] != 0;
+	  }
+
+      /* 5: current buffers: {sp,sn}, {qp,qn}.
+	 Note: {rp,rn} is not needed any more since we'll compute it from
+	 scratch at the end of the loop.
+       */
+
+      /* the quotient should be smaller than 2^b, since the previous
+	 approximation was correctly rounded toward zero */
+	  if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
+			  qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
+	    {
+	      for (qn = 1; qn < bn; ++qn)
+		sp[qn - 1] = GMP_NUMB_MAX;
+	      sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
+	    }
+	  else
+	    {
+      /* 7: current buffers: {sp,sn}, {qp,qn} */
+
+      /* Combine sB and q to form sB + q.  */
+	      MPN_COPY (sp, qp, qn);
+	      MPN_ZERO (sp + qn, bn - qn);
+	    }
+	}
+      sp[b / GMP_NUMB_BITS] |= save;
+
+      /* 8: current buffer: {sp,sn} */
+
+    }
+
+  /* otherwise we have rn > 0, thus the return value is ok */
+  if (!approx || sp[0] <= CNST_LIMB (1))
+    {
+      for (c = 0;; c++)
+	{
+	  /* Compute S^k in {qp,qn}. */
+	  /* Last iteration: we don't need W anymore. */
+	  /* mpn_pow_1 requires that both qp and wp have enough
+	     space to store the result {sp,sn}^k + 1 limb */
+	  qn = mpn_pow_1 (qp, sp, sn, k, wp);
+
+	  perf_pow = 1;
+	  if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
+	    MPN_DECR_U (sp, sn, 1);
+	  else
+	    break;
+	};
+
+      /* sometimes two corrections are needed with logbased_root*/
+      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
+
+      rn = perf_pow != 0;
+      if (rn != 0 && remp != NULL)
+	{
+	  mpn_sub (remp, up, un, qp, qn);
+	  rn = un;
+	  MPN_NORMALIZE_NOT_ZERO (remp, rn);
+	}
+    }
+
+  TMP_FREE;
+  return rn;
+}
diff --git a/mpn/generic/rshift.c b/mpn/generic/rshift.c
new file mode 100644
index 0000000..15d427d
--- /dev/null
+++ b/mpn/generic/rshift.c
@@ -0,0 +1,69 @@
+/* mpn_rshift -- Shift right low level.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and N limbs long) cnt bits to the right
+   and store the n least significant limbs of the result at rp.
+   The bits shifted out to the right are returned.
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be <= up.
+*/
+
+mp_limb_t
+mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t out_limb;
+  unsigned int tnc;
+  mp_size_t i;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+
+  tnc = GMP_NUMB_BITS - cnt;
+
+  /* Bits shifted out at the low end, returned left-aligned in a limb.  */
+  out_limb = (up[0] << tnc) & GMP_NUMB_MASK;
+
+  /* Each result limb combines the high part of up[i] with the low part
+     of up[i+1].  Walking upward keeps this safe when rp <= up, since
+     rp[i] is written only after up[i] and up[i+1] have been read.  */
+  for (i = 0; i < n - 1; i++)
+    rp[i] = (up[i] >> cnt) | ((up[i + 1] << tnc) & GMP_NUMB_MASK);
+
+  /* Top limb has no successor; its high bits become zero.  */
+  rp[n - 1] = up[n - 1] >> cnt;
+
+  return out_limb;
+}
diff --git a/mpn/generic/sbpi1_bdiv_q.c b/mpn/generic/sbpi1_bdiv_q.c
new file mode 100644
index 0000000..850e593
--- /dev/null
+++ b/mpn/generic/sbpi1_bdiv_q.c
@@ -0,0 +1,96 @@
+/* mpn_sbpi1_bdiv_q -- schoolbook Hensel division with precomputed inverse,
+   returning quotient only.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Computes Q = - U / D mod B^un, destroys U.
+
+   D must be odd. dinv is (-D)^-1 mod B.
+
+*/
+
+void
+mpn_sbpi1_bdiv_q (mp_ptr qp,
+		  mp_ptr up, mp_size_t un,
+		  mp_srcptr dp, mp_size_t dn,
+		  mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t q;
+
+  ASSERT (dn > 0);
+  ASSERT (un >= dn);
+  ASSERT ((dp[0] & 1) != 0);	/* Hensel division requires an odd divisor */
+  ASSERT (-(dp[0] * dinv) == 1);	/* dinv == (-D)^-1 mod B */
+  ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
+
+  if (un > dn)
+    {
+      mp_limb_t cy, hi;
+      /* While more than dn limbs of U remain, each step picks the unique
+	 q that makes the current low limb of U vanish when q*D is added,
+	 then advances one limb.  cy carries the overflow of the addmul
+	 past the dn-limb window.  */
+      for (i = un - dn - 1, cy = 0; i > 0; i--)
+	{
+	  q = dinv * up[0];	/* q = -up[0]/D mod B, clears the low limb */
+	  hi = mpn_addmul_1 (up, dp, dn, q);
+
+	  ASSERT (up[0] == 0);
+	  *qp++ = q;
+	  hi += cy;
+	  cy = hi < cy;	/* carry out of hi + cy */
+	  hi += up[dn];
+	  cy += hi < up[dn];	/* carry out of hi + up[dn] */
+	  up[dn] = hi;
+	  up++;
+	}
+      q = dinv * up[0];
+      hi = cy + mpn_addmul_1 (up, dp, dn, q);
+      ASSERT (up[0] == 0);
+      *qp++ = q;
+      up[dn] += hi;	/* last full-width step: overflow past up[dn] is
+			   irrelevant since the result is mod B^un */
+      up++;
+    }
+  /* Only dn limbs remain; the divisor is truncated one limb per step
+     because the product is needed only modulo the remaining limbs.  */
+  for (i = dn; i > 1; i--)
+    {
+      mp_limb_t q = dinv * up[0];
+      mpn_addmul_1 (up, dp, i, q);
+      ASSERT (up[0] == 0);
+      *qp++ = q;
+      up++;
+    }
+
+  /* Final limb */
+  *qp = dinv * up[0];
+}
diff --git a/mpn/generic/sbpi1_bdiv_qr.c b/mpn/generic/sbpi1_bdiv_qr.c
new file mode 100644
index 0000000..6146c45
--- /dev/null
+++ b/mpn/generic/sbpi1_bdiv_qr.c
@@ -0,0 +1,82 @@
+/* mpn_sbpi1_bdiv_qr -- schoolbook Hensel division with precomputed inverse,
+   returning quotient and remainder.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes a binary quotient of size qn = un - dn.
+   Output:
+
+      Q = -U * D^{-1} mod B^qn,
+
+      R = (U + Q * D) * B^(-qn)
+
+   Stores the dn least significant limbs of R at {up + un - dn, dn},
+   and returns the carry from the addition U + Q*D.
+
+   D must be odd. dinv is (-D)^-1 mod B. */
+
+mp_limb_t
+mpn_sbpi1_bdiv_qr (mp_ptr qp,
+		   mp_ptr up, mp_size_t un,
+		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t cy;		/* carry out of the dn-limb window; returned */
+
+  ASSERT (dn > 0);
+  ASSERT (un > dn);
+  ASSERT ((dp[0] & 1) != 0);	/* Hensel division requires an odd divisor */
+  ASSERT (-(dp[0] * dinv) == 1);	/* dinv == (-D)^-1 mod B */
+  ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
+
+  /* Work upward from the low end: each step picks the unique q making
+     the current low limb of U vanish when q*D is added, records q, and
+     advances the dn-limb remainder window one limb.  */
+  for (i = un - dn, cy = 0; i != 0; i--)
+    {
+      mp_limb_t q = dinv * up[0];	/* q = -up[0]/D mod B */
+      mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
+      *qp++ = q;
+
+      hi += cy;
+      cy = hi < cy;	/* carry out of hi + cy */
+      hi += up[dn];
+      cy += hi < up[dn];	/* carry out of hi + up[dn] */
+      up[dn] = hi;
+      up++;
+    }
+
+  return cy;
+}
diff --git a/mpn/generic/sbpi1_bdiv_r.c b/mpn/generic/sbpi1_bdiv_r.c
new file mode 100644
index 0000000..a609951
--- /dev/null
+++ b/mpn/generic/sbpi1_bdiv_r.c
@@ -0,0 +1,79 @@
+/* mpn_sbpi1_bdiv_r -- schoolbook Hensel division with precomputed inverse,
+   returning remainder.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Computes the remainder of a binary (Hensel) division.  The quotient
+   Q = -U * D^{-1} mod B^qn (with qn = un - dn) is generated limb by
+   limb but, unlike mpn_sbpi1_bdiv_qr, it is not stored anywhere.
+   Output:
+
+      R = (U + Q * D) * B^(-qn)
+
+   Stores the dn least significant limbs of R at {up + un - dn, dn},
+   and returns the carry from the addition U + Q*D.
+
+   D must be odd. dinv is (-D)^-1 mod B. */
+
+mp_limb_t
+mpn_sbpi1_bdiv_r (mp_ptr up, mp_size_t un,
+		  mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t cy;		/* carry out of the dn-limb window; returned */
+
+  ASSERT (dn > 0);
+  ASSERT (un > dn);
+  ASSERT ((dp[0] & 1) != 0);	/* Hensel division requires an odd divisor */
+  ASSERT (-(dp[0] * dinv) == 1);	/* dinv == (-D)^-1 mod B */
+
+  /* Same low-to-high scheme as mpn_sbpi1_bdiv_qr, except the quotient
+     limbs q are discarded; only the remainder window and the final
+     carry are kept.  */
+  for (i = un - dn, cy = 0; i != 0; i--)
+    {
+      mp_limb_t q = dinv * up[0];	/* clears the low limb of U */
+      mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
+
+      hi += cy;
+      cy = hi < cy;	/* carry out of hi + cy */
+      hi += up[dn];
+      cy += hi < up[dn];	/* carry out of hi + up[dn] */
+      up[dn] = hi;
+      up++;
+    }
+
+  return cy;
+}
diff --git a/mpn/generic/sbpi1_div_q.c b/mpn/generic/sbpi1_div_q.c
new file mode 100644
index 0000000..a9975eb
--- /dev/null
+++ b/mpn/generic/sbpi1_div_q.c
@@ -0,0 +1,302 @@
+/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_div_q (mp_ptr qp,
+		 mp_ptr np, mp_size_t nn,
+		 mp_srcptr dp, mp_size_t dn,
+		 mp_limb_t dinv)
+{
+  mp_limb_t qh;			/* high quotient limb (0 or 1); returned */
+  mp_size_t qn, i;
+  mp_limb_t n1, n0;		/* top two limbs of the partial remainder */
+  mp_limb_t d1, d0;		/* top two limbs of the (truncated) divisor */
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+  mp_limb_t flag;		/* ~0 while the quotient may still be one too
+				   large; 0 once that is decided (see below) */
+
+  mp_size_t dn_orig = dn;
+  mp_srcptr dp_orig = dp;
+  mp_ptr np_orig = np;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);	/* divisor must be normalized */
+
+  np += nn;
+
+  /* For a short quotient only the qn+1 most significant divisor limbs
+     are used in the main loops; the ignored tail is compensated for in
+     the adjustment code at the end.  */
+  qn = nn - dn;
+  if (qn + 1 < dn)
+    {
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  /* High quotient limb: 1 iff the top dn limbs of N are >= D.  */
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += qn;
+
+  dn -= 2;			/* offset dn by 2 for main division loops,
+				   saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];
+  d0 = dp[dn + 0];
+
+  np -= 2;
+
+  n1 = np[1];
+
+  /* Main loop: one quotient limb per iteration, dividing the top three
+     remainder limbs by d1:d0 with the Möller-Granlund 3/2 primitive.  */
+  for (i = qn - (dn + 2); i >= 0; i--)
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+	{
+	  /* udiv_qr_3by2 needs the high part strictly below the divisor;
+	     handle the q = B-1 case separately.  */
+	  q = GMP_NUMB_MASK;
+	  mpn_submul_1 (np - dn, dp, dn + 2, q);
+	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  /* Subtract q times the low divisor limbs and propagate the
+	     resulting borrow through n0 and n1.  */
+	  cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	  cy1 = n0 < cy;
+	  n0 = (n0 - cy) & GMP_NUMB_MASK;
+	  cy = n1 < cy1;
+	  n1 -= cy1;
+	  np[0] = n0;
+
+	  if (UNLIKELY (cy != 0))
+	    {
+	      /* q was one too large; add back D and decrement q.  */
+	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+	      q--;
+	    }
+	}
+
+      *--qp = q;
+    }
+
+  flag = ~CNST_LIMB(0);
+
+  /* Fringe: compute the low quotient limbs with the divisor truncated by
+     one more limb each step.  Once flag is zeroed, both flag-masked tests
+     below are forced (n1 >= 0 true, n1 < 0 false), so the remaining limbs
+     are emitted as GMP_NUMB_MASK and fixed up by the final adjustment.  */
+  if (dn >= 0)
+    {
+      for (i = dn; i > 0; i--)
+	{
+	  np--;
+	  if (UNLIKELY (n1 >= (d1 & flag)))
+	    {
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
+
+	      if (UNLIKELY (n1 != cy))
+		{
+		  if (n1 < (cy & flag))
+		    {
+		      q--;
+		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
+		    }
+		  else
+		    flag = 0;
+		}
+	      n1 = np[1];
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	      cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	      cy1 = n0 < cy;
+	      n0 = (n0 - cy) & GMP_NUMB_MASK;
+	      cy = n1 < cy1;
+	      n1 -= cy1;
+	      np[0] = n0;
+
+	      if (UNLIKELY (cy != 0))
+		{
+		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+		  q--;
+		}
+	    }
+
+	  *--qp = q;
+
+	  /* Truncate operands.  */
+	  dn--;
+	  dp++;
+	}
+
+      /* Last quotient limb, with a 2-limb divisor left.  */
+      np--;
+      if (UNLIKELY (n1 >= (d1 & flag)))
+	{
+	  q = GMP_NUMB_MASK;
+	  cy = mpn_submul_1 (np, dp, 2, q);
+
+	  if (UNLIKELY (n1 != cy))
+	    {
+	      if (n1 < (cy & flag))
+		{
+		  q--;
+		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
+		}
+	      else
+		flag = 0;
+	    }
+	  n1 = np[1];
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  np[0] = n0;
+	  np[1] = n1;
+	}
+
+      *--qp = q;
+    }
+  ASSERT_ALWAYS (np[1] == n1);
+  np += 2;
+
+
+  /* NOTE(review): n1 (a limb value) is compared against dn (a limb
+     count) below; presumably each of the at most dn compensation
+     subtractions can borrow at most 1 from the top remainder limb, so
+     n1 >= dn means no adjustment is possible -- looks intentional,
+     confirm against upstream before changing.  */
+  dn = dn_orig;
+  if (UNLIKELY (n1 < (dn & flag)))
+    {
+      mp_limb_t q, x;
+
+      /* The quotient may be too large if the remainder is small.  Recompute
+	 for above ignored operand parts, until the remainder spills.
+
+	 FIXME: The quality of this code isn't the same as the code above.
+	 1. We don't compute things in an optimal order, high-to-low, in order
+	    to terminate as quickly as possible.
+	 2. We mess with pointers and sizes, adding and subtracting and
+	    adjusting to get things right.  It surely could be streamlined.
+	 3. The only termination criteria are that we determine that the
+	    quotient needs to be adjusted, or that we have recomputed
+	    everything.  We should stop when the remainder is so large
+	    that no additional subtracting could make it spill.
+	 4. If nothing else, we should not do two loops of submul_1 over the
+	    data, instead handle both the triangularization and chopping at
+	    once.  */
+
+      x = n1;	/* slack: how many borrows the top limb can absorb */
+
+      if (dn > 2)
+	{
+	  /* Compensate for triangularization.  */
+	  mp_limb_t y;
+
+	  dp = dp_orig;
+	  if (qn + 1 < dn)
+	    {
+	      dp += dn - (qn + 1);
+	      dn = qn + 1;
+	    }
+
+	  y = np[-2];
+
+	  for (i = dn - 3; i >= 0; i--)
+	    {
+	      q = qp[i];
+	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
+
+	      if (y < cy)
+		{
+		  if (x == 0)
+		    {
+		      /* Out of slack: the quotient was one too large.  */
+		      cy = mpn_sub_1 (qp, qp, qn, 1);
+		      ASSERT_ALWAYS (cy == 0);
+		      return qh - cy;
+		    }
+		  x--;
+		}
+	      y -= cy;
+	    }
+	  np[-2] = y;
+	}
+
+      dn = dn_orig;
+      if (qn + 1 < dn)
+	{
+	  /* Compensate for ignored dividend and divisor tails.  */
+
+	  dp = dp_orig;
+	  np = np_orig;
+
+	  if (qh != 0)
+	    {
+	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
+	      if (cy != 0)
+		{
+		  if (x == 0)
+		    {
+		      if (qn != 0)
+			cy = mpn_sub_1 (qp, qp, qn, 1);
+		      return qh - cy;
+		    }
+		  x--;
+		}
+	    }
+
+	  if (qn == 0)
+	    return qh;
+
+	  for (i = dn - qn - 2; i >= 0; i--)
+	    {
+	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
+	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
+	      if (cy != 0)
+		{
+		  if (x == 0)
+		    {
+		      cy = mpn_sub_1 (qp, qp, qn, 1);
+		      return qh;
+		    }
+		  x--;
+		}
+	    }
+	}
+    }
+
+  return qh;
+}
diff --git a/mpn/generic/sbpi1_div_qr.c b/mpn/generic/sbpi1_div_qr.c
new file mode 100644
index 0000000..7330a77
--- /dev/null
+++ b/mpn/generic/sbpi1_div_qr.c
@@ -0,0 +1,109 @@
+/* mpn_sbpi1_div_qr -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_div_qr (mp_ptr qp,
+		  mp_ptr np, mp_size_t nn,
+		  mp_srcptr dp, mp_size_t dn,
+		  mp_limb_t dinv)
+{
+  mp_limb_t qh;			/* high quotient limb (0 or 1); returned */
+  mp_size_t i;
+  mp_limb_t n1, n0;		/* top two limbs of the partial remainder */
+  mp_limb_t d1, d0;		/* top two divisor limbs */
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);	/* divisor must be normalized */
+
+  np += nn;
+
+  /* High quotient limb: 1 iff the top dn limbs of N are >= D; reduce
+     N accordingly.  */
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += nn - dn;
+
+  dn -= 2;			/* offset dn by 2 for main division loops,
+				   saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];
+  d0 = dp[dn + 0];
+
+  np -= 2;
+
+  n1 = np[1];
+
+  /* One quotient limb per iteration, dividing the top three remainder
+     limbs by d1:d0 with the Möller-Granlund 3/2 primitive.  */
+  for (i = nn - (dn + 2); i > 0; i--)
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+	{
+	  /* udiv_qr_3by2 needs the high part strictly below the divisor;
+	     handle the q = B-1 case separately.  */
+	  q = GMP_NUMB_MASK;
+	  mpn_submul_1 (np - dn, dp, dn + 2, q);
+	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  /* Subtract q times the low divisor limbs and propagate the
+	     borrow through n0 and n1.  */
+	  cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	  cy1 = n0 < cy;
+	  n0 = (n0 - cy) & GMP_NUMB_MASK;
+	  cy = n1 < cy1;
+	  n1 = (n1 - cy1) & GMP_NUMB_MASK;
+	  np[0] = n0;
+
+	  if (UNLIKELY (cy != 0))
+	    {
+	      /* q was one too large; add back D and decrement q.  */
+	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+	      q--;
+	    }
+	}
+
+      *--qp = q;
+    }
+  np[1] = n1;	/* store back the top remainder limb */
+
+  return qh;
+}
diff --git a/mpn/generic/sbpi1_divappr_q.c b/mpn/generic/sbpi1_divappr_q.c
new file mode 100644
index 0000000..ef7ca26
--- /dev/null
+++ b/mpn/generic/sbpi1_divappr_q.c
@@ -0,0 +1,198 @@
+/* mpn_sbpi1_divappr_q -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm, returning approximate quotient.  The quotient returned
+   is either correct, or one too large.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_divappr_q (mp_ptr qp,
+		     mp_ptr np, mp_size_t nn,
+		     mp_srcptr dp, mp_size_t dn,
+		     mp_limb_t dinv)
+{
+  mp_limb_t qh;			/* high quotient limb (0 or 1); returned */
+  mp_size_t qn, i;
+  mp_limb_t n1, n0;		/* top two limbs of the partial remainder */
+  mp_limb_t d1, d0;		/* top two limbs of the (truncated) divisor */
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+  mp_limb_t flag;		/* ~0 while the approximate quotient may still
+				   be one too large; 0 once that is decided */
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);	/* divisor must be normalized */
+
+  np += nn;
+
+  /* Only the qn+1 most significant divisor limbs are used; the ignored
+     tail is what makes the result approximate (correct or one too
+     large, per the file header).  */
+  qn = nn - dn;
+  if (qn + 1 < dn)
+    {
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  /* High quotient limb: 1 iff the top dn limbs of N are >= D.  */
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += qn;
+
+  dn -= 2;			/* offset dn by 2 for main division loops,
+				   saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];
+  d0 = dp[dn + 0];
+
+  np -= 2;
+
+  n1 = np[1];
+
+  /* Main loop: one quotient limb per iteration via the Möller-Granlund
+     3/2 division of the top three remainder limbs by d1:d0.  */
+  for (i = qn - (dn + 2); i >= 0; i--)
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+	{
+	  /* udiv_qr_3by2 needs the high part strictly below the divisor;
+	     handle the q = B-1 case separately.  */
+	  q = GMP_NUMB_MASK;
+	  mpn_submul_1 (np - dn, dp, dn + 2, q);
+	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  /* Subtract q times the low divisor limbs and propagate the
+	     borrow through n0 and n1.  */
+	  cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	  cy1 = n0 < cy;
+	  n0 = (n0 - cy) & GMP_NUMB_MASK;
+	  cy = n1 < cy1;
+	  n1 -= cy1;
+	  np[0] = n0;
+
+	  if (UNLIKELY (cy != 0))
+	    {
+	      /* q was one too large; add back D and decrement q.  */
+	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+	      q--;
+	    }
+	}
+
+      *--qp = q;
+    }
+
+  flag = ~CNST_LIMB(0);
+
+  /* Fringe: low quotient limbs, with the divisor truncated by one limb
+     per step.  Once flag is zeroed, both flag-masked tests are forced
+     (n1 >= 0 true, n1 < 0 false), so the remaining limbs come out as
+     GMP_NUMB_MASK, keeping the result at most one too large.  */
+  if (dn >= 0)
+    {
+      for (i = dn; i > 0; i--)
+	{
+	  np--;
+	  if (UNLIKELY (n1 >= (d1 & flag)))
+	    {
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
+
+	      if (UNLIKELY (n1 != cy))
+		{
+		  if (n1 < (cy & flag))
+		    {
+		      q--;
+		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
+		    }
+		  else
+		    flag = 0;
+		}
+	      n1 = np[1];
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	      cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+	      cy1 = n0 < cy;
+	      n0 = (n0 - cy) & GMP_NUMB_MASK;
+	      cy = n1 < cy1;
+	      n1 -= cy1;
+	      np[0] = n0;
+
+	      if (UNLIKELY (cy != 0))
+		{
+		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+		  q--;
+		}
+	    }
+
+	  *--qp = q;
+
+	  /* Truncate operands.  */
+	  dn--;
+	  dp++;
+	}
+
+      /* Last quotient limb, with a 2-limb divisor left.  */
+      np--;
+      if (UNLIKELY (n1 >= (d1 & flag)))
+	{
+	  q = GMP_NUMB_MASK;
+	  cy = mpn_submul_1 (np, dp, 2, q);
+
+	  if (UNLIKELY (n1 != cy))
+	    {
+	      if (n1 < (cy & flag))
+		{
+		  q--;
+		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
+		}
+	      else
+		flag = 0;
+	    }
+	  n1 = np[1];
+	}
+      else
+	{
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  np[1] = n1;
+	  np[0] = n0;
+	}
+
+      *--qp = q;
+    }
+
+  ASSERT_ALWAYS (np[1] == n1);
+
+  return qh;
+}
diff --git a/mpn/generic/scan0.c b/mpn/generic/scan0.c
new file mode 100644
index 0000000..d71832e
--- /dev/null
+++ b/mpn/generic/scan0.c
@@ -0,0 +1,59 @@
+/* mpn_scan0 -- Scan from a given bit position for the next clear bit.
+
+Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Argument constraints:
+   1. U must sooner or later have a limb with a clear bit.
+ */
+
+mp_bitcnt_t
+mpn_scan0 (mp_srcptr up, mp_bitcnt_t starting_bit)
+{
+  mp_size_t starting_word;
+  mp_limb_t alimb;
+  int cnt;
+  mp_srcptr p;
+
+  /* Start at the word implied by STARTING_BIT.  */
+  starting_word = starting_bit / GMP_NUMB_BITS;
+  p = up + starting_word;
+  alimb = *p++ ^ GMP_NUMB_MASK;
+
+  /* Mask off any bits before STARTING_BIT in the first limb.  */
+  alimb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);
+
+  while (alimb == 0)
+    alimb = *p++ ^ GMP_NUMB_MASK;
+
+  count_trailing_zeros (cnt, alimb);
+  return (p - up - 1) * GMP_NUMB_BITS + cnt;
+}
diff --git a/mpn/generic/scan1.c b/mpn/generic/scan1.c
new file mode 100644
index 0000000..09e8060
--- /dev/null
+++ b/mpn/generic/scan1.c
@@ -0,0 +1,59 @@
+/* mpn_scan1 -- Scan from a given bit position for the next set bit.
+
+Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Argument constraints:
+   1. U must sooner or later have a limb != 0.
+ */
+
+mp_bitcnt_t
+mpn_scan1 (mp_srcptr up, mp_bitcnt_t starting_bit)
+{
+  mp_size_t starting_word;
+  mp_limb_t alimb;
+  int cnt;
+  mp_srcptr p;
+
+  /* Start at the word implied by STARTING_BIT.  */
+  starting_word = starting_bit / GMP_NUMB_BITS;
+  p = up + starting_word;
+  alimb = *p++;
+
+  /* Mask off any bits before STARTING_BIT in the first limb.  */
+  alimb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);
+
+  while (alimb == 0)
+    alimb = *p++;
+
+  count_trailing_zeros (cnt, alimb);
+  return (p - up - 1) * GMP_NUMB_BITS + cnt;
+}
diff --git a/mpn/generic/sec_aors_1.c b/mpn/generic/sec_aors_1.c
new file mode 100644
index 0000000..6480fa1
--- /dev/null
+++ b/mpn/generic/sec_aors_1.c
@@ -0,0 +1,59 @@
+/* mpn_sec_add_1, mpn_sec_sub_1
+
+   Contributed to the GNU project by Niels Möller
+
+Copyright 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if OPERATION_sec_add_1
+#define FNAME mpn_sec_add_1
+#define FNAME_itch mpn_sec_add_1_itch
+#define OP_N mpn_add_n
+#endif
+#if OPERATION_sec_sub_1
+#define FNAME mpn_sec_sub_1
+#define FNAME_itch mpn_sec_sub_1_itch
+#define OP_N mpn_sub_n
+#endif
+
+/* It's annoying that we need scratch space */
+mp_size_t
+FNAME_itch (mp_size_t n)
+{
+  return n;
+}
+
+mp_limb_t
+FNAME (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_ptr scratch)
+{
+  scratch[0] = b;
+  MPN_ZERO (scratch + 1, n-1);
+  return OP_N (rp, ap, scratch, n);
+}
diff --git a/mpn/generic/sec_div.c b/mpn/generic/sec_div.c
new file mode 100644
index 0000000..1f08649
--- /dev/null
+++ b/mpn/generic/sec_div.c
@@ -0,0 +1,131 @@
+/* mpn_sec_div_qr, mpn_sec_div_r -- Compute Q = floor(U / V), U = U mod V.
+   Side-channel silent under the assumption that the used instructions are
+   side-channel silent.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2011-2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if OPERATION_sec_div_qr
+#define FNAME mpn_sec_div_qr
+#define FNAME_itch mpn_sec_div_qr_itch
+#define Q(q) q,
+#define RETTYPE mp_limb_t
+#endif
+#if OPERATION_sec_div_r
+#define FNAME mpn_sec_div_r
+#define FNAME_itch mpn_sec_div_r_itch
+#define Q(q)
+#define RETTYPE void
+#endif
+
+mp_size_t
+FNAME_itch (mp_size_t nn, mp_size_t dn)
+{
+#if OPERATION_sec_div_qr
+/* Needs (nn + dn + 1) + mpn_sec_pi1_div_qr's needs of (2nn' - dn + 1) for a
+   total of 3nn + 4 limbs at tp.  Note that mpn_sec_pi1_div_qr's nn is one
+   greater than ours, therefore +4 and not just +2.  */
+  return 3 * nn + 4;
+#endif
+#if OPERATION_sec_div_r
+/* Needs (nn + dn + 1) + mpn_sec_pi1_div_r's needs of (dn + 1) for a total of
+   nn + 2dn + 2 limbs at tp.  */
+  return nn + 2 * dn + 2;
+#endif
+}
+
+RETTYPE
+FNAME (Q(mp_ptr qp)
+       mp_ptr np, mp_size_t nn,
+       mp_srcptr dp, mp_size_t dn,
+       mp_ptr tp)
+{
+  mp_limb_t d1, d0;
+  unsigned int cnt;
+  mp_limb_t inv32;
+
+  ASSERT (dn >= 1);
+  ASSERT (nn >= dn);
+  ASSERT (dp[dn - 1] != 0);
+
+  d1 = dp[dn - 1];
+  count_leading_zeros (cnt, d1);
+
+  if (cnt != 0)
+    {
+      mp_limb_t qh, cy;
+      mp_ptr np2, dp2;
+      dp2 = tp;					/* dn limbs */
+      mpn_lshift (dp2, dp, dn, cnt);
+
+      np2 = tp + dn;				/* (nn + 1) limbs */
+      cy = mpn_lshift (np2, np, nn, cnt);
+      np2[nn++] = cy;
+
+      d0 = dp2[dn - 1];
+      d0 += (~d0 != 0);
+      invert_limb (inv32, d0);
+
+      /* We add nn + dn to tp here, not nn + 1 + dn, as expected.  This is
+	 since nn here will have been incremented.  */
+#if OPERATION_sec_div_qr
+      qh = mpn_sec_pi1_div_qr (np2 + dn, np2, nn, dp2, dn, inv32, tp + nn + dn);
+      ASSERT (qh == 0);		/* FIXME: this indicates inefficiency! */
+      MPN_COPY (qp, np2 + dn, nn - dn - 1);
+      qh = np2[nn - 1];
+#else
+      mpn_sec_pi1_div_r (np2, nn, dp2, dn, inv32, tp + nn + dn);
+#endif
+
+      mpn_rshift (np, np2, dn, cnt);
+
+#if OPERATION_sec_div_qr
+      return qh;
+#endif
+    }
+  else
+    {
+      /* FIXME: Consider copying np => np2 here, adding a 0-limb at the top.
+	 That would simplify the underlying pi1 function, since then it could
+	 assume nn > dn.  */
+      d0 = dp[dn - 1];
+      d0 += (~d0 != 0);
+      invert_limb (inv32, d0);
+
+#if OPERATION_sec_div_qr
+      return mpn_sec_pi1_div_qr (qp, np, nn, dp, dn, inv32, tp);
+#else
+      mpn_sec_pi1_div_r (np, nn, dp, dn, inv32, tp);
+#endif
+    }
+}
diff --git a/mpn/generic/sec_invert.c b/mpn/generic/sec_invert.c
new file mode 100644
index 0000000..07665d1
--- /dev/null
+++ b/mpn/generic/sec_invert.c
@@ -0,0 +1,177 @@
+/* mpn_sec_invert
+
+   Contributed to the GNU project by Niels Möller
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if 0
+/* Currently unused. Should be resurrected once mpn_cnd_neg is
+   advertised. */
+static mp_size_t
+mpn_cnd_neg_itch (mp_size_t n)
+{
+  return n;
+}
+#endif
+
+/* FIXME: Ought to return carry */
+static void
+mpn_cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n,
+	     mp_ptr scratch)
+{
+  mpn_lshift (scratch, ap, n, 1);
+  mpn_cnd_sub_n (cnd, rp, ap, scratch, n);
+}
+
+static int
+mpn_sec_eq_ui (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_limb_t d;
+  ASSERT (n > 0);
+
+  d = ap[0] ^ b;
+
+  while (--n > 0)
+    d |= ap[n];
+
+  return d == 0;
+}
+
+mp_size_t
+mpn_sec_invert_itch (mp_size_t n)
+{
+  return 4*n;
+}
+
+/* Compute V <-- A^{-1} (mod M), in data-independent time. M must be
+   odd. Returns 1 on success, and 0 on failure (i.e., if gcd (A, m) !=
+   1). Inputs and outputs of size n, and no overlap allowed. The {ap,
+   n} area is destroyed. For arbitrary inputs, bit_size should be
+   2*n*GMP_NUMB_BITS, but if A or M are known to be smaller, e.g., if
+   M = 2^521 - 1 and A < M, bit_size can be any bound on the sum of
+   the bit sizes of A and M. */
+int
+mpn_sec_invert (mp_ptr vp, mp_ptr ap, mp_srcptr mp,
+		mp_size_t n, mp_bitcnt_t bit_size,
+		mp_ptr scratch)
+{
+  ASSERT (n > 0);
+  ASSERT (bit_size > 0);
+  ASSERT (mp[0] & 1);
+  ASSERT (! MPN_OVERLAP_P (ap, n, vp, n));
+#define bp (scratch + n)
+#define up (scratch + 2*n)
+#define m1hp (scratch + 3*n)
+
+  /* Maintain
+
+       a = u * orig_a (mod m)
+       b = v * orig_a (mod m)
+
+     and b odd at all times. Initially,
+
+       a = a_orig, u = 1
+       b = m,      v = 0
+     */
+
+
+  up[0] = 1;
+  mpn_zero (up+1, n - 1);
+  mpn_copyi (bp, mp, n);
+  mpn_zero (vp, n);
+
+  ASSERT_CARRY (mpn_rshift (m1hp, mp, n, 1));
+  ASSERT_NOCARRY (mpn_sec_add_1 (m1hp, m1hp, n, 1, scratch));
+
+  while (bit_size-- > 0)
+    {
+      mp_limb_t odd, swap, cy;
+
+      /* Always maintain b odd. The logic of the iteration is as
+	 follows. For a, b:
+
+	   odd = a & 1
+	   a -= odd * b
+	   if (underflow from a-b)
+	     {
+	       b += a, assigns old a
+	       a = B^n-a
+	     }
+
+	   a /= 2
+
+	 For u, v:
+
+	   if (underflow from a - b)
+	     swap u, v
+	   u -= odd * v
+	   if (underflow from u - v)
+	     u += m
+
+	   u /= 2
+	   if (a one bit was shifted out)
+	     u += (m+1)/2
+
+	 As long as a > 0, the quantity
+
+	   (bitsize of a) + (bitsize of b)
+
+	 is reduced by at least one bit per iteration, hence after (bit_size of
+	 orig_a) + (bit_size of m) - 1 iterations we surely have a = 0. Then b
+	 = gcd(orig_a, m) and if b = 1 then also v = orig_a^{-1} (mod m).
+      */
+
+      ASSERT (bp[0] & 1);
+      odd = ap[0] & 1;
+
+      swap = mpn_cnd_sub_n (odd, ap, ap, bp, n);
+      mpn_cnd_add_n (swap, bp, bp, ap, n);
+      mpn_cnd_neg (swap, ap, ap, n, scratch);
+
+      mpn_cnd_swap (swap, up, vp, n);
+      cy = mpn_cnd_sub_n (odd, up, up, vp, n);
+      cy -= mpn_cnd_add_n (cy, up, up, mp, n);
+      ASSERT (cy == 0);
+
+      cy = mpn_rshift (ap, ap, n, 1);
+      ASSERT (cy == 0);
+      cy = mpn_rshift (up, up, n, 1);
+      cy = mpn_cnd_add_n (cy, up, up, m1hp, n);
+      ASSERT (cy == 0);
+    }
+  /* Should be all zeros, but check only extreme limbs */
+  ASSERT ( (ap[0] | ap[n-1]) == 0);
+  /* Check if indeed gcd == 1. */
+  return mpn_sec_eq_ui (bp, n, 1);
+#undef bp
+#undef up
+#undef m1hp
+}
diff --git a/mpn/generic/sec_mul.c b/mpn/generic/sec_mul.c
new file mode 100644
index 0000000..4bbfa61
--- /dev/null
+++ b/mpn/generic/sec_mul.c
@@ -0,0 +1,48 @@
+/* mpn_sec_mul.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpn_sec_mul (mp_ptr rp,
+	     mp_srcptr ap, mp_size_t an,
+	     mp_srcptr bp, mp_size_t bn,
+	     mp_ptr tp)
+{
+  mpn_mul_basecase (rp, ap, an, bp, bn);
+}
+
+mp_size_t
+mpn_sec_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  return 0;
+}
diff --git a/mpn/generic/sec_pi1_div.c b/mpn/generic/sec_pi1_div.c
new file mode 100644
index 0000000..29d01e7
--- /dev/null
+++ b/mpn/generic/sec_pi1_div.c
@@ -0,0 +1,172 @@
+/* mpn_sec_pi1_div_qr, mpn_sec_pi1_div_r -- Compute Q = floor(U / V), U = U
+   mod V.  Side-channel silent under the assumption that the used instructions
+   are side-channel silent.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011-2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This side-channel silent division algorithm reduces the partial remainder by
+   GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
+   division algorithm.  We actually do not insist on reducing by exactly
+   GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
+   too large (B is the limb base, D is the divisor, and i is the induction
+   variable); the subsequent step will handle the extra partial remainder bits.
+
+   With that partial remainder reduction, each step generates a quotient "half
+   limb".  The outer loop generates two quotient half limbs, an upper (q1h) and
+   a lower (q0h) which are stored sparsely in separate limb arrays.  These
+   arrays are added at the end; using separate arrays avoids data-dependent
+   carry propagation which could else pose a side-channel leakage problem.
+
+   The quotient half limbs may be between -3 to 0 from the accurate value
+   ("accurate" being the one which corresponds to a reduction to a principal
+   partial remainder).  Too small quotient half limbs correspond to too large
+   remainders, which we reduce later, as described above.
+
+   In order to keep quotients from getting too big, corresponding to a negative
+   partial remainder, we use an inverse which is slightly smaller than usual.
+*/
+
+#if OPERATION_sec_pi1_div_qr
+/* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
+#define FNAME mpn_sec_pi1_div_qr
+#define Q(q) q,
+#define RETTYPE mp_limb_t
+#endif
+#if OPERATION_sec_pi1_div_r
+/* Needs (dn + 1) limbs at tp.  */
+#define FNAME mpn_sec_pi1_div_r
+#define Q(q)
+#define RETTYPE void
+#endif
+
+RETTYPE
+FNAME (Q(mp_ptr qp)
+       mp_ptr np, mp_size_t nn,
+       mp_srcptr dp, mp_size_t dn,
+       mp_limb_t dinv,
+       mp_ptr tp)
+{
+  mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
+  mp_size_t i;
+  mp_ptr hp;
+#if OPERATION_sec_pi1_div_qr
+  mp_limb_t qh;
+  mp_ptr qlp, qhp;
+#endif
+
+  ASSERT (dn >= 1);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn - 1] & GMP_NUMB_HIGHBIT) != 0);
+
+  if (nn == dn)
+    {
+      cy = mpn_sub_n (np, np, dp, dn);
+      mpn_cnd_add_n (cy, np, np, dp, dn);
+#if OPERATION_sec_pi1_div_qr
+      return 1 - cy;
+#else
+      return;
+#endif
+    }
+
+  /* Create a divisor copy shifted half a limb.  */
+  hp = tp;					/* (dn + 1) limbs */
+  hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
+
+#if OPERATION_sec_pi1_div_qr
+  qlp = tp + (dn + 1);				/* (nn - dn) limbs */
+  qhp = tp + (nn + 1);				/* (nn - dn) limbs */
+#endif
+
+  np += nn - dn;
+  nh = 0;
+
+  for (i = nn - dn - 1; i >= 0; i--)
+    {
+      np--;
+
+      nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
+      umul_ppmm (q1h, dummy, nh, dinv);
+      q1h += nh;
+#if OPERATION_sec_pi1_div_qr
+      qhp[i] = q1h;
+#endif
+      mpn_submul_1 (np, hp, dn + 1, q1h);
+
+      nh = np[dn];
+      umul_ppmm (q0h, dummy, nh, dinv);
+      q0h += nh;
+#if OPERATION_sec_pi1_div_qr
+      qlp[i] = q0h;
+#endif
+      nh -= mpn_submul_1 (np, dp, dn, q0h);
+    }
+
+  /* 1st adjustment depends on extra high remainder limb.  */
+  cnd = nh != 0;				/* FIXME: cmp-to-int */
+#if OPERATION_sec_pi1_div_qr
+  qlp[0] += cnd;
+#endif
+  nh -= mpn_cnd_sub_n (cnd, np, np, dp, dn);
+
+  /* 2nd adjustment depends on remainder/divisor comparison as well as whether
+     extra remainder limb was nullified by previous subtract.  */
+  cy = mpn_sub_n (np, np, dp, dn);
+  cy = cy - nh;
+#if OPERATION_sec_pi1_div_qr
+  qlp[0] += 1 - cy;
+#endif
+  mpn_cnd_add_n (cy, np, np, dp, dn);
+
+  /* 3rd adjustment depends on remainder/divisor comparison.  */
+  cy = mpn_sub_n (np, np, dp, dn);
+#if OPERATION_sec_pi1_div_qr
+  qlp[0] += 1 - cy;
+#endif
+  mpn_cnd_add_n (cy, np, np, dp, dn);
+
+#if OPERATION_sec_pi1_div_qr
+  /* Combine quotient halves into final quotient.  */
+  qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
+  qh += mpn_add_n (qp, qhp, qlp, nn - dn);
+
+  return qh;
+#else
+  return;
+#endif
+}
diff --git a/mpn/generic/sec_powm.c b/mpn/generic/sec_powm.c
new file mode 100644
index 0000000..bba11cf
--- /dev/null
+++ b/mpn/generic/sec_powm.c
@@ -0,0 +1,430 @@
+/* mpn_sec_powm -- Compute R = U^E mod M.  Secure variant, side-channel silent
+   under the assumption that the multiply instruction is side channel silent.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2007-2009, 2011-2014, 2018-2019, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+
+  1. T <- (B^n * U) mod M; convert to REDC form
+
+  2. Compute table U^0, U^1, U^2... of floor(log(E))-dependent size
+
+  3. While there are more bits in E
+       W <- power left-to-right base-k
+
+  The article "Defeating modexp side-channel attacks with data-independent
+  execution traces", https://gmplib.org/~tege/modexp-silent.pdf, has details.
+
+
+  TODO:
+
+   * Make getbits a macro, thereby allowing it to update the index operand.
+     That will simplify the code using getbits.  (Perhaps make getbits' sibling
+     getbit then have similar form, for symmetry.)
+
+   * Choose window size without looping.  (Superoptimize or think(tm).)
+
+   * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
+     redc_1 and redc_n.  On such systems, we will switch to redc_2 causing
+     slowdown.
+*/
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#undef MPN_REDC_1_SEC
+#if HAVE_NATIVE_mpn_sbpi1_bdiv_r
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);			\
+    mpn_cnd_sub_n (cy, rp, up + n, mp, n);				\
+  } while (0)
+#else
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
+    mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
+  } while (0)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#undef MPN_REDC_2_SEC
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip)				\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
+    mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
+  } while (0)
+#else
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip) /* empty */
+#undef REDC_1_TO_REDC_2_THRESHOLD
+#define REDC_1_TO_REDC_2_THRESHOLD MP_SIZE_T_MAX
+#endif
+
+/* Define our own mpn squaring function.  We do this since we cannot use a
+   native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
+   SQR_TOOM2_THRESHOLD.  This is so because of fixed size stack allocations
+   made inside mpn_sqr_basecase.  */
+
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+/* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_basecase
+#ifdef TUNE_SQR_TOOM2_MAX
+/* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
+   mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
+   file.  An assembly mpn_sqr_basecase that does not define it should allow
+   any size.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+#endif
+
+#ifdef WANT_FAT_BINARY
+/* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
+   __gmpn_cpuvec.  Perhaps any possible sqr_basecase.asm allows any size, and we
+   limit the use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+#ifndef SQR_BASECASE_LIM
+/* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
+   size.  */
+#define SQR_BASECASE_LIM  MP_SIZE_T_MAX
+#endif
+
+#define mpn_local_sqr(rp,up,n)						\
+  do {									\
+    if (ABOVE_THRESHOLD (n, SQR_BASECASE_THRESHOLD)			\
+	&& BELOW_THRESHOLD (n, SQR_BASECASE_LIM))			\
+      mpn_sqr_basecase (rp, up, n);					\
+    else								\
+      mpn_mul_basecase(rp, up, n, up, n);				\
+  } while (0)
+
+#define getbit(p,bi) \
+  ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
+
+/* FIXME: Maybe some things would get simpler if all callers ensure
+   that bi >= nbits. As far as I understand, with the current code bi
+   < nbits can happen only for the final iteration. */
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+  int nbits_in_r;
+  mp_limb_t r;
+  mp_size_t i;
+
+  if (bi < nbits)
+    {
+      return p[0] & (((mp_limb_t) 1 << bi) - 1);
+    }
+  else
+    {
+      bi -= nbits;			/* bit index of low bit to extract */
+      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
+      bi %= GMP_NUMB_BITS;		/* bit index in low word */
+      r = p[i] >> bi;			/* extract (low) bits */
+      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
+      if (nbits_in_r < nbits)		/* did we get enough bits? */
+	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
+      return r & (((mp_limb_t ) 1 << nbits) - 1);
+    }
+}
+
+#ifndef POWM_SEC_TABLE
+#if GMP_NUMB_BITS < 50
+#define POWM_SEC_TABLE  2,33,96,780,2741
+#else
+#define POWM_SEC_TABLE  2,130,524,2578
+#endif
+#endif
+
+#if TUNE_PROGRAM_BUILD
+extern int win_size (mp_bitcnt_t);
+#else
+static inline int
+win_size (mp_bitcnt_t enb)
+{
+  int k;
+  /* Find k, such that x[k-1] < enb <= x[k].
+
+     We require that x[k] >= k, then it follows that enb > x[k-1] >=
+     k-1, which implies k <= enb.
+  */
+  static const mp_bitcnt_t x[] = {POWM_SEC_TABLE,~(mp_bitcnt_t)0};
+  for (k = 0; enb > x[k++]; )
+    ;
+  ASSERT (k <= enb);
+  return k;
+}
+#endif
+
+/* Convert U to REDC form, U_r = B^n * U mod M.
+   Uses scratch space at tp of size 2un + n + 1.  */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  MPN_ZERO (tp, n);
+  MPN_COPY (tp + n, up, un);
+
+  mpn_sec_div_r (tp, un + n, mp, n, tp + un + n);
+  MPN_COPY (rp, tp, n);
+}
+
+static mp_limb_t
+sec_binvert_limb (mp_limb_t n)
+{
+  mp_limb_t inv, t;
+  ASSERT ((n & 1) == 1);
+  /* 3 + 2 -> 5 */
+  inv = n + (((n + 1) << 1) & 0x18);
+
+  t = n * inv;
+#if GMP_NUMB_BITS <= 10
+  /* 5 x 2 -> 10 */
+  inv = 2 * inv - inv * t;
+#else /* GMP_NUMB_BITS > 10 */
+  /* 5 x 2 + 2 -> 12 */
+  inv = 2 * inv - inv * t + ((inv<<10)&-(t&(1<<5)));
+#endif /* GMP_NUMB_BITS <= 10 */
+
+  if (GMP_NUMB_BITS > 12)
+    {
+      t = n * inv - 1;
+      if (GMP_NUMB_BITS <= 36)
+	{
+	  /* 12 x 3 -> 36 */
+	  inv += inv * t * (t - 1);
+	}
+      else /* GMP_NUMB_BITS > 36 */
+	{
+	  mp_limb_t t2 = t * t;
+#if GMP_NUMB_BITS <= 60
+	  /* 12 x 5 -> 60 */
+	  inv += inv * (t2 + 1) * (t2 - t);
+#else /* GMP_NUMB_BITS > 60 */
+	  /* 12 x 5 + 4 -> 64 */
+	  inv *= (t2 + 1) * (t2 - t) + 1 - ((t<<48)&-(t&(1<<12)));
+
+	  /* 64 -> 128 -> 256 -> ... */
+	  for (int todo = (GMP_NUMB_BITS - 1) >> 6; todo != 0; todo >>= 1)
+	    inv = 2 * inv - inv * inv * n;
+#endif /* GMP_NUMB_BITS <= 60 */
+	}
+    }
+
+  ASSERT ((inv * n & GMP_NUMB_MASK) == 1);
+  return inv & GMP_NUMB_MASK;
+}
+
+/* {rp, n} <-- {bp, bn} ^ {ep, en} mod {mp, n},
+   where en = ceil (enb / GMP_NUMB_BITS)
+   Requires that {mp, n} is odd (and hence also mp[0] odd).
+   Uses scratch space at tp as defined by mpn_sec_powm_itch.  */
+void
+mpn_sec_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+	      mp_srcptr ep, mp_bitcnt_t enb,
+	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ip[2], *mip;
+  int windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_ptr pp, this_pp, ps;
+  long i;
+  int cnd;
+
+  ASSERT (enb > 0);
+  ASSERT (n > 0);
+  /* The code works for bn = 0, but the defined scratch space is 2 limbs
+     greater than we supply, when converting 1 to redc form.  */
+  ASSERT (bn > 0);
+  ASSERT ((mp[0] & 1) != 0);
+
+  windowsize = win_size (enb);
+
+  mip = ip;
+  mip[0] = sec_binvert_limb (mp[0]);
+  if (ABOVE_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mp_limb_t t, dummy, mip0 = mip[0];
+
+      umul_ppmm (t, dummy, mip0, mp[0]);
+      ASSERT (dummy == 1);
+      t += mip0 * mp[1]; /* t = (mp * mip0)[1] */
+
+      mip[1] = t * mip0 - 1; /* ~( - t * mip0) */
+    }
+  mip[0] = -mip[0];
+
+  pp = tp;
+  tp += (n << windowsize);	/* put tp after power table */
+
+  /* Compute pp[0] table entry */
+  /* scratch: |   n   | 1 |   n+2    |  */
+  /*          | pp[0] | 1 | redcify  |  */
+  this_pp = pp;
+  this_pp[n] = 1;
+  redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
+  this_pp += n;
+
+  /* Compute pp[1] table entry.  To avoid excessive scratch usage in the
+     degenerate situation where B >> M, we let redcify use scratch space which
+     will later be used by the pp table (element 2 and up).  */
+  /* scratch: |   n   |   n   |  bn + n + 1  |  */
+  /*          | pp[0] | pp[1] |   redcify    |  */
+  redcify (this_pp, bp, bn, mp, n, this_pp + n);
+
+  /* Precompute powers of b and put them in the temporary area at pp.  */
+  /* scratch: |   n   |   n   | ...  |                    |   2n      |  */
+  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  product  |  */
+  ps = pp + n;		/* initially B^1 */
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      for (i = (1 << windowsize) - 2; i > 0; i -= 2)
+	{
+	  mpn_local_sqr (tp, ps, n);
+	  ps += n;
+	  this_pp += n;
+	  MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+
+	  mpn_mul_basecase (tp, this_pp, n, pp + n, n);
+	  this_pp += n;
+	  MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+	}
+    }
+  else
+    {
+      for (i = (1 << windowsize) - 2; i > 0; i -= 2)
+	{
+	  mpn_local_sqr (tp, ps, n);
+	  ps += n;
+	  this_pp += n;
+	  MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+
+	  mpn_mul_basecase (tp, this_pp, n, pp + n, n);
+	  this_pp += n;
+	  MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+	}
+    }
+
+  expbits = getbits (ep, enb, windowsize);
+  ASSERT_ALWAYS (enb >= windowsize);
+  enb -= windowsize;
+
+  mpn_sec_tabselect (rp, pp, n, 1 << windowsize, expbits);
+
+  /* Main exponentiation loop.  */
+  /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
+  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
+
+#define INNERLOOP							\
+  while (enb != 0)							\
+    {									\
+      expbits = getbits (ep, enb, windowsize);				\
+      this_windowsize = windowsize;					\
+      if (enb < windowsize)						\
+	{								\
+	  this_windowsize -= windowsize - enb;				\
+	  enb = 0;							\
+	}								\
+      else								\
+	enb -= windowsize;						\
+									\
+      do								\
+	{								\
+	  mpn_local_sqr (tp, rp, n);					\
+	  MPN_REDUCE (rp, tp, mp, n, mip);				\
+	  this_windowsize--;						\
+	}								\
+      while (this_windowsize != 0);					\
+									\
+      mpn_sec_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);	\
+      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);			\
+									\
+      MPN_REDUCE (rp, tp, mp, n, mip);					\
+    }
+
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+#undef MPN_REDUCE
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+      INNERLOOP;
+    }
+  else
+    {
+#undef MPN_REDUCE
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2_SEC (rp, tp, mp, n, mip)
+      INNERLOOP;
+    }
+
+  MPN_COPY (tp, rp, n);
+  MPN_ZERO (tp + n, n);
+
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+  else
+    MPN_REDC_2_SEC (rp, tp, mp, n, mip);
+
+  cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
+  mpn_cnd_sub_n (!cnd, rp, rp, mp, n);
+}
+
+mp_size_t
+mpn_sec_powm_itch (mp_size_t bn, mp_bitcnt_t enb, mp_size_t n)
+{
+  int windowsize;
+  mp_size_t redcify_itch, itch;
+
+  /* FIXME: no more _local/_basecase difference. */
+  /* The top scratch usage will either be when reducing B in the 2nd redcify
+     call, or more typically n*2^windowsize + 3n or 4n, in the main loop.  (It
+     is 3n or 4n depending on if we use mpn_local_sqr or a native
+     mpn_sqr_basecase.  We assume 4n always for now.) */
+
+  windowsize = win_size (enb);
+
+  /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
+     the (bn + n) term is due to redcify's own usage, and the rest is due to
+     mpn_sec_div_r's usage when called from redcify.  */
+  redcify_itch = (2 * n) + (bn + n) + ((bn + n) + 2 * n + 2);
+
+  /* The n * 2^windowsize term is due to the power table, the 4n term is due to
+     scratch needs of squaring/multiplication in the exponentiation loop.  */
+  itch = (n << windowsize) + (4 * n);
+
+  return MAX (itch, redcify_itch);
+}
diff --git a/mpn/generic/sec_sqr.c b/mpn/generic/sec_sqr.c
new file mode 100644
index 0000000..83fc7d9
--- /dev/null
+++ b/mpn/generic/sec_sqr.c
@@ -0,0 +1,76 @@
+/* mpn_sec_sqr.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+/* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_basecase
+#ifdef TUNE_SQR_TOOM2_MAX
+/* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
+   mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
+   file.  An assembly mpn_sqr_basecase that does not define it should allow
+   any size.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+#endif
+
+#ifdef WANT_FAT_BINARY
+/* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
+   __gmpn_cpuvec.  Perhaps any possible sqr_basecase.asm allow any size, and we
+   limit the use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+/* Square {ap,an} into {rp,2*an}.  Part of the mpn_sec_* family; the
+   scratch argument tp is accepted for interface uniformity but unused.  */
+void
+mpn_sec_sqr (mp_ptr rp,
+	     mp_srcptr ap, mp_size_t an,
+	     mp_ptr tp)
+{
+#ifdef SQR_BASECASE_LIM
+  /* A size limit applies to mpn_sqr_basecase here; mpn_mul_basecase
+     accepts any operand size, so use it instead.  */
+  mpn_mul_basecase (rp, ap, an, ap, an);
+#else
+  /* No size limit defined: mpn_sqr_basecase handles any operand size.  */
+  mpn_sqr_basecase (rp, ap, an);
+#endif
+}
+
+mp_size_t
+mpn_sec_sqr_itch (mp_size_t an)
+{
+  /* mpn_sec_sqr above never touches its tp argument, so no scratch space
+     is required.  */
+  return 0;
+}
diff --git a/mpn/generic/sec_tabselect.c b/mpn/generic/sec_tabselect.c
new file mode 100644
index 0000000..f50bdac
--- /dev/null
+++ b/mpn/generic/sec_tabselect.c
@@ -0,0 +1,134 @@
+/* mpn_sec_tabselect.
+
+Copyright 2007-2009, 2011, 2013, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+#ifndef SEC_TABSELECT_METHOD
+#define SEC_TABSELECT_METHOD 1
+#endif
+
+/* Select entry `which' from table `tab', which has nents entries, each `n'
+   limbs.  Store the selected entry at rp.  Reads entire table to avoid
+   side-channel information leaks.  O(n*nents).  */
+
+#if SEC_TABSELECT_METHOD == 1
+void
+mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
+		   mp_size_t n, mp_size_t nents, mp_size_t which)
+{
+  mp_size_t k, i;
+  mp_limb_t mask;
+  volatile const mp_limb_t *tp;
+
+  tp = tab;
+
+  /* Place first entry into result area. */
+  for (i = 0; i < n; i++)
+    rp[i] = tp[i];
+
+  /* Conditionally replace entry in result area by entry 1...(nents-1) using
+     masking trickery. */
+  for (k = 1; k < nents; k++)
+    {
+      /* Generate a mask using an expression which all compilers should compile
+	 into branch-free code.  The convoluted expression is designed to both
+	 allow mp_limb_t greater and mp_limb_t smaller than mp_size_t. */
+      mask = -(mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1));
+      /* mask is now all ones when k != which (keep the current result) and
+	 all zeros when k == which (take this table entry instead).  */
+      tp += n;
+      for (i = 0; i < n; i++)
+	rp[i] = (rp[i] & mask) | (tp[i] & ~mask);
+    }
+}
+#endif
+
+#if SEC_TABSELECT_METHOD == 2
+void
+mpn_sec_tabselect (volatile mp_limb_t * restrict rp,
+		   volatile const mp_limb_t * restrict tab,
+		   mp_size_t n, mp_size_t nents, mp_size_t which)
+{
+  mp_size_t k, i;
+  mp_limb_t mask, r0, r1, r2, r3;
+  volatile const mp_limb_t * restrict tp;
+
+  /* Each result limb is formed as a sum over all nents entries under a
+     mask which is all ones exactly when k == which and zero otherwise;
+     only the selected entry contributes, yet every entry is read.  The
+     limbs are processed in chunks of 1, 2 and then 4, according to
+     n mod 4.  */
+
+  if (n & 1)
+    {
+      tp = tab;
+      r0 = 0;
+      for (k = 0; k < nents; k++)
+	{
+	  /* 0 - 1 = all ones iff k == which.  */
+	  mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
+	  r0 += tp[0] & mask;
+	  tp += n;
+	}
+      rp[0] = r0;
+      rp += 1;
+      tab += 1;
+    }
+
+  if (n & 2)
+    {
+      tp = tab;
+      r0 = r1 = 0;
+      for (k = 0; k < nents; k++)
+	{
+	  mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
+	  r0 += tp[0] & mask;
+	  r1 += tp[1] & mask;
+	  tp += n;
+	}
+      rp[0] = r0;
+      rp[1] = r1;
+      rp += 2;
+      tab += 2;
+    }
+
+  /* Remaining limbs, four at a time.  */
+  for (i = 0; i <= n - 4; i += 4)
+    {
+      tp = tab + i;
+      r0 = r1 = r2 = r3 = 0;
+      for (k = 0; k < nents; k++)
+	{
+	  mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
+	  r0 += tp[0] & mask;
+	  r1 += tp[1] & mask;
+	  r2 += tp[2] & mask;
+	  r3 += tp[3] & mask;
+	  tp += n;
+	}
+      rp[0] = r0;
+      rp[1] = r1;
+      rp[2] = r2;
+      rp[3] = r3;
+      rp += 4;
+    }
+}
+#endif
diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c
new file mode 100644
index 0000000..2bd584c
--- /dev/null
+++ b/mpn/generic/set_str.c
@@ -0,0 +1,290 @@
+/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base) --
+   Convert a STR_LEN long base BASE byte string pointed to by STR to a limb
+   vector pointed to by RES_PTR.  Return the number of limbs in RES_PTR.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_set_str, ARE INTERNAL WITH MUTABLE
+   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+   IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+   FUTURE GNU MP RELEASE.
+
+Copyright 1991-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* TODO:
+
+      Perhaps do not compute the highest power?
+      Instead, multiply twice by the 2nd highest power:
+
+	       _______
+	      |_______|  hp
+	      |_______|  pow
+       _______________
+      |_______________|  final result
+
+
+	       _______
+	      |_______|  hp
+		  |___|  pow[-1]
+	   ___________
+	  |___________|  intermediate result
+		  |___|  pow[-1]
+       _______________
+      |_______________|  final result
+
+      Generalizing that idea, perhaps we should make powtab contain successive
+      cubes, not squares.
+*/
+
+#include "gmp-impl.h"
+
+mp_size_t
+mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
+{
+  if (POW2_P (base))
+    {
+      /* The base is a power of 2.  Read the input string from least to most
+	 significant character/digit.  */
+
+      const unsigned char *s;
+      int next_bitpos;
+      mp_limb_t res_digit;
+      mp_size_t size;
+      /* For power-of-2 bases the big_base field holds the number of bits
+	 per input digit, as used below.  */
+      int bits_per_indigit = mp_bases[base].big_base;
+
+      size = 0;
+      res_digit = 0;
+      next_bitpos = 0;
+
+      for (s = str + str_len - 1; s >= str; s--)
+	{
+	  int inp_digit = *s;
+
+	  res_digit |= ((mp_limb_t) inp_digit << next_bitpos) & GMP_NUMB_MASK;
+	  next_bitpos += bits_per_indigit;
+	  if (next_bitpos >= GMP_NUMB_BITS)
+	    {
+	      /* Current limb full: store it, and start the next limb with
+		 the digit bits which did not fit.  */
+	      rp[size++] = res_digit;
+	      next_bitpos -= GMP_NUMB_BITS;
+	      res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
+	    }
+	}
+
+      if (res_digit != 0)
+	rp[size++] = res_digit;
+      return size;
+    }
+
+  if (BELOW_THRESHOLD (str_len, SET_STR_PRECOMPUTE_THRESHOLD))
+    return mpn_bc_set_str (rp, str, str_len, base);
+  else
+    {
+      mp_ptr powtab_mem, tp;
+      powers_t powtab[GMP_LIMB_BITS];
+      int chars_per_limb;
+      mp_size_t size;
+      mp_size_t un;
+      TMP_DECL;
+
+      TMP_MARK;
+
+      chars_per_limb = mp_bases[base].chars_per_limb;
+
+      /* Crude upper bound on the result size in limbs.  */
+      un = str_len / chars_per_limb + 1; /* FIXME: scalar integer division */
+
+      /* Allocate one large block for the powers of big_base.  */
+      powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
+
+      /* Precompute the powers of big_base needed by the divide-and-conquer
+	 conversion; pt points at the top powtab entry, which the recursion
+	 in mpn_dc_set_str consumes first.  */
+      size_t n_pows = mpn_compute_powtab (powtab, powtab_mem, un, base);
+      powers_t *pt = powtab + n_pows;
+
+      tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
+      size = mpn_dc_set_str (rp, str, str_len, pt, tp);
+
+      TMP_FREE;
+      return size;
+    }
+}
+
+/* Divide-and-conquer conversion: split the digit string into a high part
+   and a low part of powtab->digits_in_base digits, convert both parts
+   recursively (with descending powtab entries), and combine them as
+   high * power + low.  Returns the limb count of the result in rp.  */
+mp_size_t
+mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
+		const powers_t *powtab, mp_ptr tp)
+{
+  size_t len_lo, len_hi;
+  mp_limb_t cy;
+  mp_size_t ln, hn, n, sn;
+
+  len_lo = powtab->digits_in_base;
+
+  if (str_len <= len_lo)
+    {
+      /* The whole string fits below this power: drop to the basecase or
+	 recurse with the next smaller power.  */
+      if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD))
+	return mpn_bc_set_str (rp, str, str_len, powtab->base);
+      else
+	return mpn_dc_set_str (rp, str, str_len, powtab - 1, tp);
+    }
+
+  len_hi = str_len - len_lo;
+  ASSERT (len_lo >= len_hi);
+
+  /* Convert the high part into tp, using rp as scratch for the recursion.  */
+  if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD))
+    hn = mpn_bc_set_str (tp, str, len_hi, powtab->base);
+  else
+    hn = mpn_dc_set_str (tp, str, len_hi, powtab - 1, rp);
+
+  sn = powtab->shift;
+
+  if (hn == 0)
+    {
+      /* Zero +1 limb here, to avoid reading an allocated but uninitialised
+	 limb in mpn_incr_u below.  */
+      MPN_ZERO (rp, powtab->n + sn + 1);
+    }
+  else
+    {
+      /* rp <- high part times the power.  The power is stored with its sn
+	 low zero limbs stripped, so multiply by {powtab->p, powtab->n} at
+	 offset sn and clear the sn limbs below.  */
+      if (powtab->n > hn)
+	mpn_mul (rp + sn, powtab->p, powtab->n, tp, hn);
+      else
+	mpn_mul (rp + sn, tp, hn, powtab->p, powtab->n);
+      MPN_ZERO (rp, sn);
+    }
+
+  /* Convert the low part and add it into the result.  */
+  str = str + str_len - len_lo;
+  if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD))
+    ln = mpn_bc_set_str (tp, str, len_lo, powtab->base);
+  else
+    ln = mpn_dc_set_str (tp, str, len_lo, powtab - 1, tp + powtab->n + sn + 1);
+
+  if (ln != 0)
+    {
+      cy = mpn_add_n (rp, rp, tp, ln);
+      mpn_incr_u (rp + ln, cy);
+    }
+  n = hn + powtab->n + sn;
+  /* Normalize: strip at most one high zero limb.  */
+  return n - (rp[n - 1] == 0);
+}
+
+/* Basecase conversion: accumulate chars_per_limb input digits at a time
+   into res_digit, then fold each group into the result via
+   rp = rp * big_base + res_digit.  The final group may be shorter, so its
+   multiplier base^(digit count) is rebuilt on the fly.  */
+mp_size_t
+mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
+{
+  mp_size_t size;
+  size_t i;
+  long j;
+  mp_limb_t cy_limb;
+
+  mp_limb_t big_base;
+  int chars_per_limb;
+  mp_limb_t res_digit;
+
+  ASSERT (base >= 2);
+  ASSERT (base < numberof (mp_bases));
+  ASSERT (str_len >= 1);
+
+  big_base = mp_bases[base].big_base;
+  chars_per_limb = mp_bases[base].chars_per_limb;
+
+  /* Process all full groups of chars_per_limb digits.  */
+  size = 0;
+  for (i = chars_per_limb; i < str_len; i += chars_per_limb)
+    {
+      res_digit = *str++;
+      if (base == 10)
+	{ /* This is a common case.
+	     Help the compiler to avoid multiplication.  */
+	  for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
+	    res_digit = res_digit * 10 + *str++;
+	}
+      else
+	{
+	  for (j = chars_per_limb - 1; j != 0; j--)
+	    res_digit = res_digit * base + *str++;
+	}
+
+      if (size == 0)
+	{
+	  /* Result is still zero; only a nonzero group starts it, keeping
+	     the result normalized.  */
+	  if (res_digit != 0)
+	    {
+	      rp[0] = res_digit;
+	      size = 1;
+	    }
+	}
+      else
+	{
+#if HAVE_NATIVE_mpn_mul_1c
+	  cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+	  cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+	  cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+	  if (cy_limb != 0)
+	    rp[size++] = cy_limb;
+	}
+    }
+
+  /* Last group, with anywhere from 1 to chars_per_limb digits; build its
+     multiplier base^(digit count) in big_base while accumulating.  */
+  big_base = base;
+  res_digit = *str++;
+  if (base == 10)
+    { /* This is a common case.
+	 Help the compiler to avoid multiplication.  */
+      for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
+	{
+	  res_digit = res_digit * 10 + *str++;
+	  big_base *= 10;
+	}
+    }
+  else
+    {
+      for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
+	{
+	  res_digit = res_digit * base + *str++;
+	  big_base *= base;
+	}
+    }
+
+  if (size == 0)
+    {
+      if (res_digit != 0)
+	{
+	  rp[0] = res_digit;
+	  size = 1;
+	}
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_mul_1c
+      cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+      cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+      cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+      if (cy_limb != 0)
+	rp[size++] = cy_limb;
+    }
+  return size;
+}
diff --git a/mpn/generic/sizeinbase.c b/mpn/generic/sizeinbase.c
new file mode 100644
index 0000000..faee947
--- /dev/null
+++ b/mpn/generic/sizeinbase.c
@@ -0,0 +1,49 @@
+/* mpn_sizeinbase -- approximation to chars required for an mpn.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993-1995, 2001, 2002, 2011, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Same as mpz_sizeinbase, meaning exact for power-of-2 bases, and either
+   exact or 1 too big for other bases.  */
+
+size_t
+mpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)
+{
+  /* Delegate to the shared macro, which writes the character count into
+     its first argument.  */
+  size_t nchars;
+  MPN_SIZEINBASE (nchars, xp, xsize, base);
+  return nchars;
+}
diff --git a/mpn/generic/sqr.c b/mpn/generic/sqr.c
new file mode 100644
index 0000000..74fbff0
--- /dev/null
+++ b/mpn/generic/sqr.c
@@ -0,0 +1,98 @@
+/* mpn_sqr -- square natural numbers.
+
+Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+
+  /* Dispatch on n to the appropriate squaring algorithm: basecase, then
+     the Toom family, then FFT.  */
+  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
+      mpn_mul_basecase (p, a, n, a, n);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
+    {
+      mpn_sqr_basecase (p, a, n);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)];
+      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
+      mpn_toom2_sqr (p, a, n, ws);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));
+      mpn_toom3_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
+      mpn_toom4_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
+      mpn_toom6_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
+    {
+      mp_ptr ws;
+      /* NOTE(review): this tier switches from the TMP_S* allocators used
+	 above to the plain TMP_* ones — presumably because toom8 scratch
+	 can be too large for stack allocation; confirm against TMP docs.  */
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
+      mpn_toom8_sqr (p, a, n, ws);
+      TMP_FREE;
+    }
+  else
+    {
+      /* The current FFT code allocates its own space.  That should probably
+	 change.  */
+      mpn_fft_mul (p, a, n, a, n);
+    }
+}
diff --git a/mpn/generic/sqr_basecase.c b/mpn/generic/sqr_basecase.c
new file mode 100644
index 0000000..2645bad
--- /dev/null
+++ b/mpn/generic/sqr_basecase.c
@@ -0,0 +1,361 @@
+/* mpn_sqr_basecase -- Internal routine to square a natural number
+   of length n.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if HAVE_NATIVE_mpn_sqr_diagonal
+#define MPN_SQR_DIAGONAL(rp, up, n)					\
+  mpn_sqr_diagonal (rp, up, n)
+#else
+#define MPN_SQR_DIAGONAL(rp, up, n)					\
+  do {									\
+    mp_size_t _i;							\
+    for (_i = 0; _i < (n); _i++)					\
+      {									\
+	mp_limb_t ul, lpl;						\
+	ul = (up)[_i];							\
+	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
+	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
+      }									\
+  } while (0)
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
+  mpn_sqr_diag_addlsh1 (rp, tp, up, n)
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    mp_limb_t cy;							\
+    MPN_SQR_DIAGONAL (rp, up, n);					\
+    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
+    rp[2 * n - 1] += cy;						\
+  } while (0)
+#else
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    mp_limb_t cy;							\
+    MPN_SQR_DIAGONAL (rp, up, n);					\
+    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
+    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
+    rp[2 * n - 1] += cy;						\
+  } while (0)
+#endif
+#endif
+
+
+#undef READY_WITH_mpn_sqr_basecase
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
+/* Square using native mpn_addmul_2s: accumulate products into tp two base
+   limbs at a time, then let MPN_SQR_DIAG_ADDLSH1 double tp and add the
+   diagonal squares u[i]^2.  (The `s' variant presumably handles the
+   square term specially — confirm against the providing assembly.)  */
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+  mp_ptr tp = tarr;
+  mp_limb_t cy;
+
+  /* must fit 2*n limbs in tarr */
+  ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+  if ((n & 1) != 0)
+    {
+      if (n == 1)
+	{
+	  /* 1x1: a single umul_ppmm, with nail adjustment.  */
+	  mp_limb_t ul, lpl;
+	  ul = up[0];
+	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+	  rp[0] = lpl >> GMP_NAIL_BITS;
+	  return;
+	}
+
+      MPN_ZERO (tp, n);
+
+      for (i = 0; i <= n - 2; i += 2)
+	{
+	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+	  tp[n + i] = cy;
+	}
+    }
+  else
+    {
+      if (n == 2)
+	{
+#if HAVE_NATIVE_mpn_mul_2
+	  rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
+	  rp[0] = 0;
+	  rp[1] = 0;
+	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
+	  return;
+	}
+
+      MPN_ZERO (tp, n);
+
+      /* Even n: the final row is a single-limb mpn_addmul_1 rather than a
+	 two-limb row.  */
+      for (i = 0; i <= n - 4; i += 2)
+	{
+	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+	  tp[n + i] = cy;
+	}
+      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
+      tp[2 * n - 3] = cy;
+    }
+
+  /* rp <- diagonal squares, then rp += 2*tp shifted up one limb (see the
+     MPN_SQR_DIAG_ADDLSH1 definition above).  */
+  MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
+
+/* mpn_sqr_basecase using plain mpn_addmul_2.
+
+   This is tricky, since we have to let mpn_addmul_2 make some undesirable
+   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
+   This forces us to conditionally add or subtract the mpn_sqr_diagonal
+   results.  Examples of the product we form:
+
+   n = 4              n = 5		n = 6
+   u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
+   u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
+					u4 * u5
+   add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
+   sub: u1	      sub: u1 u3	sub: u1 u3
+*/
+
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+  mp_ptr tp = tarr;
+  mp_limb_t cy;
+
+  /* must fit 2*n limbs in tarr */
+  ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+  if ((n & 1) != 0)
+    {
+      mp_limb_t x0, x1;
+
+      if (n == 1)
+	{
+	  /* 1x1: a single umul_ppmm, with nail adjustment.  */
+	  mp_limb_t ul, lpl;
+	  ul = up[0];
+	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+	  rp[0] = lpl >> GMP_NAIL_BITS;
+	  return;
+	}
+
+      /* The code below doesn't like unnormalized operands.  Since such
+	 operands are unusual, handle them with a dumb recursion.  */
+      if (up[n - 1] == 0)
+	{
+	  rp[2 * n - 2] = 0;
+	  rp[2 * n - 1] = 0;
+	  mpn_sqr_basecase (rp, up, n - 1);
+	  return;
+	}
+
+      MPN_ZERO (tp, n);
+
+      /* Accumulate cross products into tp, two base limbs per row.  The
+	 rows include the unwanted u[k]*u[k] terms (see the comment above
+	 this function).  */
+      for (i = 0; i <= n - 2; i += 2)
+	{
+	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+	  tp[n + i] = cy;
+	}
+
+      /* Write all diagonal squares u[k]^2 into rp.  */
+      MPN_SQR_DIAGONAL (rp, up, n);
+
+      /* Fix up the diagonal terms which must effectively be subtracted
+	 rather than added, per the add/sub pattern documented above:
+	 negate alternating limb pairs and propagate the borrow.  */
+      for (i = 2;; i += 4)
+	{
+	  x0 = rp[i + 0];
+	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
+	  x1 = rp[i + 1];
+	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
+	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
+	  if (i + 4 >= 2 * n)
+	    break;
+	  mpn_incr_u (rp + i + 4, cy);
+	}
+    }
+  else
+    {
+      mp_limb_t x0, x1;
+
+      if (n == 2)
+	{
+#if HAVE_NATIVE_mpn_mul_2
+	  rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
+	  rp[0] = 0;
+	  rp[1] = 0;
+	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
+	  return;
+	}
+
+      /* The code below doesn't like unnormalized operands.  Since such
+	 operands are unusual, handle them with a dumb recursion.  */
+      if (up[n - 1] == 0)
+	{
+	  rp[2 * n - 2] = 0;
+	  rp[2 * n - 1] = 0;
+	  mpn_sqr_basecase (rp, up, n - 1);
+	  return;
+	}
+
+      MPN_ZERO (tp, n);
+
+      /* Even n: the final row is a single-limb mpn_addmul_1.  */
+      for (i = 0; i <= n - 4; i += 2)
+	{
+	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+	  tp[n + i] = cy;
+	}
+      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
+      tp[2 * n - 3] = cy;
+
+      MPN_SQR_DIAGONAL (rp, up, n);
+
+      /* Same fixup as in the odd case, with a trailing borrow handled by
+	 mpn_decr_u after the loop.  */
+      for (i = 2;; i += 4)
+	{
+	  x0 = rp[i + 0];
+	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
+	  x1 = rp[i + 1];
+	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
+	  if (i + 6 >= 2 * n)
+	    break;
+	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
+	  mpn_incr_u (rp + i + 4, cy);
+	}
+      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
+    }
+
+  /* Add twice the accumulated products: rp += 2*tp, shifted up one limb.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+  rp[2 * n - 1] += cy;
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1
+
+/* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack
+   allocation.  */
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  if (n == 1)
+    {
+      /* 1x1: a single umul_ppmm, with nail adjustment.  */
+      mp_limb_t ul, lpl;
+      ul = up[0];
+      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+      rp[0] = lpl >> GMP_NAIL_BITS;
+    }
+  else
+    {
+      mp_size_t i;
+      mp_ptr xp;
+
+      /* Build the triangle of cross products u[i]*u[j] (i < j) directly
+	 in rp, one shrinking mpn_addmul_1 row per base limb.  */
+      rp += 1;
+      rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]);
+      for (i = n - 2; i != 0; i--)
+	{
+	  up += 1;
+	  rp += 2;
+	  rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]);
+	}
+
+      /* Rewind the bumped pointers and let the native routine double the
+	 cross products and add the diagonal squares in place.  */
+      xp = rp - 2 * n + 3;
+      mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n);
+    }
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase)
+
+/* Default mpn_sqr_basecase using mpn_addmul_1.  */
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
+
+  if (n == 1)
+    {
+      /* 1x1: a single umul_ppmm, with nail adjustment.  */
+      mp_limb_t ul, lpl;
+      ul = up[0];
+      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+      rp[0] = lpl >> GMP_NAIL_BITS;
+    }
+  else
+    {
+      mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+      mp_ptr tp = tarr;
+      mp_limb_t cy;
+
+      /* must fit 2*n limbs in tarr */
+      ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+      /* Accumulate the cross products u[i]*u[j], i < j, into tp, one
+	 mpn_addmul_1 row per base limb.  */
+      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
+      tp[n - 1] = cy;
+      for (i = 2; i < n; i++)
+	{
+	  /* Inner cy intentionally shadows the outer one; both are carries.  */
+	  mp_limb_t cy;
+	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
+	  tp[n + i - 2] = cy;
+	}
+
+      /* Double the cross products and add the diagonal squares u[i]^2
+	 (see the MPN_SQR_DIAG_ADDLSH1 definition above).  */
+      MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+    }
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
diff --git a/mpn/generic/sqrlo.c b/mpn/generic/sqrlo.c
new file mode 100644
index 0000000..71530b6
--- /dev/null
+++ b/mpn/generic/sqrlo.c
@@ -0,0 +1,239 @@
+/* mpn_sqrlo -- squares an n-limb number and returns the low n limbs
+   of the result.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THIS IS (FOR NOW) AN INTERNAL FUNCTION.  IT IS ONLY SAFE TO REACH THIS
+   FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
+   THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005, 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_range_basecase 1
+#define MAYBE_range_toom22   1
+#else
+#define MAYBE_range_basecase                                           \
+  ((SQRLO_DC_THRESHOLD == 0 ? SQRLO_BASECASE_THRESHOLD : SQRLO_DC_THRESHOLD) < SQR_TOOM2_THRESHOLD*36/(36-11))
+#define MAYBE_range_toom22                                             \
+  ((SQRLO_DC_THRESHOLD == 0 ? SQRLO_BASECASE_THRESHOLD : SQRLO_DC_THRESHOLD) < SQR_TOOM3_THRESHOLD*36/(36-11) )
+#endif
+
+/*  THINK: The DC strategy uses different constants in different Toom's
+	 ranges. Something smoother?
+*/
+
+/*
+  Compute the least significant half of the product {xp,n}*{yp,n}, or
+  formally {rp,n} = {xp,n}*{yp,n} Mod (B^n).
+
+  Above the given threshold, the Divide and Conquer strategy is used.
+  The operand is split in two, and a full square plus a mullo
+  is used to obtain the final result. The more natural strategy is to
+  split in two halves, but this is far from optimal when a
+  sub-quadratic multiplication is used.
+
+  Mulders suggests an unbalanced split in favour of the full product,
+  split n = n1 + n2, where an = n1 <= n2 = (1-a)n; i.e. 0 < a <= 1/2.
+
+  To compute the value of a, we assume that the cost of mullo for a
+  given size ML(n) is a fraction of the cost of a full product with
+  same size M(n), and the cost M(n)=n^e for some exponent 1 < e <= 2;
+  then we can write:
+
+  ML(n) = 2*ML(an) + M((1-a)n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
+
+  Given a value for e, want to minimise the value of k, i.e. the
+  function k=(1-a)^e/(1-2*a^e).
+
+  With e=2, the exponent for schoolbook multiplication, the minimum is
+  given by the values a=1-a=1/2.
+
+  With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
+  Mulders computes (1-a) = 0.694... and we approximate a with 11/36.
+
+  Other possible approximations follow:
+  e=log(5)/log(3) [Toom-3] -> a ~= 9/40
+  e=log(7)/log(4) [Toom-4] -> a ~= 7/39
+  e=log(11)/log(6) [Toom-6] -> a ~= 1/8
+  e=log(15)/log(8) [Toom-8] -> a ~= 1/10
+
+  The values above were obtained with the following trivial commands
+  in the gp-pari shell:
+
+fun(e,a)=(1-a)^e/(1-2*a^e)
+mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
+contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
+contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
+contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
+contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
+contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
+
+  ,
+  |\
+  | \
+  +----,
+  |    |
+  |    |
+  |    |\
+  |    | \
+  +----+--`
+  ^ n2 ^n1^
+
+  For an actual implementation, the assumption that M(n)=n^e is
+  incorrect, as a consequence also the assumption that ML(n)=k*M(n)
+  with a constant k is wrong.
+
+  But theory suggests two things:
+  - the better the full multiplication is (lower e), the more k
+    approaches 1, and a approaches 0.
+
+  - A value for a smaller than optimal is probably less bad than a
+    bigger one: e.g. let e=log(3)/log(2), a=0.3058_ the optimal
+    value, and k(a)=0.808_ the mul/mullo speed ratio. We get
+    k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
+*/
+
+/* Scratch space, in limbs, needed by mpn_dc_sqrlo for an n-limb
+   operand: n limbs for the full square of the low part plus n limbs
+   for the mullo of the cross product.  */
+static mp_size_t
+mpn_sqrlo_itch (mp_size_t n)
+{
+  return 2*n;
+}
+
+/*
+    mpn_dc_sqrlo requires a scratch space of 2*n limbs at tp.
+    It accepts tp == rp.
+*/
+/* Divide-and-conquer low square: {rp,n} = {xp,n}^2 mod B^n.
+   Splits x = x1*B^n2 + x0 with an unbalanced split (Mulders), computes
+   the full square of x0 and a mullo of 2*x0*x1, and discards the rest.
+   Requires 2n limbs of scratch at tp; tp == rp is accepted.  */
+static void
+mpn_dc_sqrlo (mp_ptr rp, mp_srcptr xp, mp_size_t n, mp_ptr tp)
+{
+  mp_size_t n2, n1;
+  ASSERT (n >= 2);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
+
+  /* Divide-and-conquer */
+
+  /* We need fractional approximation of the value 0 < a <= 1/2
+     giving the minimum in the function k=(1-a)^e/(1-2*a^e).
+     The fraction chosen depends on which Toom range the underlying
+     full squaring falls into (see the long comment above).  */
+  if (MAYBE_range_basecase && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD*36/(36-11)))
+    n1 = n >> 1;
+  else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD*36/(36-11)))
+    n1 = n * 11 / (size_t) 36;	/* n1 ~= n*(1-.694...) */
+  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD*40/(40-9)))
+    n1 = n * 9 / (size_t) 40;	/* n1 ~= n*(1-.775...) */
+  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD*10/9))
+    n1 = n * 7 / (size_t) 39;	/* n1 ~= n*(1-.821...) */
+  /* n1 = n * 4 / (size_t) 31;	// n1 ~= n*(1-.871...) [TOOM66] */
+  else
+    n1 = n / (size_t) 10;		/* n1 ~= n*(1-.899...) [TOOM88] */
+
+  n2 = n - n1;
+
+  /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0 */
+
+  /* x0 ^ 2 */
+  mpn_sqr (tp, xp, n2);
+  MPN_COPY (rp, tp, n2);
+
+  /* x1 * x0 * 2^(n2 GMP_NUMB_BITS); only the low n1 limbs of the
+     cross product matter, so use the cheapest applicable mullo.  */
+  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+    mpn_mul_basecase (tp + n, xp + n2, n1, xp, n1);
+  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+    mpn_mullo_basecase (tp + n, xp + n2, xp, n1);
+  else
+    mpn_mullo_n (tp + n, xp + n2, xp, n1);
+  /* mpn_dc_mullo_n (tp + n, xp + n2, xp, n1, tp + n); */
+  /* rp[n2..n-1] = high part of x0^2 + 2*x0*x1 (mod B^n1).  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  mpn_addlsh1_n (rp + n2, tp + n2, tp + n, n1);
+#else
+  mpn_lshift (rp + n2, tp + n, n1, 1);
+  mpn_add_n (rp + n2, rp + n2, tp + n2, n1);
+#endif
+}
+
+/* Avoid zero allocations when SQRLO_BASECASE_THRESHOLD_LIMIT is 0.  */
+#define SQR_BASECASE_ALLOC \
+ (SQRLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*SQRLO_BASECASE_THRESHOLD_LIMIT)
+
+/* FIXME: This function should accept a temporary area; dc_sqrlo
+   accepts a pointer tp, and handle the case tp == rp, do the same here.
+*/
+
+/* {rp,n} = {xp,n}^2 mod B^n.  Dispatches on n between a basecase
+   (mullo or full square + copy), mpn_sqrlo_basecase, the
+   divide-and-conquer mpn_dc_sqrlo, and, for huge operands, a full
+   square whose upper half is simply discarded.  */
+void
+mpn_sqrlo (mp_ptr rp, mp_srcptr xp, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+
+  if (BELOW_THRESHOLD (n, SQRLO_BASECASE_THRESHOLD))
+    {
+      /* FIXME: smarter criteria? */
+#if HAVE_NATIVE_mpn_mullo_basecase || ! HAVE_NATIVE_mpn_sqr_basecase
+      /* mullo computes as many products as sqr, but directly writes
+	 on the result area. */
+      mpn_mullo_basecase (rp, xp, xp, n);
+#else
+      /* Allocate workspace of fixed size on stack: fast!  Native
+	 sqr_basecase writes 2n limbs, so square into tp and copy the
+	 low half out.  */
+      mp_limb_t tp[SQR_BASECASE_ALLOC];
+      mpn_sqr_basecase (tp, xp, n);
+      MPN_COPY (rp, tp, n);
+#endif
+    }
+  else if (BELOW_THRESHOLD (n, SQRLO_DC_THRESHOLD))
+    {
+      mpn_sqrlo_basecase (rp, xp, n);
+    }
+  else
+    {
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+      tp = TMP_ALLOC_LIMBS (mpn_sqrlo_itch (n));
+      if (BELOW_THRESHOLD (n, SQRLO_SQR_THRESHOLD))
+	{
+	  mpn_dc_sqrlo (rp, xp, n, tp);
+	}
+      else
+	{
+	  /* For really large operands, use plain mpn_mul_n but throw away upper n
+	     limbs of result.  */
+#if !TUNE_PROGRAM_BUILD && (SQRLO_SQR_THRESHOLD > SQR_FFT_THRESHOLD)
+	  mpn_fft_mul (tp, xp, n, xp, n);
+#else
+	  mpn_sqr (tp, xp, n);
+#endif
+	  MPN_COPY (rp, tp, n);
+	}
+      TMP_FREE;
+    }
+}
diff --git a/mpn/generic/sqrlo_basecase.c b/mpn/generic/sqrlo_basecase.c
new file mode 100644
index 0000000..3148609
--- /dev/null
+++ b/mpn/generic/sqrlo_basecase.c
@@ -0,0 +1,194 @@
+/* mpn_sqrlo_basecase -- Internal routine to square a natural number
+   of length n.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015,
+2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
+#if HAVE_NATIVE_mpn_addmul_1
+#define SQRLO_SHORTCUT_MULTIPLICATIONS 0
+#else
+#define SQRLO_SHORTCUT_MULTIPLICATIONS 1
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_diagonal
+#define MPN_SQR_DIAGONAL(rp, up, n)					\
+  mpn_sqr_diagonal (rp, up, n)
+#else
+#define MPN_SQR_DIAGONAL(rp, up, n)					\
+  do {									\
+    mp_size_t _i;							\
+    for (_i = 0; _i < (n); _i++)					\
+      {									\
+	mp_limb_t ul, lpl;						\
+	ul = (up)[_i];							\
+	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
+	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
+      }									\
+  } while (0)
+#endif
+
+#define MPN_SQRLO_DIAGONAL(rp, up, n)					\
+  do {									\
+    mp_size_t nhalf;							\
+    nhalf = (n) >> 1;							\
+    MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
+    if (((n) & 1) != 0)							\
+      {									\
+	mp_limb_t op;							\
+	op = (up)[nhalf];						\
+	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
+      }									\
+  } while (0)
+
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
+    mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
+  } while (0)
+#else
+#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
+    mpn_lshift ((tp), (tp), (n) - 1, 1);				\
+    mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
+  } while (0)
+#endif
+
+/* Avoid zero allocations when SQRLO_DC_THRESHOLD_LIMIT is below 2 (this code not used). */
+#define SQRLO_BASECASE_ALLOC						\
+  (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
+
+/* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
+#ifndef SQRLO_SPECIAL_CASES
+#define SQRLO_SPECIAL_CASES 2
+#endif
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_special_cases 1
+#else
+#define MAYBE_special_cases \
+  ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0))
+#endif
+
+/* {rp,n} = {up,n}^2 mod B^n, schoolbook style.  Sizes up to
+   SQRLO_SPECIAL_CASES are expanded inline limb-by-limb; larger sizes
+   build the truncated off-diagonal triangle in stack scratch and fold
+   in the diagonal with MPN_SQRLO_DIAG_ADDLSH1.  */
+void
+mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_limb_t ul;
+
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
+
+  ul = up[0];
+
+  if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES)
+    {
+#if SQRLO_SPECIAL_CASES == 1
+      /* n == 1: only the low limb of u0^2 survives mod B.  */
+      rp[0] = (ul * ul) & GMP_NUMB_MASK;
+#else
+      if (n == 1)
+	rp[0] = (ul * ul) & GMP_NUMB_MASK;
+      else
+	{
+	  /* n == 2 (or 3 below): (u1*B + u0)^2 mod B^n, computed with
+	     explicit limb products; nails handled by pre-shifts.  */
+	  mp_limb_t hi, lo, ul1;
+	  umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
+	  rp[0] = lo >> GMP_NAIL_BITS;
+	  ul1 = up[1];
+#if SQRLO_SPECIAL_CASES == 2
+	  rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
+#else
+	  if (n == 2)
+	    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
+	  else
+	    {
+	      /* n == 3: also need 2*u0*u1 into limbs 1..2 and
+		 (2*u0*u2 + u1^2) into limb 2.  */
+	      mp_limb_t hi1;
+#if GMP_NAIL_BITS != 0
+	      ul <<= 1;
+#endif
+	      umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
+	      hi1 += ul * up[2];
+#if GMP_NAIL_BITS == 0
+	      /* Double the cross product by shifting the 2-limb value
+		 (hi1,lo) left one bit, then add u1^2 and the carry hi.  */
+	      hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
+	      add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
+#else
+	      hi += lo >> GMP_NAIL_BITS;
+	      rp[1] = hi & GMP_NUMB_MASK;
+	      rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
+#endif
+	    }
+#endif
+	}
+#endif
+    }
+  else
+    {
+      mp_limb_t tp[SQRLO_BASECASE_ALLOC];
+      mp_size_t i;
+
+      /* must fit n-1 limbs in tp */
+      ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
+
+      --n;
+#if SQRLO_SHORTCUT_MULTIPLICATIONS
+      /* Build the truncated triangle of u[i]*u[j] (i<j, i+j < n+1),
+	 computing the topmost (mod-B-truncated) product of each row as
+	 a plain single-limb multiply instead of an mpn call.  */
+      {
+	mp_limb_t cy;
+
+	cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
+	for (i = 1; 2 * i + 1 < n; ++i)
+	  {
+	    ul = up[i];
+	    cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
+	  }
+	/* Top scratch limb: accumulated carries plus, for odd n, the
+	   one remaining cross product up[i]*up[i+1].  */
+	tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
+      }
+#else
+      /* Same triangle using full mpn calls; excess high limbs beyond
+	 the truncation are simply never stored.  */
+      mpn_mul_1 (tp, up + 1, n, ul);
+      for (i = 1; 2 * i < n; ++i)
+	mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
+#endif
+
+      /* rp = 2*triangle + diagonal squares, all mod B^(n+1).  */
+      MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
+    }
+}
+#undef SQRLO_SPECIAL_CASES
+#undef MAYBE_special_cases
+#undef SQRLO_BASECASE_ALLOC
+#undef SQRLO_SHORTCUT_MULTIPLICATIONS
+#undef MPN_SQR_DIAGONAL
+#undef MPN_SQRLO_DIAGONAL
+#undef MPN_SQRLO_DIAG_ADDLSH1
diff --git a/mpn/generic/sqrmod_bnm1.c b/mpn/generic/sqrmod_bnm1.c
new file mode 100644
index 0000000..0acbe12
--- /dev/null
+++ b/mpn/generic/sqrmod_bnm1.c
@@ -0,0 +1,328 @@
+/* sqrmod_bnm1.c -- squaring mod B^n-1.
+
+   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+   Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2020, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Input is {ap,rn}; output is {rp,rn}, computation is
+   mod B^rn - 1, and values are semi-normalised; zero is represented
+   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
+   tp==rp is allowed. */
+static void
+mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+
+  /* Full square, then reduce mod B^rn - 1 by folding the high half
+     onto the low half (B^rn == 1 mod B^rn - 1).  */
+  mpn_sqr (tp, ap, rn);
+  cy = mpn_add_n (rp, tp, tp + rn, rn);
+  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+   * be no overflow when adding in the carry. */
+  MPN_INCR_U (rp, rn, cy);
+}
+
+
+/* Input is {ap,rn+1}; output is {rp,rn+1}, in
+   normalised representation, computation is mod B^rn + 1. Needs
+   a scratch area of 2rn limbs at tp; tp == rp is allowed.
+   Output is normalised. */
+static void
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+  mp_limb_t cy;
+  unsigned k;
+
+  ASSERT (0 < rn);
+
+  if (UNLIKELY (ap[rn]))
+    {
+      /* Top limb set means a == B^rn == -1 mod B^rn + 1 (normalised
+	 input), so the square is 1.  */
+      *rp = 1;
+      MPN_FILL (rp + 1, rn, 0);
+      return;
+    }
+  else if (MPN_SQRMOD_BKNP1_USABLE (rn, k, MUL_FFT_MODF_THRESHOLD))
+    {
+      /* rn has a suitable small factor k: delegate to the specialised
+	 mod B^(n_k*k)+1 squaring.  */
+      mp_size_t n_k = rn / k;
+      TMP_DECL;
+
+      TMP_MARK;
+      mpn_sqrmod_bknp1 (rp, ap, n_k, k,
+			TMP_ALLOC_LIMBS (mpn_sqrmod_bknp1_itch (rn)));
+      TMP_FREE;
+      return;
+    }
+  /* Full square, then reduce: B^rn == -1, so subtract the high half
+     from the low half and fix up the borrow.  */
+  mpn_sqr (tp, ap, rn);
+  cy = mpn_sub_n (rp, tp, tp + rn, rn);
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn + 1, cy);
+}
+
+
+/* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if the operand
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1.
+ * It should not be a problem if sqrmod_bnm1 is used to
+ * compute the full square with 2*an <= rn, because this condition
+ * implies (B^an-1)^2 < (B^rn-1) .
+ *
+ * Requires rn/4 < an <= rn
+ * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
+ *
+ * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
+ */
+void
+mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
+{
+  ASSERT (0 < an);
+  ASSERT (an <= rn);
+
+  /* Odd rn cannot be split in half; small rn is not worth the CRT.  */
+  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
+    {
+      if (UNLIKELY (an < rn))
+	{
+	  if (UNLIKELY (2*an <= rn))
+	    {
+	      /* Square fits entirely below B^rn: no reduction needed.  */
+	      mpn_sqr (rp, ap, an);
+	    }
+	  else
+	    {
+	      /* Square into scratch, then wrap the 2an-rn excess limbs
+		 around (B^rn == 1).  */
+	      mp_limb_t cy;
+	      mpn_sqr (tp, ap, an);
+	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
+	      MPN_INCR_U (rp, rn, cy);
+	    }
+	}
+      else
+	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
+    }
+  else
+    {
+      mp_size_t n;
+      mp_limb_t cy;
+      mp_limb_t hi;
+
+      n = rn >> 1;
+
+      ASSERT (2*an > n);
+
+      /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
+	 and crt together as
+
+	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
+      */
+
+#define a0 ap
+#define a1 (ap + n)
+
+#define xp  tp	/* 2n + 2 */
+      /* am1  maybe in {xp, n} */
+#define sp1 (tp + 2*n + 2)
+      /* ap1  maybe in {sp1, n + 1} */
+
+      /* xm = (a0 + a1)^2 mod (B^n - 1), via recursive call.  */
+      {
+	mp_srcptr am1;
+	mp_size_t anm;
+	mp_ptr so;
+
+	if (LIKELY (an > n))
+	  {
+	    /* Fold a mod B^n - 1 first: am1 = a0 + a1 (wrapped).  */
+	    so = xp + n;
+	    am1 = xp;
+	    cy = mpn_add (xp, a0, n, a1, an - n);
+	    MPN_INCR_U (xp, n, cy);
+	    anm = n;
+	  }
+	else
+	  {
+	    /* a already fits in n limbs; use it directly.  */
+	    so = xp;
+	    am1 = a0;
+	    anm = an;
+	  }
+
+	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
+      }
+
+      /* xp = (a0 - a1)^2 mod (B^n + 1), via FFT when possible.  */
+      {
+	int       k;
+	mp_srcptr ap1;
+	mp_size_t anp;
+
+	if (LIKELY (an > n)) {
+	  /* Fold a mod B^n + 1: ap1 = a0 - a1, normalised to n+1 limbs.  */
+	  ap1 = sp1;
+	  cy = mpn_sub (sp1, a0, n, a1, an - n);
+	  sp1[n] = 0;
+	  MPN_INCR_U (sp1, n + 1, cy);
+	  anp = n + ap1[n];
+	} else {
+	  ap1 = a0;
+	  anp = an;
+	}
+
+	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+	  k=0;
+	else
+	  {
+	    /* Pick the largest FFT k whose 2^k divides n.  */
+	    int mask;
+	    k = mpn_fft_best_k (n, 1);
+	    mask = (1<<k) -1;
+	    while (n & mask) {k--; mask >>=1;};
+	  }
+	if (k >= FFT_FIRST_K)
+	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
+	else if (UNLIKELY (ap1 == a0))
+	  {
+	    /* Short operand used unfolded: square it fully, then
+	       reduce mod B^n + 1 by subtracting the wrapped high part.  */
+	    ASSERT (anp <= n);
+	    ASSERT (2*anp > n);
+	    mpn_sqr (xp, a0, an);
+	    anp = 2*an - n;
+	    cy = mpn_sub (xp, xp, n, xp + n, anp);
+	    xp[n] = 0;
+	    MPN_INCR_U (xp, n+1, cy);
+	  }
+	else
+	  mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
+      }
+
+      /* Here the CRT recomposition begins.
+
+	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+	 Division by 2 is a bitwise rotation.
+
+	 Assumes xp normalised mod (B^n+1).
+
+	 The residue class [0] is represented by [B^n-1]; except when
+	 both input are ZERO.
+      */
+
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+      hi = cy << (GMP_NUMB_BITS - 1);
+      cy = 0;
+      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+	 overflows, i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
+	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
+#endif
+#if GMP_NAIL_BITS == 0
+      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
+#else
+      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+      rp[n-1] ^= hi;
+#endif
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+      cy += (rp[0]&1);
+      mpn_rshift(rp, rp, n, 1);
+      ASSERT (cy <= 2);
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* We can have cy != 0 only if hi = 0... */
+      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+      rp[n-1] |= hi;
+      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
+#endif
+      ASSERT (cy <= 1);
+      /* Next increment can not overflow, read the previous comments about cy. */
+      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+      MPN_INCR_U(rp, n, cy);
+
+      /* Compute the highest half:
+	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
+       */
+      if (UNLIKELY (2*an < rn))
+	{
+	  /* Note that in this case, the only way the result can equal
+	     zero mod B^{rn} - 1 is if the input is zero, and
+	     then the output of both the recursive calls and this CRT
+	     reconstruction is zero, not B^{rn} - 1. */
+	  cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
+
+	  /* FIXME: This subtraction of the high parts is not really
+	     necessary, we do it to get the carry out, and for sanity
+	     checking. */
+	  cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
+				   xp + 2*an - n, rn - 2*an, cy);
+	  ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
+	  cy = mpn_sub_1 (rp, rp, 2*an, cy);
+	  ASSERT (cy == (xp + 2*an - n)[0]);
+	}
+      else
+	{
+	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+	     DECR will affect _at most_ the lowest n limbs. */
+	  MPN_DECR_U (rp, 2*n, cy);
+	}
+#undef a0
+#undef a1
+#undef xp
+#undef sp1
+    }
+}
+
+/* Smallest size >= n that mpn_sqrmod_bnm1 can handle efficiently:
+   below the threshold any n is fine; in the recursive ranges round up
+   to a multiple of 2, 4 or 8 so the halving splits stay even; in the
+   FFT range round to an FFT-friendly size.  */
+mp_size_t
+mpn_sqrmod_bnm1_next_size (mp_size_t n)
+{
+  mp_size_t nh;
+
+  if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
+    return n;
+  if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + (2-1)) & (-2);
+  if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + (4-1)) & (-4);
+
+  nh = (n + 1) >> 1;
+
+  if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
+    return (n + (8-1)) & (-8);
+
+  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
+}
diff --git a/mpn/generic/sqrtrem.c b/mpn/generic/sqrtrem.c
new file mode 100644
index 0000000..cc6dd9c
--- /dev/null
+++ b/mpn/generic/sqrtrem.c
@@ -0,0 +1,555 @@
+/* mpn_sqrtrem -- square root and remainder
+
+   Contributed to the GNU project by Paul Zimmermann (most code),
+   Torbjorn Granlund (mpn_sqrtrem1) and Marco Bodrato (mpn_dc_sqrt).
+
+   THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH MUTABLE
+   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+   IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
+   FUTURE GMP RELEASE.
+
+Copyright 1999-2002, 2004, 2005, 2008, 2010, 2012, 2015, 2017 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* See "Karatsuba Square Root", reference in gmp.texi.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#define USE_DIVAPPR_Q 1
+#define TRACE(x)
+
+/* 8-bit seed approximations of 1/sqrt(x), indexed by the top bits of
+   the normalised operand minus 0x80 (see mpn_sqrtrem1: the implicit
+   high bit is restored there with `0x100 | invsqrttab[...]`).  */
+static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
+{
+  0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
+  0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
+  0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
+  0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
+  0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
+  0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
+  0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
+  0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
+  0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
+  0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
+  0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
+  0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
+  0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
+  0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
+  0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
+  0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
+  0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
+  0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
+  0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
+  0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
+  0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
+  0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
+  0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
+  0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
+  0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
+  0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
+  0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
+  0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
+  0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
+  0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
+  0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
+  0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
+  0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
+  0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
+  0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
+  0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
+  0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
+  0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
+  0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
+  0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
+  0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
+  0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
+  0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
+  0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
+  0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
+  0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
+  0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
+  0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00  /* sqrt(1/1f8)..sqrt(1/1ff) */
+};
+
+/* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2.  */
+
+#if GMP_NUMB_BITS > 32
+#define MAGIC CNST_LIMB(0x10000000000)	/* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
+#else
+#define MAGIC CNST_LIMB(0x100000)		/* 0xfee6f < MAGIC < 0x29cbc8 */
+#endif
+
+static mp_limb_t
+mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
+{
+#if GMP_NUMB_BITS > 32
+  mp_limb_t a1;
+#endif
+  mp_limb_t x0, t2, t, x2;
+  unsigned abits;
+
+  ASSERT_ALWAYS (GMP_NAIL_BITS == 0);
+  ASSERT_ALWAYS (GMP_LIMB_BITS == 32 || GMP_LIMB_BITS == 64);
+  ASSERT (a0 >= GMP_NUMB_HIGHBIT / 2);	/* input must be normalised */
+
+  /* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),
+     since we can do the former without division.  As part of the last
+     iteration convert from 1/sqrt(a) to sqrt(a).  */
+
+  abits = a0 >> (GMP_LIMB_BITS - 1 - 8);	/* extract bits for table lookup */
+  x0 = 0x100 | invsqrttab[abits - 0x80];	/* initial 1/sqrt(a) */
+
+  /* x0 is now an 8 bits approximation of 1/sqrt(a0) */
+
+#if GMP_NUMB_BITS > 32
+  /* One Newton step on 1/sqrt using the top 32 bits of a0; the signed
+     shift applies the correction t with its sign.  */
+  a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
+  t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000 - a1 * x0 * x0) >> 16;
+  x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
+
+  /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
+
+  /* Final step converts to sqrt(a0) ~= a0 * (1/sqrt(a0)); MAGIC
+     biases the residual so the floor comes out right (see the
+     bounds noted at the MAGIC definition).  */
+  t2 = x0 * (a0 >> (32-8));
+  t = t2 >> 25;
+  t = ((mp_limb_signed_t) ((a0 << 14) - t * t - MAGIC) >> (32-8));
+  x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 15);
+  x0 >>= 32;
+#else
+  /* 32-bit limbs: same conversion step with halved shift counts.  */
+  t2 = x0 * (a0 >> (16-8));
+  t = t2 >> 13;
+  t = ((mp_limb_signed_t) ((a0 << 6) - t * t - MAGIC) >> (16-8));
+  x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 7);
+  x0 >>= 16;
+#endif
+
+  /* x0 is now a full limb approximation of sqrt(a0) */
+
+  /* At most one final adjustment: if (x0+1)^2 <= a0, bump x0.  */
+  x2 = x0 * x0;
+  if (x2 + 2*x0 <= a0 - 1)
+    {
+      x2 += 2*x0 + 1;
+      x0++;
+    }
+
+  *rp = a0 - x2;
+  return x0;
+}
+
+
+#define Prec (GMP_NUMB_BITS >> 1)
+#if ! defined(SQRTREM2_INPLACE)
+#define SQRTREM2_INPLACE 0
+#endif
+
+/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
+   return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
+#if SQRTREM2_INPLACE
+#define CALL_SQRTREM2_INPLACE(sp,rp) mpn_sqrtrem2 (sp, rp)
+static mp_limb_t
+mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp)
+{
+  /* In-place variant: the 2-limb input is read from rp itself.  */
+  mp_srcptr np = rp;
+#else
+#define CALL_SQRTREM2_INPLACE(sp,rp) mpn_sqrtrem2 (sp, rp, rp)
+static mp_limb_t
+mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
+{
+#endif
+  mp_limb_t q, u, np0, sp0, rp0, q2;
+  int cc;
+
+  ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);
+
+  /* Root of the high limb gives the high half of the result.  */
+  np0 = np[0];
+  sp0 = mpn_sqrtrem1 (rp, np[1]);
+  rp0 = rp[0];
+  /* rp0 <= 2*sp0 < 2^(Prec + 1) */
+  /* Fold the top half-limb of np0 into the remainder, then divide by
+     the partial root to estimate the low half q of the root.  */
+  rp0 = (rp0 << (Prec - 1)) + (np0 >> (Prec + 1));
+  q = rp0 / sp0;
+  /* q <= 2^Prec, if q = 2^Prec, reduce the overestimate. */
+  q -= q >> Prec;
+  /* now we have q < 2^Prec */
+  u = rp0 - q * sp0;
+  /* now we have (rp[0]<<Prec + np0>>Prec)/2 = q * sp0 + u */
+  sp0 = (sp0 << Prec) | q;
+  cc = u >> (Prec - 1);
+  rp0 = ((u << (Prec + 1)) & GMP_NUMB_MASK) + (np0 & ((CNST_LIMB (1) << (Prec + 1)) - 1));
+  /* subtract q * q from rp */
+  q2 = q * q;
+  cc -= rp0 < q2;
+  rp0 -= q2;
+  if (cc < 0)
+    {
+      /* Root was one too large: r += 2*s - 1, s -= 1.  */
+      rp0 += sp0;
+      cc += rp0 < sp0;
+      --sp0;
+      rp0 += sp0;
+      cc += rp0 < sp0;
+    }
+
+  rp[0] = rp0;
+  sp[0] = sp0;
+  return cc;
+}
+
+/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
+   and in {np, n} the low n limbs of the remainder, returns the high
+   limb of the remainder (which is 0 or 1).
+   Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
+   where B=2^GMP_NUMB_BITS.
+   Needs a scratch of n/2+1 limbs. */
+static mp_limb_t
+mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n, mp_limb_t approx, mp_ptr scratch)
+{
+  mp_limb_t q;			/* carry out of {sp, n} */
+  int c, b;			/* carry out of remainder */
+  mp_size_t l, h;
+
+  ASSERT (n > 1);
+  ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);
+
+  /* Split into a low part of l limbs and a high part of h limbs. */
+  l = n / 2;
+  h = n - l;
+  /* Recurse on the (normalized) high part; base case is size 2. */
+  if (h == 1)
+    q = CALL_SQRTREM2_INPLACE (sp + l, np + 2 * l);
+  else
+    q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h, 0, scratch);
+  /* A remainder carry q is reduced by subtracting the high root once. */
+  if (q != 0)
+    ASSERT_CARRY (mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h));
+  TRACE(printf("tdiv_qr(,,,,%u,,%u) -> %u\n", (unsigned) n, (unsigned) h, (unsigned) (n - h + 1)));
+  /* Divide the remainder, extended with the next limbs of np, by the
+     high root; the quotient approximates twice the low half of the root. */
+  mpn_tdiv_qr (scratch, np + l, 0, np + l, n, sp + l, h);
+  q += scratch[l];
+  /* c keeps the bit lost when halving the quotient below. */
+  c = scratch[0] & 1;
+  mpn_rshift (sp, scratch, l, 1);
+  sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
+  /* When approx (a mask from the caller) is non-zero, the caller only
+     needs to know whether the remainder is non-zero; a set masked bit
+     in the root proves it is, so stop early. */
+  if (UNLIKELY ((sp[0] & approx) != 0)) /* (sp[0] & mask) > 1 */
+    return 1; /* Remainder is non-zero */
+  q >>= 1;
+  if (c != 0)
+    c = mpn_add_n (np + l, np + l, sp + l, h);
+  TRACE(printf("sqr(,,%u)\n", (unsigned) l));
+  /* Subtract the square of the low part of the root from the remainder. */
+  mpn_sqr (np + n, sp, l);
+  b = q + mpn_sub_n (np, np, np + n, 2 * l);
+  c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
+
+  if (c < 0)
+    {
+      /* Remainder went negative: add 2*S back (plus the doubled root
+	 carry), subtract 1, and decrement the root S by one. */
+      q = mpn_add_1 (sp + l, sp + l, h, q);
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
+      c += mpn_addlsh1_n_ip1 (np, sp, n) + 2 * q;
+#else
+      c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
+#endif
+      c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
+      q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
+    }
+
+  return c;
+}
+
+#if USE_DIVAPPR_Q
+/* Approximate quotient of {np, nn} by {dp, dn}, written to
+   {qp, nn - dn + 1}.  Dispatches on dn among the three internal
+   divappr implementations.  scratch needs room for nn limbs (a
+   writable copy of the dividend). */
+static void
+mpn_divappr_q (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
+{
+  gmp_pi1_t inv;
+  mp_limb_t qh;
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  /* Divisor must be normalized (high bit set). */
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+  /* The sbpi1/dcpi1 variants clobber the dividend, so work on a copy. */
+  MPN_COPY (scratch, np, nn);
+  invert_pi1 (inv, dp[dn-1], dp[dn-2]);
+  if (BELOW_THRESHOLD (dn, DC_DIVAPPR_Q_THRESHOLD))
+    qh = mpn_sbpi1_divappr_q (qp, scratch, nn, dp, dn, inv.inv32);
+  else if (BELOW_THRESHOLD (dn, MU_DIVAPPR_Q_THRESHOLD))
+    qh = mpn_dcpi1_divappr_q (qp, scratch, nn, dp, dn, &inv);
+  else
+    {
+      /* mu variant reads np directly but needs its own scratch area. */
+      mp_size_t itch = mpn_mu_divappr_q_itch (nn, dn, 0);
+      TMP_DECL;
+      TMP_MARK;
+      /* Sadly, scratch is too small. */
+      qh = mpn_mu_divappr_q (qp, np, nn, dp, dn, TMP_ALLOC_LIMBS (itch));
+      TMP_FREE;
+    }
+  qp [nn - dn] = qh;
+}
+#endif
+
+/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n-odd},
+   returns zero if the operand was a perfect square, one otherwise.
+   Assumes {np, 2n-odd}*4^nsh is normalized, i.e. B > np[2n-1-odd]*4^nsh >= B/4
+   where B=2^GMP_NUMB_BITS.
+   THINK: In the odd case, three more (dummy) limbs are taken into account,
+   when nsh is maximal, two limbs are discarded from the result of the
+   division. Too much? Is a single dummy limb enough? */
+static int
+mpn_dc_sqrt (mp_ptr sp, mp_srcptr np, mp_size_t n, unsigned nsh, unsigned odd)
+{
+  mp_limb_t q;			/* carry out of {sp, n} */
+  int c;			/* carry out of remainder */
+  mp_size_t l, h;
+  mp_ptr qp, tp, scratch;
+  TMP_DECL;
+  TMP_MARK;
+
+  ASSERT (np[2 * n - 1 - odd] != 0);
+  ASSERT (n > 4);
+  ASSERT (nsh < GMP_NUMB_BITS / 2);
+
+  /* Split: low part of l limbs, high part of h limbs. */
+  l = (n - 1) / 2;
+  h = n - l;
+  ASSERT (n >= l + 2 && l + 2 >= h && h > l && l >= 1 + odd);
+  scratch = TMP_ALLOC_LIMBS (l + 2 * n + 5 - USE_DIVAPPR_Q); /* n + 2-USE_DIVAPPR_Q */
+  tp = scratch + n + 2 - USE_DIVAPPR_Q; /* n + h + 1, but tp [-1] is writable */
+  /* Normalize the relevant high portion of the operand into tp by
+     shifting in 2*nsh bits (2*nsh < GMP_NUMB_BITS per the assert). */
+  if (nsh != 0)
+    {
+      /* o is used to exactly set the lowest bits of the dividend, is it needed? */
+      int o = l > (1 + odd);
+      ASSERT_NOCARRY (mpn_lshift (tp - o, np + l - 1 - o - odd, n + h + 1 + o, 2 * nsh));
+    }
+  else
+    MPN_COPY (tp, np + l - 1 - odd, n + h + 1);
+  /* Square root of the high part; its remainder stays in {tp+l+1, h}. */
+  q = mpn_dc_sqrtrem (sp + l, tp + l + 1, h, 0, scratch);
+  if (q != 0)
+    ASSERT_CARRY (mpn_sub_n (tp + l + 1, tp + l + 1, sp + l, h));
+  qp = tp + n + 1; /* l + 2 */
+  TRACE(printf("div(appr)_q(,,%u,,%u) -> %u \n", (unsigned) n+1, (unsigned) h, (unsigned) (n + 1 - h + 1)));
+  /* Divide the remainder by the high root; the quotient approximates
+     twice the low half of the root. */
+#if USE_DIVAPPR_Q
+  mpn_divappr_q (qp, tp, n + 1, sp + l, h, scratch);
+#else
+  mpn_div_q (qp, tp, n + 1, sp + l, h, scratch);
+#endif
+  q += qp [l + 1];
+  c = 1;
+  if (q > 1)
+    {
+      /* Quotient carry: the low half of the root saturates to all ones. */
+      /* FIXME: if s!=0 we will shift later, a noop on this area. */
+      MPN_FILL (sp, l, GMP_NUMB_MAX);
+    }
+  else
+    {
+      /* FIXME: if s!=0 we will shift again later, shift just once. */
+      mpn_rshift (sp, qp + 1, l, 1);
+      sp[l - 1] |= q << (GMP_NUMB_BITS - 1);
+      /* If the discarded low quotient bits are all below the margin,
+	 the approximation may be off: verify by explicit computation. */
+      if (((qp[0] >> (2 + USE_DIVAPPR_Q)) | /* < 3 + 4*USE_DIVAPPR_Q */
+	   (qp[1] & (GMP_NUMB_MASK >> ((GMP_NUMB_BITS >> odd)- nsh - 1)))) == 0)
+	{
+	  mp_limb_t cy;
+	  /* Approximation is not good enough, the extra limb(+ nsh bits)
+	     is smaller than needed to absorb the possible error. */
+	  /* {qp + 1, l + 1} equals 2*{sp, l} */
+	  /* FIXME: use mullo or wrap-around, or directly evaluate
+	     remainder with a single sqrmod_bnm1. */
+	  TRACE(printf("mul(,,%u,,%u)\n", (unsigned) h, (unsigned) (l+1)));
+	  ASSERT_NOCARRY (mpn_mul (scratch, sp + l, h, qp + 1, l + 1));
+	  /* Compute the remainder of the previous mpn_div(appr)_q. */
+	  cy = mpn_sub_n (tp + 1, tp + 1, scratch, h);
+#if USE_DIVAPPR_Q || WANT_ASSERT
+	  MPN_DECR_U (tp + 1 + h, l, cy);
+#if USE_DIVAPPR_Q
+	  ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) <= 0);
+	  if (mpn_cmp (tp + 1 + h, scratch + h, l) < 0)
+	    {
+	      /* May happen only if div result was not exact. */
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
+	      cy = mpn_addlsh1_n_ip1 (tp + 1, sp + l, h);
+#else
+	      cy = mpn_addmul_1 (tp + 1, sp + l, h, CNST_LIMB(2));
+#endif
+	      ASSERT_NOCARRY (mpn_add_1 (tp + 1 + h, tp + 1 + h, l, cy));
+	      MPN_DECR_U (sp, l, 1);
+	    }
+	  /* Can the root be exact when a correction was needed? We
+	     did not find an example, but it depends on divappr
+	     internals, and we can not assume it true in general...*/
+	  /* else */
+#else /* WANT_ASSERT */
+	  ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) == 0);
+#endif
+#endif
+	  if (mpn_zero_p (tp + l + 1, h - l))
+	    {
+	      TRACE(printf("sqr(,,%u)\n", (unsigned) l));
+	      /* Compare the remainder with the square of the low root
+		 to settle exactness (c == 0 means possibly exact). */
+	      mpn_sqr (scratch, sp, l);
+	      c = mpn_cmp (tp + 1, scratch + l, l);
+	      if (c == 0)
+		{
+		  if (nsh != 0)
+		    {
+		      mpn_lshift (tp, np, l, 2 * nsh);
+		      np = tp;
+		    }
+		  c = mpn_cmp (np, scratch + odd, l - odd);
+		}
+	      if (c < 0)
+		{
+		  /* Root was overestimated by one. */
+		  MPN_DECR_U (sp, l, 1);
+		  c = 1;
+		}
+	    }
+	}
+    }
+  TMP_FREE;
+
+  /* Undo the normalization: shift the root back down. */
+  if ((odd | nsh) != 0)
+    mpn_rshift (sp, sp, n, nsh + (odd ? GMP_NUMB_BITS / 2 : 0));
+  return c;
+}
+
+
+mp_size_t
+mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
+{
+  mp_limb_t cc, high, rl;
+  int c;
+  mp_size_t rn, tn;
+  TMP_DECL;
+
+  ASSERT (nn > 0);
+  ASSERT_MPN (np, nn);
+
+  ASSERT (np[nn - 1] != 0);
+  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
+  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
+  ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));
+
+  /* c = number of bit PAIRS to shift left so the top limb is >= B/4;
+     zero when one of the two top bits is already set. */
+  high = np[nn - 1];
+  if (high & (GMP_NUMB_HIGHBIT | (GMP_NUMB_HIGHBIT / 2)))
+    c = 0;
+  else
+    {
+      count_leading_zeros (c, high);
+      c -= GMP_NAIL_BITS;
+
+      c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
+    }
+  /* Single-limb operand: mpn_sqrtrem1 directly, adjusting for the
+     normalization shift (sqrt(a*4^c) >> c == floor(sqrt(a))). */
+  if (nn == 1) {
+    if (c == 0)
+      {
+	sp[0] = mpn_sqrtrem1 (&rl, high);
+	if (rp != NULL)
+	  rp[0] = rl;
+      }
+    else
+      {
+	cc = mpn_sqrtrem1 (&rl, high << (2*c)) >> c;
+	sp[0] = cc;
+	if (rp != NULL)
+	  rp[0] = rl = high - cc*cc;
+      }
+    return rl != 0;
+  }
+  /* Two-limb operand: mpn_sqrtrem2, again handling normalization. */
+  if (nn == 2) {
+    mp_limb_t tp [2];
+    if (rp == NULL) rp = tp;
+    if (c == 0)
+      {
+#if SQRTREM2_INPLACE
+	rp[1] = high;
+	rp[0] = np[0];
+	cc = CALL_SQRTREM2_INPLACE (sp, rp);
+#else
+	cc = mpn_sqrtrem2 (sp, rp, np);
+#endif
+	rp[1] = cc;
+	/* Return value is the limb size of the remainder. */
+	return ((rp[0] | cc) != 0) + cc;
+      }
+    else
+      {
+	rl = np[0];
+	rp[1] = (high << (2*c)) | (rl >> (GMP_NUMB_BITS - 2*c));
+	rp[0] = rl << (2*c);
+	CALL_SQRTREM2_INPLACE (sp, rp);
+	cc = sp[0] >>= c;	/* c != 0, the highest bit of the root cc is 0. */
+	rp[0] = rl -= cc*cc;	/* Computed modulo 2^GMP_LIMB_BITS, because it's smaller. */
+	return rl != 0;
+      }
+  }
+  tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */
+
+  /* No remainder wanted and operand large: use mpn_dc_sqrt, which only
+     decides whether the remainder is zero. */
+  if ((rp == NULL) && (nn > 8))
+    return mpn_dc_sqrt (sp, np, tn, c, nn & 1);
+  TMP_MARK;
+  if (((nn & 1) | c) != 0)
+    {
+      /* Operand needs normalization (odd size and/or 2c-bit shift):
+	 compute on a shifted copy, then shift root and remainder back. */
+      mp_limb_t s0[1], mask;
+      mp_ptr tp, scratch;
+      TMP_ALLOC_LIMBS_2 (tp, 2 * tn, scratch, tn / 2 + 1);
+      tp[0] = 0;	     /* needed only when 2*tn > nn, but saves a test */
+      if (c != 0)
+	mpn_lshift (tp + (nn & 1), np, nn, 2 * c);
+      else
+	MPN_COPY (tp + (nn & 1), np, nn);
+      c += (nn & 1) ? GMP_NUMB_BITS / 2 : 0;		/* c now represents k */
+      mask = (CNST_LIMB (1) << c) - 1;
+      rl = mpn_dc_sqrtrem (sp, tp, tn, (rp == NULL) ? mask - 1 : 0, scratch);
+      /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
+	 thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
+      s0[0] = sp[0] & mask;	/* S mod 2^k */
+      rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]);	/* R = R + 2*s0*S */
+      cc = mpn_submul_1 (tp, s0, 1, s0[0]);
+      rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
+      mpn_rshift (sp, sp, tn, c);
+      tp[tn] = rl;
+      if (rp == NULL)
+	rp = tp;
+      /* Shift the remainder right by 2k bits: a whole limb plus the
+	 rest, or just c bits, depending on the size of 2k. */
+      c = c << 1;
+      if (c < GMP_NUMB_BITS)
+	tn++;
+      else
+	{
+	  tp++;
+	  c -= GMP_NUMB_BITS;
+	}
+      if (c != 0)
+	mpn_rshift (rp, tp, tn, c);
+      else
+	MPN_COPY_INCR (rp, tp, tn);
+      rn = tn;
+    }
+  else
+    {
+      /* Even size, already normalized: compute in place over rp. */
+      if (rp != np)
+	{
+	  if (rp == NULL) /* nn <= 8 */
+	    rp = TMP_SALLOC_LIMBS (nn);
+	  MPN_COPY (rp, np, nn);
+	}
+      rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn, 0, TMP_ALLOC_LIMBS(tn / 2 + 1)));
+    }
+
+  MPN_NORMALIZE (rp, rn);
+
+  TMP_FREE;
+  return rn;
+}
diff --git a/mpn/generic/strongfibo.c b/mpn/generic/strongfibo.c
new file mode 100644
index 0000000..7e8d612
--- /dev/null
+++ b/mpn/generic/strongfibo.c
@@ -0,0 +1,219 @@
+/* mpn_fib2m -- calculate Fibonacci numbers, modulo m.
+
+Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009, 2018, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+#if ! HAVE_NATIVE_mpn_rsblsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n
+/* Stores |{ap,n}-{bp,n}| in {rp,n},
+   returns the sign of {ap,n}-{bp,n}. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_limb_t  x, y;
+  /* Scan from the most significant limb down for the first difference;
+     equal high limbs produce zero result limbs.  NOTE: relies on
+     mp_size_t being a signed type, so the loop stops at n == -1. */
+  while (--n >= 0)
+    {
+      x = ap[n];
+      y = bp[n];
+      if (x != y)
+        {
+          /* First differing limb decides the sign; restore n and
+	     subtract the remaining n low limbs in the right order. */
+          ++n;
+          if (x > y)
+            {
+              ASSERT_NOCARRY (mpn_sub_n (rp, ap, bp, n));
+              return 1;
+            }
+          else
+            {
+              ASSERT_NOCARRY (mpn_sub_n (rp, bp, ap, n));
+              return -1;
+            }
+        }
+      rp[n] = 0;
+    }
+  return 0;
+}
+#endif
+
+/* Computes at most count terms of the sequence needed by the
+   Lucas-Lehmer-Riesel test, indexing backward:
+   L_i = L_{i+1}^2 - 2
+
+   The sequence is computed modulo M = {mp, mn}.
+   The starting point is given in L_{count+1} = {lp, mn}.
+   The scratch pointed by sp, needs a space of at least 3 * mn + 1 limbs.
+
+   Returns the index i>0 if L_i = 0 (mod M) is found within the
+   computed count terms of the sequence.  Otherwise it returns zero.
+
+   Note: (+/-2)^2-2=2, (+/-1)^2-2=-1, 0^2-2=-2
+ */
+
+static mp_bitcnt_t
+mpn_llriter (mp_ptr lp, mp_srcptr mp, mp_size_t mn, mp_bitcnt_t count, mp_ptr sp)
+{
+  do
+    {
+      /* One step: L <- L^2 - 2 (mod M).  Square into sp, reduce into lp. */
+      mpn_sqr (sp, lp, mn);
+      mpn_tdiv_qr (sp + 2 * mn, lp, 0, sp, 2 * mn, mp, mn);
+      if (lp[0] < 5)
+	{
+	  /* If L^2 % M < 5, |L^2 % M - 2| <= 2 */
+	  if (mn == 1 || mpn_zero_p (lp + 1, mn - 1))
+	    /* L^2 % M - 2 is in {-2,...,2}: report the index when it
+	       is exactly 0; the other small values loop among
+	       themselves (see the note above) and never reach 0. */
+	    return (lp[0] == 2) ? count : 0;
+	  else
+	    MPN_DECR_U (lp, mn, 2);
+	}
+      else
+	lp[0] -= 2;
+    } while (--count != 0);
+  return 0;
+}
+
+/* Store the Lucas' number L[n] at lp (maybe), computed modulo m.  lp
+   and scratch should have room for mn*2+1 limbs.
+
+   Returns the size of L[n] normally.
+
+   If F[n] is zero modulo m, or L[n] is, returns 0 and lp is
+   undefined.
+*/
+
+static mp_size_t
+mpn_lucm (mp_ptr lp, mp_srcptr np, mp_size_t nn, mp_srcptr mp, mp_size_t mn, mp_ptr scratch)
+{
+  int		neg;
+  mp_limb_t	cy;
+
+  ASSERT (! MPN_OVERLAP_P (lp, MAX(2*mn+1,5), scratch, MAX(2*mn+1,5)));
+  ASSERT (nn > 0);
+
+  /* Compute the Fibonacci pair mod m; neg records a relative sign flip
+     between the two returned values. */
+  neg = mpn_fib2m (lp, scratch, np, nn, mp, mn);
+
+  /* F[n] = +/-{lp, mn}, F[n-1] = +/-{scratch, mn} */
+  if (mpn_zero_p (lp, mn))
+    return 0;
+
+  /* Combine as L[n] = F[n] + 2*F[n-1], up to overall sign. */
+  if (neg) /* One sign is opposite, use sub instead of add. */
+    {
+#if HAVE_NATIVE_mpn_rsblsh1_n || HAVE_NATIVE_mpn_sublsh1_n
+#if HAVE_NATIVE_mpn_rsblsh1_n
+      cy = mpn_rsblsh1_n (lp, lp, scratch, mn); /* L[n] = +/-(2F[n-1]-(-F[n])) */
+#else
+      cy = mpn_sublsh1_n (lp, lp, scratch, mn); /* L[n] = -/+(F[n]-(-2F[n-1])) */
+      if (cy != 0)
+	cy = mpn_add_n (lp, lp, mp, mn) - cy;
+#endif
+      if (cy > 1)
+	cy += mpn_add_n (lp, lp, mp, mn);
+#else
+      cy = mpn_lshift (scratch, scratch, mn, 1); /* 2F[n-1] */
+      if (UNLIKELY (cy))
+	cy -= mpn_sub_n (lp, scratch, lp, mn); /* L[n] = +/-(2F[n-1]-(-F[n])) */
+      else
+	abs_sub_n (lp, lp, scratch, mn);
+#endif
+      ASSERT (cy <= 1);
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = mpn_addlsh1_n (lp, lp, scratch, mn); /* L[n] = +/-(2F[n-1]+F[n])) */
+#else
+      cy = mpn_lshift (scratch, scratch, mn, 1);
+      cy+= mpn_add_n (lp, lp, scratch, mn);
+#endif
+      ASSERT (cy <= 2);
+    }
+  /* Fully reduce mod m: subtract m while a carry or an excess remains. */
+  while (cy || mpn_cmp (lp, mp, mn) >= 0)
+    cy -= mpn_sub_n (lp, lp, mp, mn);
+  /* A zero L[n] normalizes to size 0, the documented failure result. */
+  MPN_NORMALIZE (lp, mn);
+  return mn;
+}
+
+/* Strong Fibonacci/Lucas test on the odd modulus {mp, mn}; scratch
+   must hold at least 2*mn+1 limbs.  Returns non-zero when a zero term
+   was found in the computed Lucas-sequence chain (test passed),
+   presumably a probable-prime witness — confirm against callers. */
+int
+mpn_strongfibo (mp_srcptr mp, mp_size_t mn, mp_ptr scratch)
+{
+  mp_ptr	lp, sp;
+  mp_size_t	en;
+  mp_bitcnt_t	b0;
+  TMP_DECL;
+
+  /* b0 = index of the lowest clear bit of m, i.e. bits 0..b0-1 are
+     all ones. */
+#if GMP_NUMB_BITS % 4 == 0
+  b0 = mpn_scan0 (mp, 0);
+#else
+  {
+    mpz_t m = MPZ_ROINIT_N(mp, mn);
+    b0 = mpz_scan0 (m, 0);
+  }
+  if (UNLIKELY (b0 == mn * GMP_NUMB_BITS))
+    {
+      /* m is all ones: (m+1)/2^b0 degenerates to 1. */
+      en = 1;
+      scratch [0] = 1;
+    }
+  else
+#endif
+    {
+      /* scratch = (m >> b0) | 1 == (m+1) >> b0, the odd part of m+1
+	 (bit b0 of m is clear, so the OR acts as +1). */
+      int cnt = b0 % GMP_NUMB_BITS;
+      en = b0 / GMP_NUMB_BITS;
+      if (LIKELY (cnt != 0))
+	mpn_rshift (scratch, mp + en, mn - en, cnt);
+      else
+	MPN_COPY (scratch, mp + en, mn - en);
+      en = mn - en;
+      scratch [0] |= 1;
+      en -= scratch [en - 1] == 0;
+    }
+  TMP_MARK;
+
+  lp = TMP_ALLOC_LIMBS (4 * mn + 6);
+  sp = lp + 2 * mn + 3;
+  /* Lucas number of that odd index, mod m; 0 means early failure. */
+  en = mpn_lucm (sp, scratch, en, mp, mn, lp);
+  if (en != 0 && LIKELY (--b0 != 0))
+    {
+      /* Start the L <- L^2 - 2 chain from V^2 + 2 (the |= 2 below). */
+      mpn_sqr (lp, sp, en);
+      lp [0] |= 2; /* V^2 + 2 */
+      if (LIKELY (2 * en >= mn))
+	mpn_tdiv_qr (sp, lp, 0, lp, 2 * en, mp, mn);
+      else
+	MPN_ZERO (lp + 2 * en, mn - 2 * en);
+      if (! mpn_zero_p (lp, mn) && LIKELY (--b0 != 0))
+	b0 = mpn_llriter (lp, mp, mn, b0, lp + mn + 1);
+    }
+  TMP_FREE;
+  return (b0 != 0);
+}
diff --git a/mpn/generic/sub.c b/mpn/generic/sub.c
new file mode 100644
index 0000000..df0afd6
--- /dev/null
+++ b/mpn/generic/sub.c
@@ -0,0 +1,33 @@
+/* mpn_sub - subtract mpn from mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_sub 1
+
+#include "gmp-impl.h"
diff --git a/mpn/generic/sub_1.c b/mpn/generic/sub_1.c
new file mode 100644
index 0000000..a20f191
--- /dev/null
+++ b/mpn/generic/sub_1.c
@@ -0,0 +1,33 @@
+/* mpn_sub_1 - subtract limb from mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_sub_1 1
+
+#include "gmp-impl.h"
diff --git a/mpn/generic/sub_err1_n.c b/mpn/generic/sub_err1_n.c
new file mode 100644
index 0000000..beca57e
--- /dev/null
+++ b/mpn/generic/sub_err1_n.c
@@ -0,0 +1,100 @@
+/* mpn_sub_err1_n -- sub_n with one error term
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		mp_ptr ep, mp_srcptr yp,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+  /* yp is consumed from the top down, pairing c[i+1] with yp[n-1-i]. */
+  yp += n - 1;
+  el = eh = 0;
+
+  do
+    {
+      yl = *yp--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh:el) */
+      /* zl = yl when this limb borrowed, 0 otherwise (cy is 0 or 1). */
+      zl = (-cy) & yl;
+      el += zl;
+      eh += el < zl;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* Fold the nail-bit overflow of el into eh and renormalize. */
+  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
+  el &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el;
+  ep[1] = eh;
+
+  return cy;
+}
diff --git a/mpn/generic/sub_err2_n.c b/mpn/generic/sub_err2_n.c
new file mode 100644
index 0000000..1edf8d6
--- /dev/null
+++ b/mpn/generic/sub_err2_n.c
@@ -0,0 +1,116 @@
+/* mpn_sub_err2_n -- sub_n with two error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+  stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+  /* y vectors are consumed from the top down, pairing c[i+1] with y[n-1-i]. */
+  yp1 += n - 1;
+  yp2 += n - 1;
+  el1 = eh1 = 0;
+  el2 = eh2 = 0;
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1) */
+      /* zl1 = yl1 when this limb borrowed, 0 otherwise (cy is 0 or 1). */
+      zl1 = (-cy) & yl1;
+      el1 += zl1;
+      eh1 += el1 < zl1;
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* Fold the nail-bit overflow of the low accumulators into the highs. */
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;
+  ep[1] = eh1;
+  ep[2] = el2;
+  ep[3] = eh2;
+
+  return cy;
+}
diff --git a/mpn/generic/sub_err3_n.c b/mpn/generic/sub_err3_n.c
new file mode 100644
index 0000000..2db3c63
--- /dev/null
+++ b/mpn/generic/sub_err3_n.c
@@ -0,0 +1,131 @@
+/* mpn_sub_err3_n -- sub_n with three error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+           c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+  stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+  /* y vectors are consumed from the top down, pairing c[i+1] with y[n-1-i]. */
+  yp1 += n - 1;
+  yp2 += n - 1;
+  yp3 += n - 1;
+  el1 = eh1 = 0;
+  el2 = eh2 = 0;
+  el3 = eh3 = 0;
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      yl3 = *yp3--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1) */
+      /* zl1 = yl1 when this limb borrowed, 0 otherwise (cy is 0 or 1). */
+      zl1 = (-cy) & yl1;
+      el1 += zl1;
+      eh1 += el1 < zl1;
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+
+      /* update (eh3:el3) */
+      zl3 = (-cy) & yl3;
+      el3 += zl3;
+      eh3 += el3 < zl3;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* Fold the nail-bit overflow of the low accumulators into the highs. */
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+  eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
+  el3 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;
+  ep[1] = eh1;
+  ep[2] = el2;
+  ep[3] = eh2;
+  ep[4] = el3;
+  ep[5] = eh3;
+
+  return cy;
+}
diff --git a/mpn/generic/sub_n.c b/mpn/generic/sub_n.c
new file mode 100644
index 0000000..b192c96
--- /dev/null
+++ b/mpn/generic/sub_n.c
@@ -0,0 +1,89 @@
+/* mpn_sub_n -- Subtract equal length limb vectors.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+/* Full-limb (no nails) variant: borrows are detected via unsigned
+   wraparound comparisons. */
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++;
+      /* sl > ul iff ul - vl borrowed; rl > sl iff subtracting the
+	 incoming borrow borrowed.  At most one of cy1/cy2 can be set,
+	 so OR equals their sum. */
+      sl = ul - vl;
+      cy1 = sl > ul;
+      rl = sl - cy;
+      cy2 = rl > sl;
+      cy = cy1 | cy2;
+      *rp++ = rl;
+    }
+  while (--n != 0);
+
+  return cy;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+/* Nails variant: limbs use only GMP_NUMB_BITS < GMP_LIMB_BITS bits,
+   so a borrow appears directly in the top bit of the full limb. */
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t ul, vl, rl, cy;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++;
+      rl = ul - vl - cy;
+      /* Sign bit of the full-width difference is the borrow out. */
+      cy = rl >> (GMP_LIMB_BITS - 1);
+      *rp++ = rl & GMP_NUMB_MASK;
+    }
+  while (--n != 0);
+
+  return cy;
+}
+
+#endif
diff --git a/mpn/generic/submul_1.c b/mpn/generic/submul_1.c
new file mode 100644
index 0000000..4744274
--- /dev/null
+++ b/mpn/generic/submul_1.c
@@ -0,0 +1,144 @@
+/* mpn_submul_1 -- multiply the N long limb vector pointed to by UP by VL,
+   subtract the N least significant limbs of the product from the limb
+   vector pointed to by RP.  Return the most significant limb of the
+   product, adjusted for carry-out from the subtraction.
+
+Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  /* rp[0..n-1] -= up[0..n-1] * v0; return the most significant limb of
+     the product adjusted for the borrow out of the subtraction.
+     Full-limb variant (GMP_NAIL_BITS == 0).  The "cycle" remarks
+     document the dependent recurrence through crec that limits the
+     loop's critical path.  */
+  mp_limb_t u0, crec, c, p1, p0, r0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+
+  crec = 0;
+  do
+    {
+      u0 = *up++;
+      umul_ppmm (p1, p0, u0, v0);	/* (p1,p0) = u0 * v0, double-limb product */
+
+      r0 = *rp;
+
+      p0 = r0 - p0;
+      c = r0 < p0;			/* borrow from r0 - low(product) */
+
+      p1 = p1 + c;			/* fold that borrow into the high limb */
+
+      r0 = p0 - crec;		/* cycle 0, 3, ... */
+      c = p0 < r0;		/* cycle 1, 4, ... */
+
+      crec = p1 + c;		/* cycle 2, 5, ... */
+
+      *rp++ = r0;
+    }
+  while (--n != 0);
+
+  return crec;
+}
+
+#endif
+
+#if GMP_NAIL_BITS == 1
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  /* Variant for exactly one nail bit (GMP_NAIL_BITS == 1).  Each
+     SUBC_LIMB performs a GMP_NUMB_BITS-wide subtract producing a
+     separate borrow flag; the three borrows are summed into cl and
+     applied on the next iteration.  The high product limb p1 is
+     likewise deferred by one limb position via prev_p1.  */
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, cl, xl, c1, c2, c3;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (v0);
+
+  shifted_v0 = v0 << GMP_NAIL_BITS;	/* pre-shift so the product halves align on numb boundaries */
+  cl = 0;
+  prev_p1 = 0;
+  do
+    {
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      SUBC_LIMB (c1, xl, r0, prev_p1);
+      SUBC_LIMB (c2, xl, xl, p0);
+      SUBC_LIMB (c3, xl, xl, cl);
+      cl = c1 + c2 + c3;
+      *rp++ = xl;
+      prev_p1 = p1;
+    }
+  while (--n != 0);
+
+  return prev_p1 + cl;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 2
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
+{
+  /* Variant for two or more nail bits: with >= 2 spare bits per limb,
+     r0 - (prev_p1 + p0) + cl fits in a single limb, and the signed
+     borrow is recovered by arithmetic right shift of the nail part.  */
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, xw, cl, xl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (v0);
+
+  shifted_v0 = v0 << GMP_NAIL_BITS;
+  cl = 0;				/* running borrow, 0 or negative (as a signed quantity) */
+  prev_p1 = 0;				/* high product limb, applied one limb later */
+  do
+    {
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      xw = r0 - (prev_p1 + p0) + cl;
+      cl = (mp_limb_signed_t) xw >> GMP_NUMB_BITS; /* FIXME: non-portable */
+      xl = xw & GMP_NUMB_MASK;
+      *rp++ = xl;
+      prev_p1 = p1;
+    }
+  while (--n != 0);
+
+  return prev_p1 - cl;
+}
+
+#endif
diff --git a/mpn/generic/tdiv_qr.c b/mpn/generic/tdiv_qr.c
new file mode 100644
index 0000000..92ff33c
--- /dev/null
+++ b/mpn/generic/tdiv_qr.c
@@ -0,0 +1,386 @@
+/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
+   write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
+   qxn is non-zero, generate that many fraction limbs and append them after the
+   other quotient limbs, and update the remainder accordingly.  The input
+   operands are unaffected.
+
+   Preconditions:
+   1. The most significant limb of the divisor must be non-zero.
+   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
+
+   The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
+   complexity of multiplication.
+
+Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+void
+mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  /* Fraction limbs (qxn != 0) are not supported by this implementation.  */
+  ASSERT_ALWAYS (qxn == 0);
+
+  ASSERT (nn >= 0);
+  ASSERT (dn >= 0);
+  ASSERT (dn == 0 || dp[dn - 1] != 0);
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
+
+  /* Dispatch on divisor size; 1- and 2-limb divisors have dedicated
+     routines.  */
+  switch (dn)
+    {
+    case 0:
+      DIVIDE_BY_ZERO;
+
+    case 1:
+      {
+	rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
+	return;
+      }
+
+    case 2:
+      {
+	mp_ptr n2p;
+	mp_limb_t qhl, cy;
+	TMP_DECL;
+	TMP_MARK;
+	if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
+	  {
+	    /* Divisor not normalized: shift both operands left so the
+	       divisor's most significant bit is set, divide, then shift
+	       the remainder back down.  */
+	    int cnt;
+	    mp_limb_t d2p[2];
+	    count_leading_zeros (cnt, dp[1]);
+	    cnt -= GMP_NAIL_BITS;
+	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
+	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
+	    n2p = TMP_ALLOC_LIMBS (nn + 1);
+	    cy = mpn_lshift (n2p, np, nn, cnt);
+	    n2p[nn] = cy;
+	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
+	    if (cy == 0)
+	      qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
+	    rp[0] = (n2p[0] >> cnt)
+	      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
+	    rp[1] = (n2p[1] >> cnt);
+	  }
+	else
+	  {
+	    n2p = TMP_ALLOC_LIMBS (nn);
+	    MPN_COPY (n2p, np, nn);
+	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
+	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
+	    rp[0] = n2p[0];
+	    rp[1] = n2p[1];
+	  }
+	TMP_FREE;
+	return;
+      }
+
+    default:
+      {
+	int adjust;
+	gmp_pi1_t dinv;
+	TMP_DECL;
+	TMP_MARK;
+	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
+	/* Numerator (conservatively) at least twice the divisor size:
+	   normalize and divide directly with a full-width algorithm.  */
+	if (nn + adjust >= 2 * dn)
+	  {
+	    mp_ptr n2p, d2p;
+	    mp_limb_t cy;
+	    int cnt;
+
+	    qp[nn - dn] = 0;			  /* zero high quotient limb */
+	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
+	      {
+		count_leading_zeros (cnt, dp[dn - 1]);
+		cnt -= GMP_NAIL_BITS;
+		d2p = TMP_ALLOC_LIMBS (dn);
+		mpn_lshift (d2p, dp, dn, cnt);
+		n2p = TMP_ALLOC_LIMBS (nn + 1);
+		cy = mpn_lshift (n2p, np, nn, cnt);
+		n2p[nn] = cy;
+		nn += adjust;
+	      }
+	    else
+	      {
+		cnt = 0;
+		d2p = (mp_ptr) dp;
+		n2p = TMP_ALLOC_LIMBS (nn + 1);
+		MPN_COPY (n2p, np, nn);
+		n2p[nn] = 0;
+		nn += adjust;
+	      }
+
+	    invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
+	    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
+	      mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
+	    else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
+		     BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
+		     (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
+		     + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
+	      mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
+	    else
+	      {
+		mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
+		mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+		mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
+		n2p = rp;
+	      }
+
+	    /* Undo the normalizing shift on the remainder.  */
+	    if (cnt != 0)
+	      mpn_rshift (rp, n2p, dn, cnt);
+	    else
+	      MPN_COPY (rp, n2p, dn);
+	    TMP_FREE;
+	    return;
+	  }
+
+	/* When we come here, the numerator/partial remainder is less
+	   than twice the size of the denominator.  */
+
+	  {
+	    /* Problem:
+
+	       Divide a numerator N with nn limbs by a denominator D with dn
+	       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
+	       compared to dn, conventional division algorithms perform poorly.
+	       We want an algorithm that has an expected running time that is
+	       dependent only on qn.
+
+	       Algorithm (very informally stated):
+
+	       1) Divide the 2 x qn most significant limbs from the numerator
+		  by the qn most significant limbs from the denominator.  Call
+		  the result qest.  This is either the correct quotient, but
+		  might be 1 or 2 too large.  Compute the remainder from the
+		  division.  (This step is implemented by an mpn_divrem call.)
+
+	       2) Is the most significant limb from the remainder < p, where p
+		  is the product of the most significant limb from the quotient
+		  and the next(d)?  (Next(d) denotes the next ignored limb from
+		  the denominator.)  If it is, decrement qest, and adjust the
+		  remainder accordingly.
+
+	       3) Is the remainder >= qest?  If it is, qest is the desired
+		  quotient.  The algorithm terminates.
+
+	       4) Subtract qest x next(d) from the remainder.  If there is
+		  borrow out, decrement qest, and adjust the remainder
+		  accordingly.
+
+	       5) Skip one word from the denominator (i.e., let next(d) denote
+		  the next less significant limb).  */
+
+	    mp_size_t qn;
+	    mp_ptr n2p, d2p;
+	    mp_ptr tp;
+	    mp_limb_t cy;
+	    mp_size_t in, rn;
+	    mp_limb_t quotient_too_large;
+	    unsigned int cnt;
+
+	    qn = nn - dn;
+	    qp[qn] = 0;				/* zero high quotient limb */
+	    qn += adjust;			/* qn cannot become bigger */
+
+	    if (qn == 0)
+	      {
+		/* Quotient is zero; the numerator itself is the remainder.  */
+		MPN_COPY (rp, np, dn);
+		TMP_FREE;
+		return;
+	      }
+
+	    in = dn - qn;		/* (at least partially) ignored # of limbs in ops */
+	    /* Normalize denominator by shifting it to the left such that its
+	       most significant bit is set.  Then shift the numerator the same
+	       amount, to mathematically preserve quotient.  */
+	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
+	      {
+		count_leading_zeros (cnt, dp[dn - 1]);
+		cnt -= GMP_NAIL_BITS;
+
+		d2p = TMP_ALLOC_LIMBS (qn);
+		mpn_lshift (d2p, dp + in, qn, cnt);
+		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
+
+		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
+		if (adjust)
+		  {
+		    n2p[2 * qn] = cy;
+		    n2p++;
+		  }
+		else
+		  {
+		    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
+		  }
+	      }
+	    else
+	      {
+		cnt = 0;
+		d2p = (mp_ptr) dp + in;
+
+		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
+		if (adjust)
+		  {
+		    n2p[2 * qn] = 0;
+		    n2p++;
+		  }
+	      }
+
+	    /* Get an approximate quotient using the extracted operands.  */
+	    if (qn == 1)
+	      {
+		mp_limb_t q0, r0;
+		udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
+		n2p[0] = r0 >> GMP_NAIL_BITS;
+		qp[0] = q0;
+	      }
+	    else if (qn == 2)
+	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
+	    else
+	      {
+		invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
+		if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+		  mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
+		else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
+		  mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
+		else
+		  {
+		    mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
+		    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+		    mp_ptr r2p = rp;
+		    if (np == r2p)	/* If N and R share space, put ... */
+		      r2p += nn - qn;	/* intermediate remainder at N's upper end. */
+		    mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
+		    MPN_COPY (n2p, r2p, qn);
+		  }
+	      }
+
+	    rn = qn;
+	    /* Multiply the first ignored divisor limb by the most significant
+	       quotient limb.  If that product is > the partial remainder's
+	       most significant limb, we know the quotient is too large.  This
+	       test quickly catches most cases where the quotient is too large;
+	       it catches all cases where the quotient is 2 too large.  */
+	    {
+	      mp_limb_t dl, x;
+	      mp_limb_t h, dummy;
+
+	      if (in - 2 < 0)
+		dl = 0;
+	      else
+		dl = dp[in - 2];
+
+#if GMP_NAIL_BITS == 0
+	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
+#else
+	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
+	      if (cnt != 0)
+		x |= dl >> (GMP_NUMB_BITS - cnt);
+#endif
+	      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
+
+	      if (n2p[qn - 1] < h)
+		{
+		  mp_limb_t cy;
+
+		  mpn_decr_u (qp, (mp_limb_t) 1);
+		  cy = mpn_add_n (n2p, n2p, d2p, qn);
+		  if (cy)
+		    {
+		      /* The partial remainder is safely large.  */
+		      n2p[qn] = cy;
+		      ++rn;
+		    }
+		}
+	    }
+
+	    quotient_too_large = 0;
+	    if (cnt != 0)
+	      {
+		mp_limb_t cy1, cy2;
+
+		/* Append partially used numerator limb to partial remainder.  */
+		cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
+		n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
+
+		/* Update partial remainder with partially used divisor limb.  */
+		cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
+		if (qn != rn)
+		  {
+		    ASSERT_ALWAYS (n2p[qn] >= cy2);
+		    n2p[qn] -= cy2;
+		  }
+		else
+		  {
+		    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
+
+		    quotient_too_large = (cy1 < cy2);
+		    ++rn;
+		  }
+		--in;
+	      }
+	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
+
+	    tp = TMP_ALLOC_LIMBS (dn);
+
+	    if (in < qn)
+	      {
+		if (in == 0)
+		  {
+		    MPN_COPY (rp, n2p, rn);
+		    ASSERT_ALWAYS (rn == dn);
+		    goto foo;
+		  }
+		mpn_mul (tp, qp, qn, dp, in);
+	      }
+	    else
+	      mpn_mul (tp, dp, in, qp, qn);
+
+	    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
+	    MPN_COPY (rp + in, n2p, dn - in);
+	    quotient_too_large |= cy;
+	    cy = mpn_sub_n (rp, np, tp, in);
+	    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
+	    quotient_too_large |= cy;
+	  foo:
+	    /* Final correction: any recorded borrow means qest was one too
+	       large; decrement it and add the divisor back once.  */
+	    if (quotient_too_large)
+	      {
+		mpn_decr_u (qp, (mp_limb_t) 1);
+		mpn_add_n (rp, rp, dp, dn);
+	      }
+	  }
+	TMP_FREE;
+	return;
+      }
+    }
+}
diff --git a/mpn/generic/toom22_mul.c b/mpn/generic/toom22_mul.c
new file mode 100644
index 0000000..da56014
--- /dev/null
+++ b/mpn/generic/toom22_mul.c
@@ -0,0 +1,222 @@
+/* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn.  Or more
+   accurately, bn <= an < 2bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +inf
+
+  <-s--><--n-->
+   ____ ______
+  |_a1_|___a0_|
+   |b1_|___b0_|
+   <-t-><--n-->
+
+  v0  =  a0     * b0       #   A(0)*B(0)
+  vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
+  vinf=      a1 *     b1   # A(inf)*B(inf)
+*/
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_mul_toom22   1
+#else
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#define TOOM22_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    if (! MAYBE_mul_toom22						\
+	|| BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+    else								\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+  } while (0)
+
+/* Normally, this calls mul_basecase or toom22_mul.  But when the fraction
+   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
+   relative unbalance will become a larger and larger relative unbalance with
+   each recursion (the difference s-t will be invariant over recursive calls).
+   Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
+   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
+#define TOOM22_MUL_REC(p, a, an, b, bn, ws)				\
+  do {									\
+    if (! MAYBE_mul_toom22						\
+	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))			\
+      mpn_mul_basecase (p, a, an, b, bn);				\
+    else if (4 * an < 5 * bn)						\
+      mpn_toom22_mul (p, a, an, b, bn, ws);				\
+    else								\
+      mpn_toom32_mul (p, a, an, b, bn, ws);				\
+  } while (0)
+
<br>
+void
+mpn_toom22_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  /* NOTE(review): appears to pre-mark the cpuvec machinery as
+     initialized, presumably so fat-binary call macros skip their init
+     check inside this hot path -- confirm against gmp-impl.h.  */
+  const int __gmpn_cpuvec_initialized = 1;
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy, cy2;
+  mp_ptr asm1;
+  mp_ptr bsm1;
+
+#define a0  ap
+#define a1  (ap + n)
+#define b0  bp
+#define b1  (bp + n)
+
+  s = an >> 1;
+  n = an - s;
+  t = bn - n;
+
+  ASSERT (an >= bn);
+
+  ASSERT (0 < s && s <= n && (n - s) == (an & 1));
+  ASSERT (0 < t && t <= s);
+
+  /* The evaluation points |a0-a1| and |b0-b1| are built in the low part
+     of the product area pp, which is not otherwise used yet.  */
+  asm1 = pp;
+  bsm1 = pp + n;
+
+  vm1_neg = 0;
+
+  /* Compute asm1 = |a0 - a1|; vm1_neg records the sign.  */
+  if ((an & 1) == 0) /* s == n */
+    {
+      if (mpn_cmp (a0, a1, n) < 0)
+	{
+	  mpn_sub_n (asm1, a1, a0, n);
+	  vm1_neg = 1;
+	}
+      else
+	{
+	  mpn_sub_n (asm1, a0, a1, n);
+	}
+    }
+  else /* n - s == 1 */
+    {
+      if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
+	{
+	  mpn_sub_n (asm1, a1, a0, s);
+	  asm1[s] = 0;
+	  vm1_neg = 1;
+	}
+      else
+	{
+	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
+	}
+    }
+
+  /* Compute bsm1 = |b0 - b1|, folding its sign into vm1_neg.  */
+  if (t == n)
+    {
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, n);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  mpn_sub_n (bsm1, b0, b1, n);
+	}
+    }
+  else
+    {
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  mpn_sub (bsm1, b0, n, b1, t);
+	}
+    }
+
+#define v0	pp				/* 2n */
+#define vinf	(pp + 2 * n)			/* s+t */
+#define vm1	scratch				/* 2n */
+#define scratch_out	scratch + 2 * n
+
+  /* vm1, 2n limbs */
+  TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+
+  if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
+  else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
+
+  /* v0, 2n limbs */
+  TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
+
+  /* H(v0) + L(vinf) */
+  cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
+
+  /* L(v0) + (H(v0) + L(vinf)) */
+  cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
+
+  /* (H(v0) + L(vinf)) + H(vinf) */
+  cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
+
+  if (vm1_neg)
+    cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
+  else {
+    cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
+    if (UNLIKELY (cy + 1 == 0)) { /* cy is negative */
+      /* The total contribution of v0+vinf-vm1 can not be negative. */
+#if WANT_ASSERT
+      /* The borrow in cy stops the propagation of the carry cy2, */
+      ASSERT (cy2 == 1);
+      cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
+      ASSERT (cy == 0);
+#else
+      /* we simply fill the area with zeros. */
+      MPN_FILL (pp + 2 * n, n, 0);
+      /* ASSERT (s + t == n || mpn_zero_p (pp + 3 * n, s + t - n)); */
+#endif
+      return;
+    }
+  }
+
+  ASSERT (cy  <= 2);
+  ASSERT (cy2 <= 2);
+
+  MPN_INCR_U (pp + 2 * n, s + t, cy2);
+  /* if s+t==n, cy is zero, but we should not access pp[3*n] at all. */
+  MPN_INCR_U (pp + 3 * n, s + t - n, cy);
+}
diff --git a/mpn/generic/toom2_sqr.c b/mpn/generic/toom2_sqr.c
new file mode 100644
index 0000000..db7a846
--- /dev/null
+++ b/mpn/generic/toom2_sqr.c
@@ -0,0 +1,155 @@
+/* mpn_toom2_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +inf
+
+  <-s--><--n-->
+   ____ ______
+  |_a1_|___a0_|
+
+  v0  =  a0     ^2  #   A(0)^2
+  vm1 = (a0- a1)^2  #  A(-1)^2
+  vinf=      a1 ^2  # A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_sqr_toom2   1
+#else
+#define MAYBE_sqr_toom2							\
+  (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
+#endif
+
+#define TOOM2_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (! MAYBE_sqr_toom2						\
+	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else								\
+      mpn_toom2_sqr (p, a, n, ws);					\
+  } while (0)
+
+void
+mpn_toom2_sqr (mp_ptr pp,
+	       mp_srcptr ap, mp_size_t an,
+	       mp_ptr scratch)
+{
+  /* NOTE(review): appears to pre-mark the cpuvec machinery as
+     initialized, presumably so fat-binary call macros skip their init
+     check inside this hot path -- confirm against gmp-impl.h.  */
+  const int __gmpn_cpuvec_initialized = 1;
+  mp_size_t n, s;
+  mp_limb_t cy, cy2;
+  mp_ptr asm1;
+
+#define a0  ap
+#define a1  (ap + n)
+
+  s = an >> 1;
+  n = an - s;
+
+  ASSERT (0 < s && s <= n && (n - s) == (an & 1));
+
+  /* |a0 - a1| is built in the low part of the product area pp, which is
+     not otherwise used yet.  The sign is irrelevant when squaring.  */
+  asm1 = pp;
+
+  /* Compute asm1 = |a0 - a1|.  */
+  if ((an & 1) == 0) /* s == n */
+    {
+      if (mpn_cmp (a0, a1, n) < 0)
+	{
+	  mpn_sub_n (asm1, a1, a0, n);
+	}
+      else
+	{
+	  mpn_sub_n (asm1, a0, a1, n);
+	}
+    }
+  else /* n - s == 1 */
+    {
+      if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
+	{
+	  mpn_sub_n (asm1, a1, a0, s);
+	  asm1[s] = 0;
+	}
+      else
+	{
+	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
+	}
+    }
+
+#define v0	pp				/* 2n */
+#define vinf	(pp + 2 * n)			/* s+s */
+#define vm1	scratch				/* 2n */
+#define scratch_out	scratch + 2 * n
+
+  /* vm1, 2n limbs */
+  TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
+
+  /* vinf, s+s limbs */
+  TOOM2_SQR_REC (vinf, a1, s, scratch_out);
+
+  /* v0, 2n limbs */
+  TOOM2_SQR_REC (v0, ap, n, scratch_out);
+
+  /* H(v0) + L(vinf) */
+  cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
+
+  /* L(v0) + H(v0) */
+  cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
+
+  /* L(vinf) + H(vinf) */
+  cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
+
+  cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
+
+  ASSERT (cy + 1 <= 3);
+  ASSERT (cy2 <= 2);
+
+  if (LIKELY (cy <= 2)) {
+    MPN_INCR_U (pp + 2 * n, s + s, cy2);
+    MPN_INCR_U (pp + 3 * n, s + s - n, cy);
+  } else { /* cy is negative */
+    /* The total contribution of v0+vinf-vm1 can not be negative. */
+#if WANT_ASSERT
+    /* The borrow in cy stops the propagation of the carry cy2, */
+    ASSERT (cy2 == 1);
+    cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
+    ASSERT (cy == 0);
+#else
+    /* we simply fill the area with zeros. */
+    MPN_FILL (pp + 2 * n, n, 0);
+#endif
+  }
+}
diff --git a/mpn/generic/toom32_mul.c b/mpn/generic/toom32_mul.c
new file mode 100644
index 0000000..1139d17
--- /dev/null
+++ b/mpn/generic/toom32_mul.c
@@ -0,0 +1,320 @@
+/* mpn_toom32_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 1.5
+   times as large as bn.  Or more accurately, bn < an < 3bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Improvements by Marco Bodrato and Niels Möller.
+
+   The idea of applying Toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2020, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +inf
+
+  <-s-><--n--><--n-->
+   ___ ______ ______
+  |a2_|___a1_|___a0_|
+	|_b1_|___b0_|
+	<-t--><--n-->
+
+  v0  =  a0         * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2)*(b0+ b1) #   A(1)*B(1)      ah  <= 2  bh <= 1
+  vm1 = (a0- a1+ a2)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 1  bh = 0
+  vinf=          a2 *     b1  # A(inf)*B(inf)
+*/
+
+#define TOOM32_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    mpn_mul_n (p, a, b, n);						\
+  } while (0)
+
+void
+mpn_toom32_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy;
+  mp_limb_signed_t hi;
+  mp_limb_t ap1_hi, bp1_hi;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define b0  bp
+#define b1  (bp + n)
+
+  /* Required, to ensure that s + t >= n. */
+  ASSERT (bn + 2 <= an && an + 6 <= 3*bn);
+
+  /* Split point: ceil(an/3) when a dominates, else ceil(bn/2).  */
+  n = 2 * an >= 3 * bn ? (an + 2) / (size_t) 3 : (bn + 1) >> 1;
+
+  s = an - 2 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (s + t >= n);
+
+  /* Product area of size an + bn = 3*n + s + t >= 4*n + 2. */
+#define ap1 (pp)		/* n, most significant limb in ap1_hi */
+#define bp1 (pp + n)		/* n, most significant bit in bp1_hi */
+#define am1 (pp + 2*n)		/* n, most significant bit in hi */
+#define bm1 (pp + 3*n)		/* n */
+#define v1 (scratch)		/* 2n + 1 */
+#define vm1 (pp)		/* 2n + 1 */
+#define scratch_out (scratch + 2*n + 1) /* Currently unused. */
+
+  /* Scratch need: 2*n + 1 + scratch for the recursive multiplications. */
+
+  /* FIXME: Keep v1[2*n] and vm1[2*n] in scalar variables? */
+
+  /* Compute ap1 = a0 + a1 + a2, am1 = a0 - a1 + a2 */
+  ap1_hi = mpn_add (ap1, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+    {
+      /* a0 + a2 < a1, so am1 is computed negated; vm1_neg tracks it.  */
+      ap1_hi = mpn_add_n_sub_n (ap1, am1, a1, ap1, n) >> 1;
+      hi = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (ap1, am1, ap1, a1, n);
+      hi = ap1_hi - (cy & 1);
+      ap1_hi += (cy >> 1);
+      vm1_neg = 0;
+    }
+#else
+  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+    {
+      ASSERT_NOCARRY (mpn_sub_n (am1, a1, ap1, n));
+      hi = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      hi = ap1_hi - mpn_sub_n (am1, ap1, a1, n);
+      vm1_neg = 0;
+    }
+  ap1_hi += mpn_add_n (ap1, ap1, a1, n);
+#endif
+
+  /* Compute bp1 = b0 + b1 and bm1 = b0 - b1. */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  cy = mpn_add_n_sub_n (bp1, bm1, b1, b0, n);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  cy = mpn_add_n_sub_n (bp1, bm1, b0, b1, n);
+	}
+      bp1_hi = cy >> 1;
+#else
+      bp1_hi = mpn_add_n (bp1, b0, b1, n);
+
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, n));
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  ASSERT_NOCARRY (mpn_sub_n (bm1, b0, b1, n));
+	}
+#endif
+    }
+  else
+    {
+      /* FIXME: Should still use mpn_add_n_sub_n for the main part. */
+      bp1_hi = mpn_add (bp1, b0, n, b1, t);
+
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, t));
+	  MPN_ZERO (bm1 + t, n - t);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  ASSERT_NOCARRY (mpn_sub (bm1, b0, n, b1, t));
+	}
+    }
+
+  /* v1 = (a0+a1+a2)(b0+b1); the high limbs ap1_hi, bp1_hi are folded in
+     after the n x n recursive product.  */
+  TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out);
+  if (ap1_hi == 1)
+    {
+      cy = mpn_add_n (v1 + n, v1 + n, bp1, n);
+    }
+  else if (ap1_hi > 1) /* ap1_hi == 2 */
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = mpn_addlsh1_n_ip1 (v1 + n, bp1, n);
+#else
+      cy = mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bp1_hi != 0)
+    cy += ap1_hi + mpn_add_n (v1 + n, v1 + n, ap1, n);
+  v1[2 * n] = cy;
+
+  TOOM32_MUL_N_REC (vm1, am1, bm1, n, scratch_out);
+  if (hi)
+    hi = mpn_add_n (vm1+n, vm1+n, bm1, n);
+
+  vm1[2*n] = hi;
+
+  /* v1 <-- (v1 + vm1) / 2 = x0 + x2 */
+  if (vm1_neg)
+    {
+#if HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (v1, v1, vm1, 2*n+1);
+#else
+      mpn_sub_n (v1, v1, vm1, 2*n+1);
+      ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+#endif
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (v1, v1, vm1, 2*n+1);
+#else
+      mpn_add_n (v1, v1, vm1, 2*n+1);
+      ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+#endif
+    }
+
+  /* We get x1 + x3 = (x0 + x2) - (x0 - x1 + x2 - x3), and hence
+
+     y = x1 + x3 + (x0 + x2) * B
+       = (x0 + x2) * B + (x0 + x2) - vm1.
+
+     y is 3*n + 1 limbs, y = y0 + y1 B + y2 B^2. We store them as
+     follows: y0 at scratch, y1 at pp + 2*n, and y2 at scratch + n
+     (already in place, except for carry propagation).
+
+     We thus add
+
+   B^3  B^2   B    1
+    |    |    |    |
+   +-----+----+
+ + |  x0 + x2 |
+   +----+-----+----+
+ +      |  x0 + x2 |
+	+----------+
+ -      |  vm1     |
+ --+----++----+----+-
+   | y2  | y1 | y0 |
+   +-----+----+----+
+
+  Since we store y0 at the same location as the low half of x0 + x2, we
+  need to do the middle sum first. */
+
+  hi = vm1[2*n];
+  cy = mpn_add_n (pp + 2*n, v1, v1 + n, n);
+  MPN_INCR_U (v1 + n, n + 1, cy + v1[2*n]);
+
+  /* FIXME: Can we get rid of this second vm1_neg conditional by
+     swapping the location of +1 and -1 values? */
+  if (vm1_neg)
+    {
+      cy = mpn_add_n (v1, v1, vm1, n);
+      hi += mpn_add_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
+      MPN_INCR_U (v1 + n, n+1, hi);
+    }
+  else
+    {
+      cy = mpn_sub_n (v1, v1, vm1, n);
+      hi += mpn_sub_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
+      MPN_DECR_U (v1 + n, n+1, hi);
+    }
+
+  TOOM32_MUL_N_REC (pp, a0, b0, n, scratch_out);
+  /* vinf, s+t limbs.  Use mpn_mul for now, to handle unbalanced operands */
+  if (s > t)  mpn_mul (pp+3*n, a2, s, b1, t);
+  else        mpn_mul (pp+3*n, b1, t, a2, s);
+
+  /* Remaining interpolation.
+
+     y * B + x0 + x3 B^3 - x0 B^2 - x3 B
+     = (x1 + x3) B + (x0 + x2) B^2 + x0 + x3 B^3 - x0 B^2 - x3 B
+     = y0 B + y1 B^2 + y3 B^3 + Lx0 + H x0 B
+       + L x3 B^3 + H x3 B^4 - Lx0 B^2 - H x0 B^3 - L x3 B - H x3 B^2
+     = L x0 + (y0 + H x0 - L x3) B + (y1 - L x0 - H x3) B^2
+       + (y2 - (H x0 - L x3)) B^3 + H x3 B^4
+
+	  B^4       B^3       B^2        B         1
+ |         |         |         |         |         |
+   +-------+                   +---------+---------+
+   |  Hx3  |                   | Hx0-Lx3 |    Lx0  |
+   +------+----------+---------+---------+---------+
+	  |    y2    |  y1     |   y0    |
+	  ++---------+---------+---------+
+	  -| Hx0-Lx3 | - Lx0   |
+	   +---------+---------+
+		      | - Hx3  |
+		      +--------+
+
+    We must take into account the carry from Hx0 - Lx3.
+  */
+
+  cy = mpn_sub_n (pp + n, pp + n, pp+3*n, n);
+  hi = scratch[2*n] + cy;
+
+  cy = mpn_sub_nc (pp + 2*n, pp + 2*n, pp, n, cy);
+  hi -= mpn_sub_nc (pp + 3*n, scratch + n, pp + n, n, cy);
+
+  hi += mpn_add (pp + n, pp + n, 3*n, scratch, n);
+
+  /* FIXME: Is support for s + t == n needed? */
+  if (LIKELY (s + t > n))
+    {
+      hi -= mpn_sub (pp + 2*n, pp + 2*n, 2*n, pp + 4*n, s+t-n);
+
+      ASSERT (hi >= 0); /* contribution of the middle terms >= 0 */
+      MPN_INCR_U (pp + 4*n, s+t-n, hi);
+    }
+  else
+    ASSERT (hi == 0);
+}
diff --git a/mpn/generic/toom33_mul.c b/mpn/generic/toom33_mul.c
new file mode 100644
index 0000000..54f055f
--- /dev/null
+++ b/mpn/generic/toom33_mul.c
@@ -0,0 +1,316 @@
+/* mpn_toom33_mul -- Multiply {ap,an} and {p,bn} where an and bn are close in
+   size.  Or more accurately, bn <= an < (3/2)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2010, 2012, 2015, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s--><--n--><--n-->
+   ____ ______ ______
+  |_a2_|___a1_|___a0_|
+   |b2_|___b1_|___b0_|
+   <-t-><--n--><--n-->
+
+  v0  =  a0         * b0          #   A(0)*B(0)
+  v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
+  vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  bh <= 1
+  v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
+  vinf=          a2 *         b2  # A(inf)*B(inf)
+*/
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom33   1
+#else
+#define MAYBE_mul_basecase						\
+  (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom33						\
+  (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
+#endif
+
+/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
+   multiplication at the infinity point. We may have
+   MAYBE_mul_basecase == 0, and still get s just below
+   MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
+   s == 1 and mpn_toom22_mul will crash.
+*/
+
+#define TOOM33_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    if (MAYBE_mul_basecase						\
+	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+    else if (! MAYBE_mul_toom33						\
+	     || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+    else								\
+      mpn_toom33_mul (p, a, n, b, n, ws);				\
+  } while (0)
+
+void
+mpn_toom33_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  const int __gmpn_cpuvec_initialized = 1;
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy, vinf0;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2;
+  mp_ptr bs1, bsm1, bs2;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+
+  n = (an + 2) / (size_t) 3;
+
+  s = an - 2 * n;
+  t = bn - 2 * n;
+
+  ASSERT (an >= bn);
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  as1  = scratch + 4 * n + 4;
+  asm1 = scratch + 2 * n + 2;
+  as2 = pp + n + 1;
+
+  bs1 = pp;
+  bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
+  bs2 = pp + 2 * n + 2;
+
+  gp = scratch;
+
+  vm1_neg = 0;
+
+  /* Compute as1 and asm1.  */
+  cy = mpn_add (gp, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
+      as1[n] = cy >> 1;
+      asm1[n] = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+      as1[n] = cy + (cy2 >> 1);
+      asm1[n] = cy - (cy2 & 1);
+    }
+#else
+  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      mpn_sub_n (asm1, a1, gp, n);
+      asm1[n] = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      cy -= mpn_sub_n (asm1, gp, a1, n);
+      asm1[n] = cy;
+    }
+#endif
+
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (as2, a2, as1, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a1, a2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy = mpn_add_n (as2, a2, as1, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy -= mpn_sub_n (as2, as2, a0, n);
+#endif
+#endif
+  as2[n] = cy;
+
+  /* Compute bs1 and bsm1.  */
+  cy = mpn_add (gp, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
+      bs1[n] = cy >> 1;
+      bsm1[n] = 0;
+      vm1_neg ^= 1;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
+      bs1[n] = cy + (cy2 >> 1);
+      bsm1[n] = cy - (cy2 & 1);
+    }
+#else
+  bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
+  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
+    {
+      mpn_sub_n (bsm1, b1, gp, n);
+      bsm1[n] = 0;
+      vm1_neg ^= 1;
+    }
+  else
+    {
+      cy -= mpn_sub_n (bsm1, gp, b1, n);
+      bsm1[n] = cy;
+    }
+#endif
+
+  /* Compute bs2.  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (bs2, b2, bs1, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
+  cy += bs1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (bs2, b1, b2, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
+  cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
+#else
+  cy  = mpn_add_n (bs2, bs1, b2, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
+  cy += bs1[n];
+  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
+  cy -= mpn_sub_n (bs2, bs2, b0, n);
+#endif
+#endif
+  bs2[n] = cy;
+
+  ASSERT (as1[n] <= 2);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <= 6);
+  ASSERT (bs2[n] <= 6);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 4 * n)			/* s+t */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
+#define scratch_out  (scratch + 5 * n + 5)
+
+  /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+  if (bsm1[n] != 0)
+    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  vm1[2 * n] = cy;
+#else
+  vm1[2 * n] = 0;
+  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + (bsm1[n] | asm1[n]), scratch_out);
+#endif
+
+  TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a2, s, b2, t);
+  else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
+
+  vinf0 = vinf[0];				/* v1 overlaps with this */
+
+#ifdef SMALLER_RECURSION
+  /* v1, 2n+1 limbs */
+  TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bs1[n] == 1)
+    {
+      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (bs1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  v1[2 * n] = cy;
+#else
+  cy = vinf[1];
+  TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
+  vinf[1] = cy;
+#endif
+
+  TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+}
diff --git a/mpn/generic/toom3_sqr.c b/mpn/generic/toom3_sqr.c
new file mode 100644
index 0000000..297a27f
--- /dev/null
+++ b/mpn/generic/toom3_sqr.c
@@ -0,0 +1,221 @@
+/* mpn_toom3_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2012, 2015, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s--><--n--><--n-->
+   ____ ______ ______
+  |_a2_|___a1_|___a0_|
+
+  v0  =  a0         ^2 #   A(0)^2
+  v1  = (a0+ a1+ a2)^2 #   A(1)^2    ah  <= 2
+  vm1 = (a0- a1+ a2)^2 #  A(-1)^2   |ah| <= 1
+  v2  = (a0+2a1+4a2)^2 #   A(2)^2    ah  <= 6
+  vinf=          a2 ^2 # A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_toom3   1
+#else
+#define MAYBE_sqr_basecase						\
+  (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom3							\
+  (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
+#endif
+
+#define TOOM3_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (MAYBE_sqr_basecase						\
+	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else if (! MAYBE_sqr_toom3						\
+	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+    else								\
+      mpn_toom3_sqr (p, a, n, ws);					\
+  } while (0)
+
+void
+mpn_toom3_sqr (mp_ptr pp,
+	       mp_srcptr ap, mp_size_t an,
+	       mp_ptr scratch)
+{
+  const int __gmpn_cpuvec_initialized = 1; /* NOTE(review): presumably pre-marks cpuvec as initialized for fat-binary builds; confirm */
+  mp_size_t n, s;
+  mp_limb_t cy, vinf0;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2; /* A(1), |A(-1)|, A(2); n+1 limbs each */
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+
+  n = (an + 2) / (size_t) 3; /* n = ceil(an/3); the cast forces unsigned division */
+
+  s = an - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+
+  as1 = scratch + 4 * n + 4;
+  asm1 = scratch + 2 * n + 2;
+  as2 = pp + n + 1; /* parked inside pp; dead before v0/v1 are written there */
+
+  gp = scratch;
+
+  /* Compute as1 and asm1.  */
+  cy = mpn_add (gp, a0, n, a2, s); /* gp = a0 + a2 */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0) /* a0+a2 < a1; sign is irrelevant when squaring */
+    {
+      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
+      as1[n] = cy >> 1; /* high bit of cy is the add carry, low bit the sub borrow */
+      asm1[n] = 0;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+      as1[n] = cy + (cy2 >> 1);
+      asm1[n] = cy - (cy2 & 1);
+    }
+#else
+  as1[n] = cy + mpn_add_n (as1, gp, a1, n); /* as1 = A(1) = a0 + a1 + a2 */
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      mpn_sub_n (asm1, a1, gp, n);
+      asm1[n] = 0;
+    }
+  else
+    {
+      cy -= mpn_sub_n (asm1, gp, a1, n);
+      asm1[n] = cy;
+    }
+#endif
+
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (as2, a2, as1, s); /* as2 = A(1) + a2 */
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n); /* A(2) = 2*(A(1) + a2) - a0 */
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a1, a2, s); /* as2 = a1 + 2*a2 */
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n); /* A(2) = a0 + 2*(a1 + 2*a2) */
+#else
+  cy = mpn_add_n (as2, a2, as1, s); /* as2 = A(1) + a2 */
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1); /* 2*(A(1) + a2) */
+  cy -= mpn_sub_n (as2, as2, a0, n); /* ... - a0 gives A(2) */
+#endif
+#endif
+  as2[n] = cy;
+
+  ASSERT (as1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 4 * n)			/* s+s */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
+#define scratch_out  (scratch + 5 * n + 5)
+
+  /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  TOOM3_SQR_REC (vm1, asm1, n, scratch_out); /* square of the low n limbs */
+  cy = asm1[n]; /* h = high limb of |A(-1)|, 0 or 1 */
+  if (cy != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy += mpn_addlsh1_n_ip1 (vm1 + n, asm1, n); /* (h*B^n + a)^2: add 2*h*a at offset n; h^2 = 1 starts in cy */
+#else
+      cy += mpn_addmul_1 (vm1 + n, asm1, n, CNST_LIMB(2));
+#endif
+    }
+  vm1[2 * n] = cy;
+#else
+  vm1[2 * n] = 0;
+  TOOM3_SQR_REC (vm1, asm1, n + asm1[n], scratch_out); /* square n+1 limbs only when the high limb is nonzero */
+#endif
+
+  TOOM3_SQR_REC (v2, as2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
+
+  TOOM3_SQR_REC (vinf, a2, s, scratch_out);	/* vinf, s+s limbs */
+
+  vinf0 = vinf[0];				/* v1 overlaps with this */
+
+#ifdef SMALLER_RECURSION
+  /* v1, 2n+1 limbs */
+  TOOM3_SQR_REC (v1, as1, n, scratch_out); /* square of the low n limbs of A(1) */
+  cy = as1[n]; /* h = high limb of A(1), 0..2 */
+  if (cy == 1)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n); /* add 2*h*a = 2*a; h^2 = 1 starts in cy */
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (cy != 0) /* h == 2 */
+    {
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+      cy = 4 + mpn_addlsh2_n_ip1 (v1 + n, as1, n); /* add 2*h*a = 4*a; top term h^2 = 4 */
+#else
+      cy = 4 + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(4));
+#endif
+    }
+  v1[2 * n] = cy;
+#else
+  cy = vinf[1]; /* the (n+1)-limb square below spills into vinf[0..1]; vinf[0] is saved in vinf0 */
+  TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
+  vinf[1] = cy;
+#endif
+
+  TOOM3_SQR_REC (v0, ap, n, scratch_out);	/* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
+}
diff --git a/mpn/generic/toom42_mul.c b/mpn/generic/toom42_mul.c
new file mode 100644
index 0000000..e84ce65
--- /dev/null
+++ b/mpn/generic/toom42_mul.c
@@ -0,0 +1,234 @@
+/* mpn_toom42_mul -- Multiply {ap,an} and {bp,bn} where an is nominally twice
+   as large as bn.  Or more accurately, (3/2)bn < an < 4bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n-->
+   ___ ______ ______ ______
+  |a3_|___a2_|___a1_|___a0_|
+	       |_b1_|___b0_|
+	       <-t--><--n-->
+
+  v0  =  a0             * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3)*(b0+ b1) #   A(1)*B(1)      ah  <= 3  bh <= 1
+  vm1 = (a0- a1+ a2- a3)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 1  bh  = 0
+  v2  = (a0+2a1+4a2+8a3)*(b0+2b1) #   A(2)*B(2)      ah  <= 14 bh <= 2
+  vinf=              a3 *     b1  # A(inf)*B(inf)
+*/
+
+#define TOOM42_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    mpn_mul_n (p, a, b, n);						\
+  } while (0)
+
+void
+mpn_toom42_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int vm1_neg; /* 1 iff A(-1)*B(-1) is negative */
+  mp_limb_t cy, vinf0;
+  mp_ptr a0_a2;
+  mp_ptr as1, asm1, as2; /* A(1), |A(-1)|, A(2) */
+  mp_ptr bs1, bsm1, bs2; /* B(1), |B(-1)|, B(2) */
+  mp_ptr tmp;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define b0  bp
+#define b1  (bp + n)
+
+  n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1; /* n = max(ceil(an/4), ceil(bn/2)), so 0 < s,t <= n */
+
+  s = an - 3 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  tmp = TMP_ALLOC_LIMBS (6 * n + 5); /* five (n+1)-limb vectors plus n limbs for bsm1 */
+  as1  = tmp; tmp += n + 1;
+  asm1 = tmp; tmp += n + 1;
+  as2  = tmp; tmp += n + 1;
+  bs1  = tmp; tmp += n + 1;
+  bsm1 = tmp; tmp += n; /* |B(-1)| fits in n limbs since B(-1) = b0 - b1 */
+  bs2  = tmp; tmp += n + 1;
+
+  a0_a2 = pp; /* pp doubles as scratch for the degree-3 evaluation */
+
+  /* Compute as1 and asm1.  */
+  vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1; /* sign of A(-1) is returned in bit 0 */
+
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a2, a3, s); /* A(2) by Horner: ((2*a3 + a2)*2 + a1)*2 + a0 */
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy  = mpn_lshift (as2, a3, s, 1); /* same Horner scheme using shift+add */
+  cy += mpn_add_n (as2, a2, as2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a0, as2, n);
+#endif
+  as2[n] = cy;
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0) /* b0 < b1, so B(-1) < 0 */
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+	}
+      bs1[n] = cy >> 1; /* high bit of cy is the add carry */
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n); /* bs1 = B(1) = b0 + b1 */
+
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, n);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  mpn_sub_n (bsm1, b0, b1, n);
+	}
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0) /* only then is b0 < b1 possible */
+	{
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
+	  vm1_neg ^= 1;
+	}
+      else
+	{
+	  mpn_sub (bsm1, b0, n, b1, t);
+	}
+    }
+
+  /* Compute bs2, recycling bs1. bs2=bs1+b1  */
+  mpn_add (bs2, bs1, n + 1, b1, t); /* B(2) = B(1) + b1 = b0 + 2*b1 */
+
+  ASSERT (as1[n] <= 3);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 1);
+/*ASSERT (bsm1[n] == 0);*/
+  ASSERT (as2[n] <= 14);
+  ASSERT (bs2[n] <= 2);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 4 * n)			/* s+t */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
+#define scratch_out	scratch + 4 * n + 4	/* Currently unused. */
+
+  /* vm1, 2n+1 limbs */
+  TOOM42_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out); /* low n x n limbs; |B(-1)| has no high limb */
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n); /* asm1[n] == 1: add bsm1 once at offset n */
+  vm1[2 * n] = cy;
+
+  TOOM42_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a3, s, b1, t); /* mpn_mul requires the larger operand first */
+  else        mpn_mul (vinf, b1, t, a3, s);
+
+  vinf0 = vinf[0];				/* v1 overlaps with this */
+
+  /* v1, 2n+1 limbs */
+  TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out); /* low n x n limbs; add h_a*bs1 and h_b*as1 below */
+  if (as1[n] == 1)
+    {
+      cy = mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
+#else
+      cy = mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] == 3)
+    {
+      cy = mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(3));
+    }
+  else
+    cy = 0;
+  if (bs1[n] != 0)
+    cy += as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n); /* bs1[n] == 1: add as1 plus its high limb */
+  v1[2 * n] = cy;
+
+  TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/toom42_mulmid.c b/mpn/generic/toom42_mulmid.c
new file mode 100644
index 0000000..f581b10
--- /dev/null
+++ b/mpn/generic/toom42_mulmid.c
@@ -0,0 +1,237 @@
+/* mpn_toom42_mulmid -- toom42 middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+
+/*
+  Middle product of {ap,2n-1} and {bp,n}, output written to {rp,n+2}.
+
+  Neither ap nor bp may overlap rp.
+
+  Must have n >= 4.
+
+  Amount of scratch space required is given by mpn_toom42_mulmid_itch().
+
+  FIXME: this code assumes that n is small compared to GMP_NUMB_MAX. The exact
+  requirements should be clarified.
+*/
+void
+mpn_toom42_mulmid (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+                   mp_ptr scratch)
+{
+  mp_limb_t cy, e[12], zh, zl; /* e[]: six 2-limb correction terms, 2's complement */
+  mp_size_t m; /* half size; the recursion works on an even size 2*m */
+  int neg; /* 1 iff t = {b,m} - {b+m,m} was negated */
+
+  ASSERT (n >= 4);
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+  ap += n & 1;   /* handle odd row and diagonal later */
+  m = n / 2;
+
+  /* (e0h:e0l) etc are correction terms, in 2's complement */
+#define e0l (e[0])
+#define e0h (e[1])
+#define e1l (e[2])
+#define e1h (e[3])
+#define e2l (e[4])
+#define e2h (e[5])
+#define e3l (e[6])
+#define e3h (e[7])
+#define e4l (e[8])
+#define e4h (e[9])
+#define e5l (e[10])
+#define e5h (e[11])
+
+#define s (scratch + 2)
+#define t (rp + m + 2)
+#define p0 rp
+#define p1 scratch
+#define p2 (rp + m)
+#define next_scratch (scratch + 3*m + 1)
+
+  /*
+            rp                            scratch
+  |---------|-----------|    |---------|---------|----------|
+  0         m         2m+2   0         m         2m        3m+1
+            <----p2---->       <-------------s------------->
+  <----p0----><---t---->     <----p1---->
+  */
+
+  /* compute {s,3m-1} = {a,3m-1} + {a+m,3m-1} and error terms e0, e1, e2, e3 */
+  cy = mpn_add_err1_n (s, ap, ap + m, &e0l, bp + m, m - 1, 0);
+  cy = mpn_add_err2_n (s + m - 1, ap + m - 1, ap + 2*m - 1, &e1l,
+		       bp + m, bp, m, cy);
+  mpn_add_err1_n (s + 2*m - 1, ap + 2*m - 1, ap + 3*m - 1, &e3l, bp, m, cy); /* NOTE(review): this call also fills e2l/e2h via the err pointer -- confirm against mpn_add_err1_n's output layout */
+
+  /* compute t = (-1)^neg * ({b,m} - {b+m,m}) and error terms e4, e5 */
+  if (mpn_cmp (bp + m, bp, m) < 0) /* subtract the smaller from the larger so t >= 0 */
+    {
+      ASSERT_NOCARRY (mpn_sub_err2_n (t, bp, bp + m, &e4l,
+				      ap + m - 1, ap + 2*m - 1, m, 0));
+      neg = 1;
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_err2_n (t, bp + m, bp, &e4l,
+				      ap + m - 1, ap + 2*m - 1, m, 0));
+      neg = 0;
+    }
+
+  /* recursive middle products. The picture is:
+
+      b[2m-1]   A   A   A   B   B   B   -   -   -   -   -
+      ...       -   A   A   A   B   B   B   -   -   -   -
+      b[m]      -   -   A   A   A   B   B   B   -   -   -
+      b[m-1]    -   -   -   C   C   C   D   D   D   -   -
+      ...       -   -   -   -   C   C   C   D   D   D   -
+      b[0]      -   -   -   -   -   C   C   C   D   D   D
+               a[0]   ...  a[m]  ...  a[2m]    ...    a[4m-2]
+  */
+
+  if (m < MULMID_TOOM42_THRESHOLD)
+    {
+      /* A + B */
+      mpn_mulmid_basecase (p0, s, 2*m - 1, bp + m, m);
+      /* accumulate high limbs of p0 into e1 */
+      ADDC_LIMB (cy, e1l, e1l, p0[m]);
+      e1h += p0[m + 1] + cy;
+      /* (-1)^neg * (B - C)   (overwrites first m limbs of s) */
+      mpn_mulmid_basecase (p1, ap + m, 2*m - 1, t, m);
+      /* C + D   (overwrites t) */
+      mpn_mulmid_basecase (p2, s + m, 2*m - 1, bp, m);
+    }
+  else
+    {
+      /* as above, but use toom42 instead */
+      mpn_toom42_mulmid (p0, s, bp + m, m, next_scratch);
+      ADDC_LIMB (cy, e1l, e1l, p0[m]);
+      e1h += p0[m + 1] + cy;
+      mpn_toom42_mulmid (p1, ap + m, t, m, next_scratch);
+      mpn_toom42_mulmid (p2, s + m, bp, m, next_scratch);
+    }
+
+  /* apply error terms */
+
+  /* -e0 at rp[0] */
+  SUBC_LIMB (cy, rp[0], rp[0], e0l);
+  SUBC_LIMB (cy, rp[1], rp[1], e0h + cy);
+  if (UNLIKELY (cy)) /* borrow out of rp[0..1]: propagate, folding any residue into e1 */
+    {
+      cy = (m > 2) ? mpn_sub_1 (rp + 2, rp + 2, m - 2, 1) : 1;
+      SUBC_LIMB (cy, e1l, e1l, cy);
+      e1h -= cy;
+    }
+
+  /* z = e1 - e2 + high(p0) */
+  SUBC_LIMB (cy, zl, e1l, e2l);
+  zh = e1h - e2h - cy;
+
+  /* z at rp[m] */
+  ADDC_LIMB (cy, rp[m], rp[m], zl);
+  zh = (zh + cy) & GMP_NUMB_MASK;
+  ADDC_LIMB (cy, rp[m + 1], rp[m + 1], zh);
+  cy -= (zh >> (GMP_NUMB_BITS - 1)); /* top bit of zh is the sign of z (2's complement) */
+  if (UNLIKELY (cy)) /* net carry or borrow into the limbs above */
+    {
+      if (cy == 1)
+	mpn_add_1 (rp + m + 2, rp + m + 2, m, 1);
+      else /* cy == -1 */
+	mpn_sub_1 (rp + m + 2, rp + m + 2, m, 1);
+    }
+
+  /* e3 at rp[2*m] */
+  ADDC_LIMB (cy, rp[2*m], rp[2*m], e3l);
+  rp[2*m + 1] = (rp[2*m + 1] + e3h + cy) & GMP_NUMB_MASK; /* top limb wraps mod B by design */
+
+  /* e4 at p1[0] */
+  ADDC_LIMB (cy, p1[0], p1[0], e4l);
+  ADDC_LIMB (cy, p1[1], p1[1], e4h + cy);
+  if (UNLIKELY (cy))
+    mpn_add_1 (p1 + 2, p1 + 2, m, 1);
+
+  /* -e5 at p1[m] */
+  SUBC_LIMB (cy, p1[m], p1[m], e5l);
+  p1[m + 1] = (p1[m + 1] - e5h - cy) & GMP_NUMB_MASK;
+
+  /* adjustment if p1 ends up negative */
+  cy = (p1[m + 1] >> (GMP_NUMB_BITS - 1)); /* sign bit of p1's 2's-complement top limb */
+
+  /* add (-1)^neg * (p1 - B^m * p1) to output */
+  if (neg)
+    {
+      mpn_sub_1 (rp + m + 2, rp + m + 2, m, cy); /* pre-compensate the sign extension of p1 */
+      mpn_add (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */
+      mpn_sub_n (rp + m, rp + m, p1, m + 2);            /* B + D */
+    }
+  else
+    {
+      mpn_add_1 (rp + m + 2, rp + m + 2, m, cy);
+      mpn_sub (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */
+      mpn_add_n (rp + m, rp + m, p1, m + 2);            /* B + D */
+    }
+
+  /* odd row and diagonal */
+  if (n & 1)
+    {
+      /*
+        Products marked E are already done. We need to do products marked O.
+
+        OOOOO----
+        -EEEEO---
+        --EEEEO--
+        ---EEEEO-
+        ----EEEEO
+       */
+
+      /* first row of O's */
+      cy = mpn_addmul_1 (rp, ap - 1, n, bp[n - 1]); /* ap - 1 undoes the odd-n advance done at entry */
+      ADDC_LIMB (rp[n + 1], rp[n], rp[n], cy);
+
+      /* O's on diagonal */
+      /* FIXME: should probably define an interface "mpn_mulmid_diag_1"
+         that can handle the sum below. Currently we're relying on
+         mulmid_basecase being pretty fast for a diagonal sum like this,
+	 which is true at least for the K8 asm version, but surely false
+	 for the generic version. */
+      mpn_mulmid_basecase (e, ap + n - 1, n - 1, bp, n - 1); /* e[] is dead now; reuse it as a 3-limb result buffer */
+      mpn_add_n (rp + n - 1, rp + n - 1, e, 3);
+    }
+}
diff --git a/mpn/generic/toom43_mul.c b/mpn/generic/toom43_mul.c
new file mode 100644
index 0000000..34acd25
--- /dev/null
+++ b/mpn/generic/toom43_mul.c
@@ -0,0 +1,238 @@
+/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
+   times as large as bn.  Or more accurately, bn < an < 2 bn.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n-->
+   ___ ______ ______ ______
+  |a3_|___a2_|___a1_|___a0_|
+	|_b2_|___b1_|___b0_|
+	<-t--><--n--><--n-->
+
+  v0  =  a0             * b0          #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 3  bh <= 2
+  vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1 |bh|<= 1
+  v2  = (a0+2a1+4a2+8a3)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 14 bh <= 6
+  vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2) #  A(-2)*B(-2)    |ah| <= 9 |bh|<= 4
+  vinf=              a3 *         b2  # A(inf)*B(inf)
+*/
+
+void
+mpn_toom43_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  enum toom6_flags flags;
+  mp_limb_t cy;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define a3  (ap + 3 * n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2 * n)
+
+  n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
+
+  s = an - 3 * n;
+  t = bn - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  /* This is true whenever an >= 25 or bn >= 19, I think. It
+     guarantees that we can fit 5 values of size n+1 in the product
+     area. */
+  ASSERT (s+t >= 5);
+
+#define v0    pp				/* 2n */
+#define vm1   (scratch)				/* 2n+1 */
+#define v1    (pp + 2*n)			/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define v2    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vinf  (pp + 5 * n)			/* s+t */
+#define bs1    pp				/* n+1 */
+#define bsm1  (scratch + 2 * n + 2)		/* n+1 */
+#define asm1  (scratch + 3 * n + 3)		/* n+1 */
+#define asm2  (scratch + 4 * n + 4)		/* n+1 */
+#define bsm2  (pp + n + 1)			/* n+1 */
+#define bs2   (pp + 2 * n + 2)			/* n+1 */
+#define as2   (pp + 3 * n + 3)			/* n+1 */
+#define as1   (pp + 4 * n + 4)			/* n+1 */
+
+  /* Total scratch need is 6 * n + 3 + 1; we allocate one extra
+     limb, because products will overwrite 2n+2 limbs. */
+
+#define a0a2  scratch
+#define b0b2  scratch
+#define a1a3  asm1
+#define b1d   bsm1
+
+  /* Compute as2 and asm2.  */
+  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
+
+  /* Compute bs2 and bsm2.  */
+  b1d[n] = mpn_lshift (b1d, b1, n, 1);			/*       2b1      */
+#if HAVE_NATIVE_mpn_addlsh2_n
+  cy = mpn_addlsh2_n (b0b2, b0, b2, t);			/*  4b2      + b0 */
+#else
+  cy  = mpn_lshift (b0b2, b2, t, 2);			/*  4b2           */
+  cy += mpn_add_n (b0b2, b0b2, b0, t);			/*  4b2      + b0 */
+#endif
+  if (t != n)
+    cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
+  b0b2[n] = cy;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (mpn_cmp (b0b2, b1d, n+1) < 0)
+    {
+      mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+    }
+  else
+    {
+      mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
+    }
+#else
+  mpn_add_n (bs2, b0b2, b1d, n+1);
+  if (mpn_cmp (b0b2, b1d, n+1) < 0)
+    {
+      mpn_sub_n (bsm2, b1d, b0b2, n+1);
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+    }
+  else
+    {
+      mpn_sub_n (bsm2, b0b2, b1d, n+1);
+    }
+#endif
+
+  /* Compute as1 and asm1.  */
+  flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2)));
+
+  /* Compute bs1 and bsm1.  */
+  bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
+      bs1[n] = cy >> 1;
+      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
+      bs1[n] = bsm1[n] + (cy >> 1);
+      bsm1[n]-= cy & 1;
+    }
+#else
+  bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
+  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
+    {
+      mpn_sub_n (bsm1, b1, bsm1, n);
+      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+    }
+  else
+    {
+      bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
+    }
+#endif
+
+  ASSERT (as1[n] <= 3);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <=14);
+  ASSERT (bs2[n] <= 6);
+  ASSERT (asm2[n] <= 9);
+  ASSERT (bsm2[n] <= 4);
+
+  /* vm1, 2n+1 limbs */
+  vm1[2*n] = 0;
+  mpn_mul_n (vm1, asm1, bsm1, n + (asm1[n] | bsm1[n]));  /* W4 */
+
+  /* vm2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
+
+  /* v2, 2n+1 limbs */
+  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
+
+  /* vinf, s+t limbs */   /* W0 */
+  if (s > t)  mpn_mul (vinf, a3, s, b2, t);
+  else        mpn_mul (vinf, b2, t, a3, s);
+
+  /* v0, 2n limbs */
+  mpn_mul_n (v0, ap, bp, n);  /* W5 */
+
+  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
+
+#undef v0
+#undef vm1
+#undef v1
+#undef vm2
+#undef v2
+#undef vinf
+#undef bs1
+#undef bs2
+#undef bsm1
+#undef bsm2
+#undef asm1
+#undef asm2
+/* #undef as1 */
+/* #undef as2 */
+#undef a0a2
+#undef b0b2
+#undef a1a3
+#undef b1d
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef b0
+#undef b1
+#undef b2
+}
diff --git a/mpn/generic/toom44_mul.c b/mpn/generic/toom44_mul.c
new file mode 100644
index 0000000..a361899
--- /dev/null
+++ b/mpn/generic/toom44_mul.c
@@ -0,0 +1,239 @@
+/* mpn_toom44_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
+   size.  Or more accurately, bn <= an < (4/3)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s--><--n--><--n--><--n-->
+   ____ ______ ______ ______
+  |_a3_|___a2_|___a1_|___a0_|
+   |b3_|___b2_|___b1_|___b0_|
+   <-t-><--n--><--n--><--n-->
+
+  v0  =   a0             *  b0              #    A(0)*B(0)
+  v1  = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) #    A(1)*B(1)      ah  <= 3   bh  <= 3
+  vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) #   A(-1)*B(-1)    |ah| <= 1  |bh| <= 1
+  v2  = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) #    A(2)*B(2)      ah  <= 14  bh  <= 14
+  vm2 = ( a0-2a1+4a2-8a3)*( b0-2b1+4b2-8b3) #   A(-2)*B(-2)    |ah| <= 9  |bh| <= 9
+  vh  = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) #  A(1/2)*B(1/2)    ah  <= 14  bh  <= 14
+  vinf=               a3 *          b3      #  A(inf)*B(inf)
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom44   1
+#else
+#define MAYBE_mul_basecase						\
+  (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom44						\
+  (MUL_TOOM6H_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
+#endif
+
+#define TOOM44_MUL_N_REC(p, a, b, n, ws)				\
+  do {									\
+    if (MAYBE_mul_basecase						\
+	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+    else if (MAYBE_mul_toom22						\
+	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+    else if (! MAYBE_mul_toom44						\
+	     || BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))		\
+      mpn_toom33_mul (p, a, n, b, n, ws);				\
+    else								\
+      mpn_toom44_mul (p, a, n, b, n, ws);				\
+  } while (0)
+
+/* Use of scratch space. In the product area, we store
+
+      ___________________
+     |vinf|____|_v1_|_v0_|
+      s+t  2n-1 2n+1  2n
+
+   The other recursive products, vm1, v2, vm2, vh are stored in the
+   scratch area. When computing them, we use the product area for
+   intermediate values.
+
+   Next, we compute v1. We can store the intermediate factors at v0
+   and at vh + 2n + 2.
+
+   Finally, for v0 and vinf, factors are parts of the input operands,
+   and we need scratch space only for the recursive multiplication.
+
+   In all, if S(an) is the scratch need, the needed space is bounded by
+
+     S(an) <= 4 (2*ceil(an/4) + 1) + 1 + S(ceil(an/4) + 1)
+
+   which should give S(n) = 8 n/3 + c log(n) for some constant c.
+*/
+
+void
+mpn_toom44_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  enum toom7_flags flags;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+#define b3  (bp + 3*n)
+
+  ASSERT (an >= bn);
+
+  n = (an + 3) >> 2;
+
+  s = an - 3 * n;
+  t = bn - 3 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (s >= t);
+
+  /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrite the
+   * following limb, so these must be computed in order, and we need a
+   * one limb gap to tp. */
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define v2    scratch				/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
+#define tp (scratch + 8*n + 5)
+
+  /* apx and bpx must not overlap with v1 */
+#define apx   pp				/* n+1 */
+#define amx   (pp + n + 1)			/* n+1 */
+#define bmx   (pp + 2*n + 2)			/* n+1 */
+#define bpx   (pp + 4*n + 2)			/* n+1 */
+
+  /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
+     gives roughly 32 n/3 + log term. */
+
+  /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
+  flags = (enum toom7_flags) (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp));
+
+  /* Compute bpx = b0 + 2 b1 + 4 b2 + 8 b3 and bmx = b0 - 2 b1 + 4 b2 - 8 b3.  */
+  flags = (enum toom7_flags) (flags ^ (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp)));
+
+  TOOM44_MUL_N_REC (v2, apx, bpx, n + 1, tp);	/* v2,  2n+1 limbs */
+  TOOM44_MUL_N_REC (vm2, amx, bmx, n + 1, tp);	/* vm2,  2n+1 limbs */
+
+  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3) */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (apx, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (apx, a3, apx, s);
+      apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
+      MPN_INCR_U (apx + s, n+1-s, cy2);
+    }
+  else
+    apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+#else
+  cy = mpn_lshift (apx, a0, n, 1);
+  cy += mpn_add_n (apx, apx, a1, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  cy += mpn_add_n (apx, apx, a2, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+#endif
+
+  /* Compute bpx = 8 b0 + 4 b1 + 2 b2 + b3 = (((2*b0 + b1) * 2 + b2) * 2 + b3) */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (bpx, b1, b0, n);
+  cy = 2*cy + mpn_addlsh1_n (bpx, b2, bpx, n);
+  if (t < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (bpx, b3, bpx, t);
+      bpx[n] = 2*cy + mpn_lshift (bpx + t, bpx + t, n - t, 1);
+      MPN_INCR_U (bpx + t, n+1-t, cy2);
+    }
+  else
+    bpx[n] = 2*cy + mpn_addlsh1_n (bpx, b3, bpx, n);
+#else
+  cy = mpn_lshift (bpx, b0, n, 1);
+  cy += mpn_add_n (bpx, bpx, b1, n);
+  cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
+  cy += mpn_add_n (bpx, bpx, b2, n);
+  cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
+  bpx[n] = cy + mpn_add (bpx, bpx, n, b3, t);
+#endif
+
+  ASSERT (apx[n] < 15);
+  ASSERT (bpx[n] < 15);
+
+  TOOM44_MUL_N_REC (vh, apx, bpx, n + 1, tp);	/* vh,  2n+1 limbs */
+
+  /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
+  flags = (enum toom7_flags) (flags | (toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp)));
+
+  /* Compute bpx = b0 + b1 + b2 + b3 and bmx = b0 - b1 + b2 - b3.  */
+  flags = (enum toom7_flags) (flags ^ (toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp)));
+
+  ASSERT (amx[n] <= 1);
+  ASSERT (bmx[n] <= 1);
+
+  vm1 [2 * n] = 0;
+  TOOM44_MUL_N_REC (vm1, amx, bmx, n + (bmx[n] | amx[n]), tp);	/* vm1,  2n+1 limbs */
+  /* Clobbers amx, bmx. */
+  TOOM44_MUL_N_REC (v1, apx, bpx, n + 1, tp);	/* v1,  2n+1 limbs */
+
+  TOOM44_MUL_N_REC (v0, a0, b0, n, tp);
+  if (s > t)
+    mpn_mul (vinf, a3, s, b3, t);
+  else
+    TOOM44_MUL_N_REC (vinf, a3, b3, s, tp);	/* vinf, s+t limbs */
+
+  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t, tp);
+}
diff --git a/mpn/generic/toom4_sqr.c b/mpn/generic/toom4_sqr.c
new file mode 100644
index 0000000..fd59d1c
--- /dev/null
+++ b/mpn/generic/toom4_sqr.c
@@ -0,0 +1,164 @@
+/* mpn_toom4_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2010, 2013, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1/2, +1, +2, +inf
+
+  <-s--><--n--><--n--><--n-->
+   ____ ______ ______ ______
+  |_a3_|___a2_|___a1_|___a0_|
+
+  v0  =   a0             ^2 #    A(0)^2
+  v1  = ( a0+ a1+ a2+ a3)^2 #    A(1)^2   ah  <= 3
+  vm1 = ( a0- a1+ a2- a3)^2 #   A(-1)^2  |ah| <= 1
+  v2  = ( a0+2a1+4a2+8a3)^2 #    A(2)^2   ah  <= 14
+  vm2 = ( a0-2a1+4a2-8a3)^2 #   A(-2)^2  -9<=ah<=4
+  vh  = (8a0+4a1+2a2+ a3)^2 #  A(1/2)^2   ah  <= 14
+  vinf=               a3 ^2 #  A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_toom4   1
+#else
+#define MAYBE_sqr_basecase						\
+  (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2							\
+  (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom4							\
+  (SQR_TOOM6_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
+#endif
+
+#define TOOM4_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (MAYBE_sqr_basecase						\
+	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else if (MAYBE_sqr_toom2						\
+	     && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+    else if (! MAYBE_sqr_toom4						\
+	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))		\
+      mpn_toom3_sqr (p, a, n, ws);					\
+    else								\
+      mpn_toom4_sqr (p, a, n, ws);					\
+  } while (0)
+
+void
+mpn_toom4_sqr (mp_ptr pp,
+	       mp_srcptr ap, mp_size_t an,
+	       mp_ptr scratch)
+{
+  mp_size_t n, s;
+  mp_limb_t cy;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+
+  n = (an + 3) >> 2;
+
+  s = an - 3 * n;
+
+  ASSERT (0 < s && s <= n);
+
+  /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrite the
+   * following limb, so these must be computed in order, and we need a
+   * one limb gap to tp. */
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* 2s */
+#define v2    scratch				/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
+#define tp (scratch + 8*n + 5)
+
+  /* No overlap with v1 */
+#define apx   pp				/* n+1 */
+#define amx   (pp + 4*n + 2)			/* n+1 */
+
+  /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
+     gives roughly 32 n/3 + log term. */
+
+  /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
+  mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+
+  TOOM4_SQR_REC (v2, apx, n + 1, tp);	/* v2,  2n+1 limbs */
+  TOOM4_SQR_REC (vm2, amx, n + 1, tp);	/* vm2,  2n+1 limbs */
+
+  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3) */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (apx, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (apx, a3, apx, s);
+      apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
+      MPN_INCR_U (apx + s, n+1-s, cy2);
+    }
+  else
+    apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+#else
+  cy = mpn_lshift (apx, a0, n, 1);
+  cy += mpn_add_n (apx, apx, a1, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  cy += mpn_add_n (apx, apx, a2, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+#endif
+
+  ASSERT (apx[n] < 15);
+
+  TOOM4_SQR_REC (vh, apx, n + 1, tp);	/* vh,  2n+1 limbs */
+
+  /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
+  mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+
+  TOOM4_SQR_REC (v1, apx, n + 1, tp);	/* v1,  2n+1 limbs */
+  vm1 [2 * n] = 0;
+  TOOM4_SQR_REC (vm1, amx, n + amx[n], tp);	/* vm1,  2n+1 limbs */
+
+  TOOM4_SQR_REC (v0, a0, n, tp);
+  TOOM4_SQR_REC (vinf, a3, s, tp);	/* vinf, 2s limbs */
+
+  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) 0, vm2, vm1, v2, vh, 2*s, tp);
+}
diff --git a/mpn/generic/toom52_mul.c b/mpn/generic/toom52_mul.c
new file mode 100644
index 0000000..974059b
--- /dev/null
+++ b/mpn/generic/toom52_mul.c
@@ -0,0 +1,256 @@
+/* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/2
+   times as large as bn.  Or more accurately, 2 bn < an < 5 bn.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______
+  |a4_|___a3_|___a2_|___a1_|___a0_|
+			|b1|___b0_|
+			<t-><--n-->
+
+  v0  =  a0                  * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3+  a4)*(b0+ b1) #   A(1)*B(1)      ah  <= 4   bh <= 1
+  vm1 = (a0- a1+ a2- a3+  a4)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 2   bh  = 0
+  v2  = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) #   A(2)*B(2)      ah  <= 30  bh <= 2
+  vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) #  A(-2)*B(-2)    |ah| <= 20 |bh|<= 1
+  vinf=                   a4 *     b1  # A(inf)*B(inf)
+
+  Some slight optimization in evaluation are taken from the paper:
+  "Towards Optimal Toom-Cook Multiplication for Univariate and
+  Multivariate Polynomials in Characteristic 2 and 0."
+*/
+
+void
+mpn_toom52_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  enum toom6_flags flags;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define a3  (ap + 3 * n)
+#define a4  (ap + 4 * n)
+#define b0  bp
+#define b1  (bp + n)
+
+  n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
+
+  s = an - 4 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  /* Ensures that 5 values of n+1 limbs each fit in the product area.
+     Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
+     n = 8. */
+  ASSERT (s+t >= 5);
+
+#define v0    pp				/* 2n */
+#define vm1   (scratch)				/* 2n+1 */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define v2    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vinf  (pp + 5 * n)			/* s+t */
+#define bs1    pp				/* n+1 */
+#define bsm1  (scratch + 2 * n + 2)		/* n   */
+#define asm1  (scratch + 3 * n + 3)		/* n+1 */
+#define asm2  (scratch + 4 * n + 4)		/* n+1 */
+#define bsm2  (pp + n + 1)			/* n+1 */
+#define bs2   (pp + 2 * n + 2)			/* n+1 */
+#define as2   (pp + 3 * n + 3)			/* n+1 */
+#define as1   (pp + 4 * n + 4)			/* n+1 */
+
+  /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
+     products will overwrite 2n+2 limbs. */
+
+#define a0a2  scratch
+#define a1a3  asm1
+
+  /* Compute as2 and asm2.  */
+  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      mp_limb_t cy;
+
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+	}
+      else
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+	}
+      bs1[n] = cy >> 1;
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, n);
+	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+	}
+      else
+	{
+	  mpn_sub_n (bsm1, b0, b1, n);
+	}
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
+	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
+	}
+      else
+	{
+	  mpn_sub (bsm1, b0, n, b1, t);
+	}
+    }
+
+  /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1  */
+  mpn_add (bs2, bs1, n+1, b1, t);
+  if (flags & toom6_vm1_neg)
+    {
+      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+    }
+  else
+    {
+      bsm2[n] = 0;
+      if (t == n)
+	{
+	  if (mpn_cmp (bsm1, b1, n) < 0)
+	    {
+	      mpn_sub_n (bsm2, b1, bsm1, n);
+	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+	    }
+	  else
+	    {
+	      mpn_sub_n (bsm2, bsm1, b1, n);
+	    }
+	}
+      else
+	{
+	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
+	    {
+	      mpn_sub_n (bsm2, b1, bsm1, t);
+	      MPN_ZERO (bsm2 + t, n - t);
+	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
+	    }
+	  else
+	    {
+	      mpn_sub (bsm2, bsm1, n, b1, t);
+	    }
+	}
+    }
+
+  /* Compute as1 and asm1.  */
+  flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2)));
+
+  ASSERT (as1[n] <= 4);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 2);
+/*   ASSERT (bsm1[n] <= 1); */ /* bsm1 has only n limbs here (see #define) */
+  ASSERT (as2[n] <=30);
+  ASSERT (bs2[n] <= 2);
+  ASSERT (asm2[n] <= 20);
+  ASSERT (bsm2[n] <= 1);
+
+  /* vm1, 2n+1 limbs */
+  mpn_mul (vm1, asm1, n+1, bsm1, n);  /* W4 */
+
+  /* vm2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
+
+  /* v2, 2n+1 limbs */
+  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
+
+  /* vinf, s+t limbs */   /* W0 */
+  if (s > t)  mpn_mul (vinf, a4, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a4, s);
+
+  /* v0, 2n limbs */
+  mpn_mul_n (v0, ap, bp, n);  /* W5 */
+
+  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
+
+#undef v0
+#undef vm1
+#undef v1
+#undef vm2
+#undef v2
+#undef vinf
+#undef bs1
+#undef bs2
+#undef bsm1
+#undef bsm2
+#undef asm1
+#undef asm2
+#undef as1
+#undef as2
+#undef a0a2
+#undef b0b2 /* never defined in this file; harmless no-op */
+#undef a1a3
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef b0
+#undef b1
+#undef b2 /* never defined in this file; harmless no-op */
+
+}
diff --git a/mpn/generic/toom53_mul.c b/mpn/generic/toom53_mul.c
new file mode 100644
index 0000000..c934297
--- /dev/null
+++ b/mpn/generic/toom53_mul.c
@@ -0,0 +1,331 @@
+/* mpn_toom53_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/3
+   times as large as bn.  Or more accurately, (4/3)bn < an < (5/2)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s-><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______
+  |a4_|___a3_|___a2_|___a1_|___a0_|
+	       |__b2|___b1_|___b0_|
+	       <-t--><--n--><--n-->
+
+  v0  =    a0                  *  b0          #    A(0)*B(0)
+  v1  = (  a0+ a1+ a2+ a3+  a4)*( b0+ b1+ b2) #    A(1)*B(1)      ah  <= 4   bh <= 2
+  vm1 = (  a0- a1+ a2- a3+  a4)*( b0- b1+ b2) #   A(-1)*B(-1)    |ah| <= 2   bh <= 1
+  v2  = (  a0+2a1+4a2+8a3+16a4)*( b0+2b1+4b2) #    A(2)*B(2)      ah  <= 30  bh <= 6
+  vm2 = (  a0-2a1+4a2-8a3+16a4)*( b0-2b1+4b2) #   A(-2)*B(-2)    -9<=ah<=20 -1<=bh<=4
+  vh  = (16a0+8a1+4a2+2a3+  a4)*(4b0+2b1+ b2) #  A(1/2)*B(1/2)    ah  <= 30  bh <= 6
+  vinf=                     a4 *          b2  #  A(inf)*B(inf)
+*/
+
+/* Multiply {ap,an} by {bp,bn} into {pp,an+bn}.  A is split into five and
+   B into three n-limb blocks; both polynomials are evaluated at the seven
+   points listed in the table above, the seven pointwise products are
+   formed, and mpn_toom_interpolate_7pts recombines them.  scratch must
+   provide 10*n+5 limbs (see the layout defines below).  */
+void
+mpn_toom53_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2, asm2, ash;
+  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
+  mp_ptr tmp;
+  enum toom7_flags flags;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define a4  (ap + 4*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+
+  /* Block size n = max (ceil (an/5), ceil (bn/3)); the condition picks
+     whichever operand's ratio dominates.  */
+  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
+
+  /* Sizes of the most significant (partial) blocks of A and B.  */
+  s = an - 4 * n;
+  t = bn - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  /* Ten (n+1)-limb temporaries hold the evaluated values of A and B.  */
+  tmp = TMP_ALLOC_LIMBS (10 * (n + 1));
+  as1  = tmp; tmp += n + 1;
+  asm1 = tmp; tmp += n + 1;
+  as2  = tmp; tmp += n + 1;
+  asm2 = tmp; tmp += n + 1;
+  ash  = tmp; tmp += n + 1;
+  bs1  = tmp; tmp += n + 1;
+  bsm1 = tmp; tmp += n + 1;
+  bs2  = tmp; tmp += n + 1;
+  bsm2 = tmp; tmp += n + 1;
+  bsh  = tmp; tmp += n + 1;
+
+  /* The product area is used as evaluation scratch until the products
+     are written.  */
+  gp = pp;
+
+  /* Compute as1 and asm1.  */
+  flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));
+
+  /* Compute as2 and asm2. */
+  flags = (enum toom7_flags) (flags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp)));
+
+  /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
+     = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (ash, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (ash, a4, ash, s);
+      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
+      MPN_INCR_U (ash + s, n+1-s, cy2);
+    }
+  else
+    ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+#else
+  cy = mpn_lshift (ash, a0, n, 1);
+  cy += mpn_add_n (ash, ash, a1, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a2, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a3, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  ash[n] = cy + mpn_add (ash, ash, n, a4, s);
+#endif
+
+  /* Compute bs1 and bsm1.  */
+  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
+    {
+      /* b0 + b2 < b1: B(-1) is negative; record that in flags.  */
+      bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
+      bsm1[n] = 0;
+      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, bs1, b1, n);
+      bsm1[n] = bs1[n] - (cy & 1);
+      bs1[n] += (cy >> 1);
+    }
+#else
+  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
+    {
+      mpn_sub_n (bsm1, b1, bs1, n);
+      bsm1[n] = 0;
+      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+    }
+  else
+    {
+      bsm1[n] = bs1[n] - mpn_sub_n (bsm1, bs1, b1, n);
+    }
+  bs1[n] += mpn_add_n (bs1, bs1, b1, n);  /* b0+b1+b2 */
+#endif
+
+  /* Compute bs2 and bsm2. */
+#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
+#if HAVE_NATIVE_mpn_addlsh2_n
+  cy = mpn_addlsh2_n (bs2, b0, b2, t);
+#else /* HAVE_NATIVE_mpn_addlsh_n */
+  cy = mpn_addlsh_n (bs2, b0, b2, t, 2);
+#endif
+  if (t < n)
+    cy = mpn_add_1 (bs2 + t, b0 + t, n - t, cy);
+  bs2[n] = cy;
+#else
+  cy = mpn_lshift (gp, b2, t, 2);
+  bs2[n] = mpn_add (bs2, b0, n, gp, t);
+  MPN_INCR_U (bs2 + t, n+1-t, cy);
+#endif
+
+  gp[n] = mpn_lshift (gp, b1, n, 1);
+
+  /* Here bs2 holds b0 + 4*b2 and gp holds 2*b1; combine them into
+     B(2) = b0 + 2*b1 + 4*b2 and |B(-2)| = |b0 - 2*b1 + 4*b2|.  */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (mpn_cmp (bs2, gp, n+1) < 0)
+    {
+      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
+      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, bs2, gp, n+1));
+    }
+#else
+  if (mpn_cmp (bs2, gp, n+1) < 0)
+    {
+      ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
+      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_n (bsm2, bs2, gp, n+1));
+    }
+  mpn_add_n (bs2, bs2, gp, n+1);
+#endif
+
+  /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1)+b2.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (bsh, b1, b0, n);
+  if (t < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (bsh, b2, bsh, t);
+      bsh[n] = 2*cy + mpn_lshift (bsh + t, bsh + t, n - t, 1);
+      MPN_INCR_U (bsh + t, n+1-t, cy2);
+    }
+  else
+    bsh[n] = 2*cy + mpn_addlsh1_n (bsh, b2, bsh, n);
+#else
+  cy = mpn_lshift (bsh, b0, n, 1);
+  cy += mpn_add_n (bsh, bsh, b1, n);
+  cy = 2*cy + mpn_lshift (bsh, bsh, n, 1);
+  bsh[n] = cy + mpn_add (bsh, bsh, n, b2, t);
+#endif
+
+  /* High-limb bounds follow from the coefficient bounds in the table
+     at the top of the file.  */
+  ASSERT (as1[n] <= 4);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 2);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <= 30);
+  ASSERT (bs2[n] <= 6);
+  ASSERT (asm2[n] <= 20);
+  ASSERT (bsm2[n] <= 4);
+  ASSERT (ash[n] <= 30);
+  ASSERT (bsh[n] <= 6);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define v2    scratch				/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
+#define scratch_out (scratch + 8 * n + 4)		/* 2n+1 */
+  /* Total scratch need: 10*n+5 */
+
+  /* Must be in allocation order, as they overwrite one limb beyond
+   * 2n+1. */
+  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
+  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */
+
+  /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  /* Multiply only the n low limbs, then patch in the cross terms with
+     the (small) high limbs asm1[n] and bsm1[n].  */
+  mpn_mul_n (vm1, asm1, bsm1, n);
+  if (asm1[n] == 1)
+    {
+      cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+    }
+  else if (asm1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = 2 * bsm1[n] + mpn_addlsh1_n_ip1 (vm1 + n, bsm1, n);
+#else
+      cy = 2 * bsm1[n] + mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bsm1[n] != 0)
+    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  vm1[2 * n] = cy;
+#else /* SMALLER_RECURSION */
+  vm1[2 * n] = 0;
+  mpn_mul_n (vm1, asm1, bsm1, n + ((asm1[n] | bsm1[n]) != 0));
+#endif /* SMALLER_RECURSION */
+
+  /* v1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  mpn_mul_n (v1, as1, bs1, n);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] != 0)
+    {
+      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
+    }
+  else
+    cy = 0;
+  if (bs1[n] == 1)
+    {
+      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (bs1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+      cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  v1[2 * n] = cy;
+#else /* SMALLER_RECURSION */
+  v1[2 * n] = 0;
+  mpn_mul_n (v1, as1, bs1, n + ((as1[n] | bs1[n]) != 0));
+#endif /* SMALLER_RECURSION */
+
+  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a4, s, b2, t);
+  else        mpn_mul (vinf, b2, t, a4, s);
+
+  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t,
+			     scratch_out);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/toom54_mul.c b/mpn/generic/toom54_mul.c
new file mode 100644
index 0000000..343b02e
--- /dev/null
+++ b/mpn/generic/toom54_mul.c
@@ -0,0 +1,142 @@
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Toom-4.5, the splitting 5x4 unbalanced version.
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+  <--s-><--n--><--n--><--n--><--n-->
+   ____ ______ ______ ______ ______
+  |_a4_|__a3__|__a2__|__a1__|__a0__|
+	  |b3_|__b2__|__b1__|__b0__|
+	  <-t-><--n--><--n--><--n-->
+
+*/
+/* Recursion shims: the ws argument is accepted for interface uniformity
+   but unused by these plain-multiply expansions.  */
+#define TOOM_54_MUL_N_REC(p, a, b, n, ws)		\
+  do {	mpn_mul_n (p, a, b, n);				\
+  } while (0)
+
+#define TOOM_54_MUL_REC(p, a, na, b, nb, ws)		\
+  do {	mpn_mul (p, a, na, b, nb);			\
+  } while (0)
+
+/* Multiply {ap,an} (split into five n-limb blocks) by {bp,bn} (split
+   into four): evaluate both operands at the eight points listed above,
+   multiply the evaluated values pairwise, and recover the product with
+   mpn_toom_interpolate_8pts.  The evaluations use pp as workspace.  */
+void
+mpn_toom54_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int sign;
+
+  /***************************** decomposition *******************************/
+#define a4  (ap + 4 * n)
+#define b3  (bp + 3 * n)
+
+  ASSERT (an >= bn);
+  /* n = max (ceil (an/5), ceil (bn/4)).  */
+  n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
+
+  /* Sizes of the most significant (partial) blocks.  */
+  s = an - 4 * n;
+  t = bn - 3 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  /* Required by mpn_toom_interpolate_8pts. */
+  ASSERT ( s + t >= n );
+  ASSERT ( s + t > 4);
+  ASSERT ( n > 2);
+
+#define   r8    pp				/* 2n   */
+#define   r7    scratch				/* 3n+1 */
+#define   r5    (pp + 3*n)			/* 3n+1 */
+#define   v0    (pp + 3*n)			/* n+1 */
+#define   v1    (pp + 4*n+1)			/* n+1 */
+#define   v2    (pp + 5*n+2)			/* n+1 */
+#define   v3    (pp + 6*n+3)			/* n+1 */
+#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r1    (pp + 7*n)			/* s+t <= 2*n */
+#define   ws    (scratch + 6 * n + 2)		/* ??? */
+
+  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch  */
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp)
+       ^ mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+  TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s,    pp)
+       ^ mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+  TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp)
+       ^ mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+  TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+  /* A(0)*B(0) */
+  TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
+
+  /* Infinity */
+  if (s > t) {
+    TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
+  } else {
+    TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
+  };
+
+  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a4
+#undef b3
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
diff --git a/mpn/generic/toom62_mul.c b/mpn/generic/toom62_mul.c
new file mode 100644
index 0000000..d971cc0
--- /dev/null
+++ b/mpn/generic/toom62_mul.c
@@ -0,0 +1,310 @@
+/* mpn_toom62_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 3 times
+   as large as bn.  Or more accurately, (5/2)bn < an < 6bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluate in:
+   0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s-><--n--><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______ ______
+  |a5_|___a4_|___a3_|___a2_|___a1_|___a0_|
+			     |_b1_|___b0_|
+			     <-t--><--n-->
+
+  v0  =    a0                       *   b0      #    A(0)*B(0)
+  v1  = (  a0+  a1+ a2+ a3+  a4+  a5)*( b0+ b1) #    A(1)*B(1)      ah  <= 5   bh <= 1
+  vm1 = (  a0-  a1+ a2- a3+  a4-  a5)*( b0- b1) #   A(-1)*B(-1)    |ah| <= 2   bh  = 0
+  v2  = (  a0+ 2a1+4a2+8a3+16a4+32a5)*( b0+2b1) #    A(2)*B(2)      ah  <= 62  bh <= 2
+  vm2 = (  a0- 2a1+4a2-8a3+16a4-32a5)*( b0-2b1) #   A(-2)*B(-2)    -41<=ah<=20 -1<=bh<=0
+  vh  = (32a0+16a1+8a2+4a3+ 2a4+  a5)*(2b0+ b1) #  A(1/2)*B(1/2)    ah  <= 62  bh <= 2
+  vinf=                           a5 *      b1  #  A(inf)*B(inf)
+*/
+
+/* Multiply {ap,an} by {bp,bn} into {pp,an+bn}.  A is split into six and
+   B into two n-limb blocks; both are evaluated at the seven points in
+   the table above, the seven pointwise products are formed, and
+   mpn_toom_interpolate_7pts recombines them.  scratch needs 10*n+5
+   limbs (see the layout defines below).  */
+void
+mpn_toom62_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn,
+		mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  mp_ptr as1, asm1, as2, asm2, ash;
+  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
+  mp_ptr gp;
+  enum toom7_flags aflags, bflags;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define a4  (ap + 4*n)
+#define a5  (ap + 5*n)
+#define b0  bp
+#define b1  (bp + n)
+
+  /* n = max (ceil (an/6), ceil (bn/2)).  */
+  n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
+
+  /* Sizes of the most significant (partial) blocks.  */
+  s = an - 5 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  as1 = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
+  as2 = TMP_SALLOC_LIMBS (n + 1);
+  asm2 = TMP_SALLOC_LIMBS (n + 1);
+  ash = TMP_SALLOC_LIMBS (n + 1);
+
+  bs1 = TMP_SALLOC_LIMBS (n + 1);
+  bsm1 = TMP_SALLOC_LIMBS (n);	/* only n limbs: B(-1) high limb is 0 (bh = 0 above) */
+  bs2 = TMP_SALLOC_LIMBS (n + 1);
+  bsm2 = TMP_SALLOC_LIMBS (n + 1);
+  bsh = TMP_SALLOC_LIMBS (n + 1);
+
+  /* The product area serves as evaluation scratch.  */
+  gp = pp;
+
+  /* Compute as1 and asm1.  */
+  aflags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp));
+
+  /* Compute as2 and asm2. */
+  aflags = (enum toom7_flags) (aflags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp)));
+
+  /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
+     = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5  */
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (ash, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (ash, a5, ash, s);
+      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
+      MPN_INCR_U (ash + s, n+1-s, cy2);
+    }
+  else
+    ash[n] = 2*cy + mpn_addlsh1_n (ash, a5, ash, n);
+#else
+  cy = mpn_lshift (ash, a0, n, 1);
+  cy += mpn_add_n (ash, ash, a1, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a2, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a3, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a4, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  ash[n] = cy + mpn_add (ash, ash, n, a5, s);
+#endif
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+	  bflags = toom7_w3_neg;
+	}
+      else
+	{
+	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+	  bflags = (enum toom7_flags) 0;
+	}
+      bs1[n] = cy >> 1;
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
+      if (mpn_cmp (b0, b1, n) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, n);
+	  bflags = toom7_w3_neg;
+	}
+      else
+	{
+	  mpn_sub_n (bsm1, b0, b1, n);
+	  bflags = (enum toom7_flags) 0;
+	}
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+	{
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
+	  bflags = toom7_w3_neg;
+	}
+      else
+	{
+	  mpn_sub (bsm1, b0, n, b1, t);
+	  bflags = (enum toom7_flags) 0;
+	}
+    }
+
+  /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
+     bsm1 - b1 */
+  mpn_add (bs2, bs1, n + 1, b1, t);
+  if (bflags & toom7_w3_neg)
+    {
+      /* bsm1 holds b1 - b0, so bsm2 = -(b0 - 2*b1) = bsm1 + b1.  */
+      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
+      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+    }
+  else
+    {
+      /* FIXME: Simplify this logic? */
+      if (t < n)
+	{
+	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
+	    {
+	      ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
+	      MPN_ZERO (bsm2 + t, n + 1 - t);
+	      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+	    }
+	  else
+	    {
+	      ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, t));
+	      bsm2[n] = 0;
+	    }
+	}
+      else
+	{
+	  if (mpn_cmp (bsm1, b1, n) < 0)
+	    {
+	      ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
+	      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
+	    }
+	  else
+	    {
+	      ASSERT_NOCARRY (mpn_sub_n (bsm2, bsm1, b1, n));
+	    }
+	  bsm2[n] = 0;
+	}
+    }
+
+  /* Compute bsh, recycling bs1. bsh=bs1+b0;  */
+  bsh[n] = bs1[n] + mpn_add_n (bsh, bs1, b0, n);
+
+  /* High-limb bounds follow from the coefficient bounds in the table
+     at the top of the file.  */
+  ASSERT (as1[n] <= 5);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 2);
+  ASSERT (as2[n] <= 62);
+  ASSERT (bs2[n] <= 2);
+  ASSERT (asm2[n] <= 41);
+  ASSERT (bsm2[n] <= 1);
+  ASSERT (ash[n] <= 62);
+  ASSERT (bsh[n] <= 2);
+
+#define v0    pp				/* 2n */
+#define v1    (pp + 2 * n)			/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define v2    scratch				/* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
+#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
+#define scratch_out (scratch + 8 * n + 4)		/* 2n+1 */
+  /* Total scratch need: 10*n+5 */
+
+  /* Must be in allocation order, as they overwrite one limb beyond
+   * 2n+1. */
+  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
+  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */
+
+  /* vm1, 2n+1 limbs */
+  /* Multiply the n low limbs, then patch in the cross term from the
+     (small) high limb asm1[n]; bsm1 has no high limb.  */
+  mpn_mul_n (vm1, asm1, bsm1, n);
+  cy = 0;
+  if (asm1[n] == 1)
+    {
+      cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+    }
+  else if (asm1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = mpn_addlsh1_n (vm1 + n, vm1 + n, bsm1, n);
+#else
+      cy = mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
+#endif
+    }
+  vm1[2 * n] = cy;
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] != 0)
+    {
+      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
+    }
+  else
+    cy = 0;
+  if (bs1[n] != 0)
+    cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+  v1[2 * n] = cy;
+
+  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a5, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a5, s);
+
+  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) (aflags ^ bflags),
+			     vm2, vm1, v2, vh, s + t, scratch_out);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/toom63_mul.c b/mpn/generic/toom63_mul.c
new file mode 100644
index 0000000..181996d
--- /dev/null
+++ b/mpn/generic/toom63_mul.c
@@ -0,0 +1,231 @@
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign:
+   0 when {ap,n} >= {bp,n}, ~0 when {ap,n} < {bp,n}. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_limb_t  x, y;
+  /* Scan downward from the most significant limb for the first
+     difference; equal high limbs yield zero result limbs.  The loop
+     relies on mp_size_t being a signed type.  */
+  while (--n >= 0)
+    {
+      x = ap[n];
+      y = bp[n];
+      if (x != y)
+	{
+	  n++;		/* restore count of remaining (differing) low limbs */
+	  if (x > y)
+	    {
+	      mpn_sub_n (rp, ap, bp, n);
+	      return 0;
+	    }
+	  else
+	    {
+	      mpn_sub_n (rp, bp, ap, n);
+	      return ~0;
+	    }
+	}
+      rp[n] = 0;
+    }
+  return 0;	/* operands equal; {rp,n} fully zeroed above */
+}
+
+/* Sets {rm,n} = |{rp,n} - {rs,n}| and {rp,n} = {rp,n} + {rs,n};
+   returns the sign from abs_sub_n (~0 iff rp < rs on entry).
+   The sum is asserted not to carry out of n limbs.  */
+static int
+abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
+  int result;
+  result = abs_sub_n (rm, rp, rs, n);
+  ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
+  return result;
+}
+
+
+/* Toom-4.5, the splitting 6x3 unbalanced version.
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+  <--s-><--n--><--n--><--n--><--n--><--n-->
+   ____ ______ ______ ______ ______ ______
+  |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
+			|b2_|__b1__|__b0__|
+			<-t-><--n--><--n-->
+
+*/
+/* Recursion shims: the ws argument is accepted for interface uniformity
+   but unused by these plain-multiply expansions.  */
+#define TOOM_63_MUL_N_REC(p, a, b, n, ws)		\
+  do {	mpn_mul_n (p, a, b, n);				\
+  } while (0)
+
+#define TOOM_63_MUL_REC(p, a, na, b, nb, ws)		\
+  do {	mpn_mul (p, a, na, b, nb);			\
+  } while (0)
+
+/* Multiply {ap,an} (split into six n-limb blocks) by {bp,bn} (split
+   into three): evaluate both operands at the eight points listed above,
+   multiply the evaluated values pairwise, and recover the product with
+   mpn_toom_interpolate_8pts.  The evaluations use pp as workspace.  */
+void
+mpn_toom63_mul (mp_ptr pp,
+		mp_srcptr ap, mp_size_t an,
+		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  int sign;
+
+  /***************************** decomposition *******************************/
+#define a5  (ap + 5 * n)
+#define b0  (bp + 0 * n)
+#define b1  (bp + 1 * n)
+#define b2  (bp + 2 * n)
+
+  ASSERT (an >= bn);
+  /* n = max (ceil (an/6), ceil (bn/3)).  */
+  n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
+
+  /* Sizes of the most significant (partial) blocks.  */
+  s = an - 5 * n;
+  t = bn - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  /* WARNING! it assumes s+t>=n */
+  ASSERT ( s + t >= n );
+  ASSERT ( s + t > 4);
+  /* WARNING! it assumes n>1 */
+  ASSERT ( n > 2);
+
+#define   r8    pp				/* 2n   */
+#define   r7    scratch				/* 3n+1 */
+#define   r5    (pp + 3*n)			/* 3n+1 */
+#define   v0    (pp + 3*n)			/* n+1 */
+#define   v1    (pp + 4*n+1)			/* n+1 */
+#define   v2    (pp + 5*n+2)			/* n+1 */
+#define   v3    (pp + 6*n+3)			/* n+1 */
+#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r1    (pp + 7*n)			/* s+t <= 2*n */
+#define   ws    (scratch + 6 * n + 2)		/* ??? */
+
+  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch  */
+/*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
+  pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
+  /* FIXME: use addlsh */
+  v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
+  if ( n == t )
+    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
+  else
+    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
+  /* Combine into B(4) = 16b2+4b1+b0 and |B(-4)| = |16b2-4b1+b0|.  */
+  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+  TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
+  /* Compute bs1 and bsm1. Code taken from toom33 */
+  cy = mpn_add (ws, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
+    {
+      /* b0 + b2 < b1: B(-1) is negative; flip the sign.  */
+      cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
+      v3[n] = cy >> 1;
+      v1[n] = 0;
+      sign = ~sign;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
+      v3[n] = cy + (cy2 >> 1);
+      v1[n] = cy - (cy2 & 1);
+    }
+#else
+  v3[n] = cy + mpn_add_n (v3, ws, b1, n);
+  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
+    {
+      mpn_sub_n (v1, b1, ws, n);
+      v1[n] = 0;
+      sign = ~sign;
+    }
+  else
+    {
+      cy -= mpn_sub_n (v1, ws, b1, n);
+      v1[n] = cy;
+    }
+#endif
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+  TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
+  pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
+  /* FIXME: use addlsh or addlsh2 */
+  v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
+  if ( n == t )
+    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
+  else
+    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
+  /* Combine into B(2) = 4b2+2b1+b0 and |B(-2)| = |4b2-2b1+b0|.  */
+  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+  TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+  /* A(0)*B(0) */
+  TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
+
+  /* Infinity */
+  if (s > t) {
+    TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
+  } else {
+    TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
+  };
+
+  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a5
+#undef b0
+#undef b1
+#undef b2
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
diff --git a/mpn/generic/toom6_sqr.c b/mpn/generic/toom6_sqr.c
new file mode 100644
index 0000000..336eef9
--- /dev/null
+++ b/mpn/generic/toom6_sqr.c
@@ -0,0 +1,181 @@
+/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase   1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_above_toom2   1
+#define MAYBE_sqr_toom3   1
+#define MAYBE_sqr_above_toom3   1
+#define MAYBE_sqr_above_toom4   1
+#else
+#ifdef  SQR_TOOM8_THRESHOLD
+#define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
+#else
+#define SQR_TOOM6_MAX					\
+  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ?	\
+   ((SQR_FFT_THRESHOLD+6*2-1+5)/6)			\
+   : MP_SIZE_T_MAX )
+#endif
+#define MAYBE_sqr_basecase					\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase				\
+  (SQR_TOOM6_MAX >=  SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2						\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2					\
+  (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3						\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3					\
+  (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom4					\
+  (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
+#endif
+
+#define TOOM6_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
+	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+    else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
+      mpn_toom3_sqr (p, a, n, ws);					\
+    else if (! MAYBE_sqr_above_toom4					\
+	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))		\
+      mpn_toom4_sqr (p, a, n, ws);					\
+    else								\
+      mpn_toom6_sqr (p, a, n, ws);					\
+  } while (0)
+
+void
+mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+  mp_size_t n, s;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT( an >= 18 );
+
+  n = 1 + (an - 1) / (size_t) 6;	/* n = ceil(an/6): size of each block */
+
+  s = an - 5 * n;	/* most significant (possibly short) block; an = 5*n + s */
+
+  ASSERT (0 < s && s <= n);
+
+#define   r4    (pp + 3 * n)			/* 3n+1 */
+#define   r2    (pp + 7 * n)			/* 3n+1 */
+#define   r0    (pp +11 * n)			/* s+t <= 2*n */
+#define   r5    (scratch)			/* 3n+1 */
+#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
+#define   v0    (pp + 7 * n)			/* n+1 */
+#define   v2    (pp + 9 * n+2)			/* n+1 */
+#define   wse   (scratch + 9 * n + 3)		/* 3n+1 */
+
+  /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch  */
+/*   if (scratch== NULL) */
+/*     scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/2$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
+  TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
+
+  /* $\pm1$ */
+  mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
+  TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
+
+  /* $\pm4$ */
+  mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
+  TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
+
+  /* $\pm1/4$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
+  TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
+
+  /* $\pm2$ */
+  mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
+  TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
+
+#undef v0
+#undef v2
+
+  /* A(0)*B(0) */
+  TOOM6_SQR_REC(pp, ap, n, wse);
+
+  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+
+}
+#undef TOOM6_SQR_REC
+#undef MAYBE_sqr_basecase
+#undef MAYBE_sqr_above_basecase
+#undef MAYBE_sqr_toom2
+#undef MAYBE_sqr_above_toom2
+#undef MAYBE_sqr_toom3
+#undef MAYBE_sqr_above_toom3
+#undef MAYBE_sqr_above_toom4
diff --git a/mpn/generic/toom6h_mul.c b/mpn/generic/toom6h_mul.c
new file mode 100644
index 0000000..637f2a5
--- /dev/null
+++ b/mpn/generic/toom6h_mul.c
@@ -0,0 +1,262 @@
+/* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom33   1
+#define MAYBE_mul_toom6h   1
+#else
+#define MAYBE_mul_basecase						\
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom33						\
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
+#define MAYBE_mul_toom6h						\
+  (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
+#endif
+
+#define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
+  do {									\
+    if (MAYBE_mul_basecase						\
+	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+      if (f)								\
+	mpn_mul_basecase (p2, a2, n, b2, n);				\
+    } else if (MAYBE_mul_toom22						\
+	       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+      if (f)								\
+	mpn_toom22_mul (p2, a2, n, b2, n, ws);				\
+    } else if (MAYBE_mul_toom33						\
+	       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
+      mpn_toom33_mul (p, a, n, b, n, ws);				\
+      if (f)								\
+	mpn_toom33_mul (p2, a2, n, b2, n, ws);				\
+    } else if (! MAYBE_mul_toom6h					\
+	       || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
+      mpn_toom44_mul (p, a, n, b, n, ws);				\
+      if (f)								\
+	mpn_toom44_mul (p2, a2, n, b2, n, ws);				\
+    } else {								\
+      mpn_toom6h_mul (p, a, n, b, n, ws);				\
+      if (f)								\
+	mpn_toom6h_mul (p2, a2, n, b2, n, ws);				\
+    }									\
+  } while (0)
+
+#define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
+  do { mpn_mul (p, a, na, b, nb);			\
+  } while (0)
+
+/* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
+   With: an >= bn >= 46, an*6 <  bn * 17.
+   It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
+
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
+*/
+/* Estimate on needed scratch:
+   S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
+   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
+ */
+
+void
+mpn_toom6h_mul   (mp_ptr pp,
+		  mp_srcptr ap, mp_size_t an,
+		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int p, q, half;
+  int sign;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT (an >= bn);
+  /* Can not handle too small operands */
+  ASSERT (bn >= 42);
+  /* Can not handle too much unbalancement */
+  ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
+
+  /* Limit num/den is a rational number between
+     (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
+#define LIMIT_numerator (18)
+#define LIMIT_denominat (17)
+
+  if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
+    {
+      n = 1 + (an - 1) / (size_t) 6;	/* n = ceil(an/6) */
+      p = q = 5;
+      half = 0;
+
+      s = an - 5 * n;
+      t = bn - 5 * n;
+    }
+  else {
+    if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
+      { p = 7; q = 6; }
+    else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
+      { p = 7; q = 5; }
+    else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
+      { p = 8; q = 5; }
+    else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
+      { p = 8; q = 4; }
+    else
+      { p = 9; q = 4; }
+
+    half = (p ^ q) & 1;	/* p+q odd: a separate product at infinity is needed */
+    n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+    p--; q--;
+
+    s = an - p * n;
+    t = bn - q * n;
+
+    /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
+    if (half) { /* Recover from badly chosen splitting */
+      if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+      else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
+    }
+  }
+#undef LIMIT_numerator
+#undef LIMIT_denominat
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (half || s + t > 3);
+  ASSERT (n > 2);
+
+#define   r4    (pp + 3 * n)			/* 3n+1 */
+#define   r2    (pp + 7 * n)			/* 3n+1 */
+#define   r0    (pp +11 * n)			/* s+t <= 2*n */
+#define   r5    (scratch)			/* 3n+1 */
+#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
+#define   v0    (pp + 7 * n)			/* n+1 */
+#define   v1    (pp + 8 * n+1)			/* n+1 */
+#define   v2    (pp + 9 * n+2)			/* n+1 */
+#define   v3    (scratch + 9 * n + 3)		/* n+1 */
+#define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
+#define   wse   (scratch +10 * n + 4)		/* 2n+1 */
+
+  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
+     need all of them  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
+  ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
+  ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/2$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
+  if (UNLIKELY (q == 3))
+    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  else
+    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
+
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
+	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
+  /* A(-4)*B(-4) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
+
+  /* $\pm1/4$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
+	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
+
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef wse
+
+  /* A(0)*B(0) */
+  TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
+
+  /* Infinity */
+  if (UNLIKELY (half != 0)) {
+    if (s > t) {
+      TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
+    } else {
+      TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
+    };
+  };
+
+  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef wsi
+}
+
+#undef TOOM6H_MUL_N_REC
+#undef TOOM6H_MUL_REC
+#undef MAYBE_mul_basecase
+#undef MAYBE_mul_toom22
+#undef MAYBE_mul_toom33
+#undef MAYBE_mul_toom6h
diff --git a/mpn/generic/toom8_sqr.c b/mpn/generic/toom8_sqr.c
new file mode 100644
index 0000000..03e5c64
--- /dev/null
+++ b/mpn/generic/toom8_sqr.c
@@ -0,0 +1,225 @@
+/* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented.
+#endif
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+#ifndef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase   1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_above_toom2   1
+#define MAYBE_sqr_toom3   1
+#define MAYBE_sqr_above_toom3   1
+#define MAYBE_sqr_toom4   1
+#define MAYBE_sqr_above_toom4   1
+#define MAYBE_sqr_above_toom6   1
+#else
+#define SQR_TOOM8_MAX					\
+  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?	\
+   ((SQR_FFT_THRESHOLD+8*2-1+7)/8)			\
+   : MP_SIZE_T_MAX )
+#define MAYBE_sqr_basecase					\
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase				\
+  (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2						\
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2					\
+  (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3						\
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3					\
+  (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_toom4						\
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
+#define MAYBE_sqr_above_toom4					\
+  (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
+#define MAYBE_sqr_above_toom6					\
+  (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
+#endif
+
+#define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws)				\
+  do {									\
+    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
+	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) {			\
+      mpn_sqr_basecase (p, a, n);					\
+      if (f) mpn_sqr_basecase (p2, a2, n);				\
+    } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) {		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+      if (f) mpn_toom2_sqr (p2, a2, n, ws);				\
+    } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) {		\
+      mpn_toom3_sqr (p, a, n, ws);					\
+      if (f) mpn_toom3_sqr (p2, a2, n, ws);				\
+    } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) {		\
+      mpn_toom4_sqr (p, a, n, ws);					\
+      if (f) mpn_toom4_sqr (p2, a2, n, ws);				\
+    } else if (! MAYBE_sqr_above_toom6					\
+	     || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) {		\
+      mpn_toom6_sqr (p, a, n, ws);					\
+      if (f) mpn_toom6_sqr (p2, a2, n, ws);				\
+    } else {								\
+      mpn_toom8_sqr (p, a, n, ws);					\
+      if (f) mpn_toom8_sqr (p2, a2, n, ws);				\
+    }									\
+  } while (0)
+
+void
+mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+  mp_size_t n, s;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT ( an >= 40 );
+
+  n = 1 + ((an - 1)>>3);	/* n = ceil(an/8): size of each block */
+
+  s = an - 7 * n;	/* most significant (possibly short) block; an = 7*n + s */
+
+  ASSERT (0 < s && s <= n);
+  ASSERT ( s + s > 3 );
+
+#define   r6    (pp + 3 * n)			/* 3n+1 */
+#define   r4    (pp + 7 * n)			/* 3n+1 */
+#define   r2    (pp +11 * n)			/* 3n+1 */
+#define   r0    (pp +15 * n)			/* s+t <= 2*n */
+#define   r7    (scratch)			/* 3n+1 */
+#define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
+#define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
+#define   v0    (pp +11 * n)			/* n+1 */
+#define   v2    (pp +13 * n+2)			/* n+1 */
+#define   wse   (scratch +12 * n + 4)		/* 3n+1 */
+
+  /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/8$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
+  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+  TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
+  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
+
+  /* $\pm1/4$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
+
+  /* $\pm2$ */
+  mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
+
+  /* $\pm8$ */
+  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
+  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+  TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
+  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
+
+  /* $\pm1/2$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
+  mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
+
+  /* $\pm1$ */
+  mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
+
+  /* $\pm4$ */
+  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
+  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+  TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
+
+#undef v0
+#undef v2
+
+  /* A(0)*B(0) */
+  TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
+
+  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef r6
+#undef wse
+
+}
+
+#undef TOOM8_SQR_REC
+#undef MAYBE_sqr_basecase
+#undef MAYBE_sqr_above_basecase
+#undef MAYBE_sqr_toom2
+#undef MAYBE_sqr_above_toom2
+#undef MAYBE_sqr_toom3
+#undef MAYBE_sqr_above_toom3
+#undef MAYBE_sqr_above_toom4
diff --git a/mpn/generic/toom8h_mul.c b/mpn/generic/toom8h_mul.c
new file mode 100644
index 0000000..5ba259a
--- /dev/null
+++ b/mpn/generic/toom8h_mul.c
@@ -0,0 +1,305 @@
+/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented.
+#endif
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom33   1
+#define MAYBE_mul_toom44   1
+#define MAYBE_mul_toom8h   1
+#else
+#define MAYBE_mul_basecase						\
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom33						\
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
+#define MAYBE_mul_toom44						\
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
+#define MAYBE_mul_toom8h						\
+  (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
+#endif
+
+#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
+  do {									\
+    if (MAYBE_mul_basecase						\
+	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
+      mpn_mul_basecase (p, a, n, b, n);					\
+      if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
+    } else if (MAYBE_mul_toom22						\
+	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
+    } else if (MAYBE_mul_toom33						\
+	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
+      mpn_toom33_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
+    } else if (MAYBE_mul_toom44						\
+	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
+      mpn_toom44_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
+    } else if (! MAYBE_mul_toom8h					\
+	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
+      mpn_toom6h_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
+    } else {								\
+      mpn_toom8h_mul (p, a, n, b, n, ws);				\
+      if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
+    }									\
+  } while (0)
+
+#define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
+  do { mpn_mul (p, a, na, b, nb); } while (0)
+
+/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
+   With: an >= bn >= 86, an*5 <  bn * 11.
+   It _may_ work with bn<=?? and bn*?? < an*? < bn*??
+
+   Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
+*/
+/* Estimate on needed scratch:
+   S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
+   since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
+ */
+
+void
+mpn_toom8h_mul   (mp_ptr pp,
+		  mp_srcptr ap, mp_size_t an,
+		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int p, q, half;
+  int sign;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT (an >= bn);
+  /* Can not handle too small operands */
+  ASSERT (bn >= 86);
+  /* Can not handle too much unbalancement */
+  ASSERT (an <= bn*4);
+  ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
+  ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
+  ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
+
+  /* Limit num/den is a rational number between
+     (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
+#define LIMIT_numerator (21)
+#define LIMIT_denominat (20)
+
+  if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
+    {
+      half = 0;
+      n = 1 + ((an - 1)>>3);	/* n = ceil(an/8) */
+      p = q = 7;
+      s = an - 7 * n;
+      t = bn - 7 * n;
+    }
+  else
+    {
+      if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
+	{ p = 9; q = 8; }
+      else if (GMP_NUMB_BITS <= 9*3 ||
+	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
+	{ p = 9; q = 7; }
+      else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
+	{ p =10; q = 7; }
+      else if (GMP_NUMB_BITS <= 10*3 ||
+	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
+	{ p =10; q = 6; }
+      else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
+	{ p =11; q = 6; }
+      else if (GMP_NUMB_BITS <= 11*3 ||
+	       an * 4 < 9 * bn)
+	{ p =11; q = 5; }
+      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
+	{ p =12; q = 5; }
+      else if (GMP_NUMB_BITS <= 12*3 ||
+	       an * 9 < 28 * bn )  /* is 4*... <12*... */
+	{ p =12; q = 4; }
+      else
+	{ p =13; q = 4; }
+
+      half = (p+q)&1;	/* p+q odd: a separate product at infinity is needed */
+      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+      p--; q--;
+
+      s = an - p * n;
+      t = bn - q * n;
+
+      if(half) { /* Recover from badly chosen splitting */
+	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
+      }
+    }
+#undef LIMIT_numerator
+#undef LIMIT_denominat
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (half || s + t > 3);
+  ASSERT (n > 2);
+
+#define   r6    (pp + 3 * n)			/* 3n+1 */
+#define   r4    (pp + 7 * n)			/* 3n+1 */
+#define   r2    (pp +11 * n)			/* 3n+1 */
+#define   r0    (pp +15 * n)			/* s+t <= 2*n */
+#define   r7    (scratch)			/* 3n+1 */
+#define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
+#define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
+#define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
+#define   v0    (pp +11 * n)			/* n+1 */
+#define   v1    (pp +12 * n+1)			/* n+1 */
+#define   v2    (pp +13 * n+2)			/* n+1 */
+#define   v3    (scratch +12 * n + 4)		/* n+1 */
+#define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
+#define   wse   (scratch +13 * n + 5)		/* 2n+1 */
+
+  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
+     need all of them  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
+  ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
+  ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
+
+  /********************** evaluation and recursive calls *********************/
+
+  /* $\pm1/8$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
+  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
+
+  /* $\pm1/4$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
+	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
+
+  /* $\pm8$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
+	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
+  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
+
+  /* $\pm1/2$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
+	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
+  if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
+    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  else
+    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
+
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
+	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
+  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
+
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef wse
+
+  /* A(0)*B(0) */
+  TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
+
+  /* Infinity */
+  if (UNLIKELY (half != 0)) {
+    if (s > t) {
+      TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
+    } else {
+      TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
+    };
+  };
+
+  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef r6
+#undef wsi
+}
+
+#undef TOOM8H_MUL_N_REC
+#undef TOOM8H_MUL_REC
+#undef MAYBE_mul_basecase
+#undef MAYBE_mul_toom22
+#undef MAYBE_mul_toom33
+#undef MAYBE_mul_toom44
+#undef MAYBE_mul_toom8h
diff --git a/mpn/generic/toom_couple_handling.c b/mpn/generic/toom_couple_handling.c
new file mode 100644
index 0000000..cd253f7
--- /dev/null
+++ b/mpn/generic/toom_couple_handling.c
@@ -0,0 +1,80 @@
+/* Helper function for high degree Toom-Cook algorithms.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Gets {pp,n} and (sign?-1:1)*{np,n}. Computes at once:
+     {pp,n} <- ({pp,n}+{np,n})/2^{ps+1}
+     {np,n} <- ({pp,n}-{np,n})/2^{ns+1}
+   Finally recompose them obtaining:
+     {pp,n+off} <- {pp,n}+{np,n}*2^{off*GMP_NUMB_BITS}
+*/
+void
+mpn_toom_couple_handling (mp_ptr pp, mp_size_t n, mp_ptr np,
+			  int nsign, mp_size_t off, int ps, int ns)
+{
+  if (nsign) { /* {np,n} is stored negated: np <- (pp - np) / 2 */
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+    mpn_rsh1sub_n (np, pp, np, n);
+#else
+    mpn_sub_n (np, pp, np, n);
+    mpn_rshift (np, np, n, 1);
+#endif
+  } else { /* np <- (pp + np) / 2 */
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+    mpn_rsh1add_n (np, pp, np, n);
+#else
+    mpn_add_n (np, pp, np, n);
+    mpn_rshift (np, np, n, 1);
+#endif
+  }
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  if (ps == 1)
+    mpn_rsh1sub_n (pp, pp, np, n); /* fused (pp - np) / 2 */
+  else
+#endif
+  {
+    mpn_sub_n (pp, pp, np, n);
+    if (ps > 0)
+      mpn_rshift (pp, pp, n, ps); /* pp <- (pp - np) / 2^ps */
+  }
+  if (ns > 0)
+    mpn_rshift (np, np, n, ns); /* np <- np / 2^ns */
+  pp[n] = mpn_add_n (pp+off, pp+off, np, n-off); /* recompose: pp += np << (off limbs) */
+  ASSERT_NOCARRY (mpn_add_1(pp+n, np+n-off, off, pp[n]) ); /* top off limbs of np, plus carry */
+}
diff --git a/mpn/generic/toom_eval_dgr3_pm1.c b/mpn/generic/toom_eval_dgr3_pm1.c
new file mode 100644
index 0000000..5f491b6
--- /dev/null
+++ b/mpn/generic/toom_eval_dgr3_pm1.c
@@ -0,0 +1,72 @@
+/* mpn_toom_eval_dgr3_pm1 -- Evaluate a degree 3 polynomial in +1 and -1
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+int
+mpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1,
+			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
+{
+  int neg;  /* ~0 when X(-1) is negative (odd sum exceeds even sum), else 0 */
+
+  ASSERT (x3n > 0);
+  ASSERT (x3n <= n);
+
+  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);       /* x0 + x2 */
+  tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n);  /* x1 + x3 (x3 is x3n limbs) */
+
+  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
+  else
+    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
+#else
+  if (neg)
+    mpn_sub_n (xm1, tp, xp1, n + 1);   /* xm1 = |X(-1)| */
+  else
+    mpn_sub_n (xm1, xp1, tp, n + 1);
+
+  mpn_add_n (xp1, xp1, tp, n + 1);     /* xp1 = X(+1) */
+#endif
+
+  ASSERT (xp1[n] <= 3);
+  ASSERT (xm1[n] <= 1);
+
+  return neg;
+}
diff --git a/mpn/generic/toom_eval_dgr3_pm2.c b/mpn/generic/toom_eval_dgr3_pm2.c
new file mode 100644
index 0000000..55e6b89
--- /dev/null
+++ b/mpn/generic/toom_eval_dgr3_pm2.c
@@ -0,0 +1,97 @@
+/* mpn_toom_eval_dgr3_pm2 -- Evaluate a degree 3 polynomial in +2 and -2
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Needs n+1 limbs of temporary storage. */
+int
+mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,
+			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
+{
+  mp_limb_t cy;
+  int neg;  /* ~0 when X(-2) is negative, else 0 */
+
+  ASSERT (x3n > 0);
+  ASSERT (x3n <= n);
+
+  /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */
+#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
+#if HAVE_NATIVE_mpn_addlsh2_n
+  xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n);           /* x0 + 4*x2 */
+
+  cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n);          /* x1 + 4*x3, low x3n limbs */
+#else /* HAVE_NATIVE_mpn_addlsh_n */
+  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2);
+
+  cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2);
+#endif
+  if (x3n < n)
+    cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy);  /* high limbs of x1, plus carry */
+  tp[n] = cy;
+#else
+  cy = mpn_lshift (tp, xp + 2*n, n, 2);                    /* 4*x2 */
+  xp2[n] = cy + mpn_add_n (xp2, tp, xp, n);                /* x0 + 4*x2 */
+
+  tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2);             /* 4*x3 */
+  if (x3n < n)
+    tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1);          /* x1 + 4*x3 */
+  else
+    tp[n] += mpn_add_n (tp, xp + n, tp, n);
+#endif
+  mpn_lshift (tp, tp, n+1, 1);                             /* 2*x1 + 8*x3 */
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else
+  if (neg)
+    mpn_sub_n (xm2, tp, xp2, n + 1);   /* xm2 = |X(-2)| */
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);     /* xp2 = X(+2) */
+#endif
+
+  ASSERT (xp2[n] < 15);
+  ASSERT (xm2[n] < 10);
+
+  return neg;
+}
diff --git a/mpn/generic/toom_eval_pm1.c b/mpn/generic/toom_eval_pm1.c
new file mode 100644
index 0000000..a8cfa93
--- /dev/null
+++ b/mpn/generic/toom_eval_pm1.c
@@ -0,0 +1,89 @@
+/* mpn_toom_eval_pm1 -- Evaluate a polynomial in +1 and -1
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */
+int
+mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
+		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{
+  unsigned i;
+  int neg;  /* ~0 when X(-1) is negative, else 0 */
+
+  ASSERT (k >= 4);
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);   /* sum of even-degree coefficients */
+  for (i = 4; i < k; i += 2)
+    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n));
+
+  tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n); /* sum of odd-degree coefficients */
+  for (i = 5; i < k; i += 2)
+    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n));
+
+  if (k & 1)   /* the short top coefficient has degree k: odd iff k is odd */
+    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn));
+  else
+    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn));
+
+  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
+  else
+    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
+#else
+  if (neg)
+    mpn_sub_n (xm1, tp, xp1, n + 1);   /* xm1 = |X(-1)| */
+  else
+    mpn_sub_n (xm1, xp1, tp, n + 1);
+
+  mpn_add_n (xp1, xp1, tp, n + 1);     /* xp1 = X(+1) */
+#endif
+
+  ASSERT (xp1[n] <= k);
+  ASSERT (xm1[n] <= k/2 + 1);
+
+  return neg;
+}
diff --git a/mpn/generic/toom_eval_pm2.c b/mpn/generic/toom_eval_pm2.c
new file mode 100644
index 0000000..be682c7
--- /dev/null
+++ b/mpn/generic/toom_eval_pm2.c
@@ -0,0 +1,130 @@
+/* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2
+
+   Contributed to the GNU project by Niels Möller and Marco Bodrato
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}), it
+   can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on {d,n+1}. */
+#if HAVE_NATIVE_mpn_addlsh2_n
+#define DO_addlsh2(d, a, b, n, cy)	\
+do {					\
+  (cy) <<= 2;				\
+  (cy) += mpn_addlsh2_n(d, a, b, n);	\
+} while (0)
+#else
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_addlsh2(d, a, b, n, cy)	\
+do {					\
+  (cy) <<= 2;				\
+  (cy) += mpn_addlsh_n(d, a, b, n, 2);	\
+} while (0)
+#else
+/* The following is not a general substitute for addlsh2: the mpn_lshift
+   writes d before a is read, so it is correct if d == b, but not if d == a.  */
+#define DO_addlsh2(d, a, b, n, cy)	\
+do {					\
+  (cy) <<= 2;				\
+  (cy) += mpn_lshift(d, b, n, 2);	\
+  (cy) += mpn_add_n(d, d, a, n);	\
+} while (0)
+#endif
+#endif
+
+/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the
+   points +2 and -2. */
+int
+mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
+		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{
+  int i;   /* must be signed: the loops below count down past zero */
+  int neg; /* ~0 when X(-2) is negative, else 0 */
+  mp_limb_t cy;
+
+  ASSERT (k >= 3);
+  ASSERT (k < GMP_NUMB_BITS);
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+  cy = 0;
+  DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);  /* x_{k-2} + 4*x_k, low hn limbs */
+  if (hn != n)
+    cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
+  for (i = k - 4; i >= 0; i -= 2)
+    DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);            /* Horner in 4 over one parity */
+  xp2[n] = cy;
+
+  k--;   /* now accumulate the coefficients of the other parity */
+
+  cy = 0;
+  DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
+  for (i = k - 4; i >= 0; i -= 2)
+    DO_addlsh2 (tp, xp + i * n, tp, n, cy);
+  tp[n] = cy;
+
+  if (k & 1)   /* double the sum that holds the odd-degree coefficients */
+    ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));
+  else
+    ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg)
+    mpn_sub_n (xm2, tp, xp2, n + 1);
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  ASSERT (xp2[n] < (1<<(k+2))-1);
+  ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3);
+
+  neg ^= ((k & 1) - 1);  /* (k&1)-1 is 0 for odd k, ~0 for even k: flip neg for even k */
+
+  return neg;
+}
+
+#undef DO_addlsh2
diff --git a/mpn/generic/toom_eval_pm2exp.c b/mpn/generic/toom_eval_pm2exp.c
new file mode 100644
index 0000000..c3c4651
--- /dev/null
+++ b/mpn/generic/toom_eval_pm2exp.c
@@ -0,0 +1,127 @@
+/* mpn_toom_eval_pm2exp -- Evaluate a polynomial in +2^k and -2^k
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+/* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift. */
+int
+mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
+		      mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
+		      mp_ptr tp)
+{
+  unsigned i;
+  int neg;  /* ~0 when X(-2^shift) is negative, else 0 */
+#if HAVE_NATIVE_mpn_addlsh_n
+  mp_limb_t cy;
+#endif
+
+  ASSERT (k >= 3);
+  ASSERT (shift*k < GMP_NUMB_BITS);  /* all scaled coefficients must fit one extra limb */
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+#if HAVE_NATIVE_mpn_addlsh_n
+  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);  /* x0 + x2*2^(2*shift) */
+  for (i = 4; i < k; i += 2)
+    xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);
+
+  tp[n] = mpn_lshift (tp, xp+n, n, shift);                /* x1*2^shift */
+  for (i = 3; i < k; i+= 2)
+    tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);
+
+  if (k & 1)
+    {
+      cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);
+      MPN_INCR_U (tp + hn, n+1 - hn, cy);
+    }
+  else
+    {
+      cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);
+      MPN_INCR_U (xp2 + hn, n+1 - hn, cy);
+    }
+
+#else /* !HAVE_NATIVE_mpn_addlsh_n */
+  xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);
+  xp2[n] += mpn_add_n (xp2, xp, tp, n);
+  for (i = 4; i < k; i += 2)
+    {
+      xp2[n] += mpn_lshift (tp, xp + i*n, n, i*shift);
+      xp2[n] += mpn_add_n (xp2, xp2, tp, n);
+    }
+
+  tp[n] = mpn_lshift (tp, xp+n, n, shift);
+  for (i = 3; i < k; i+= 2)
+    {
+      tp[n] += mpn_lshift (xm2, xp + i*n, n, i*shift);  /* xm2 used as scratch here */
+      tp[n] += mpn_add_n (tp, tp, xm2, n);
+    }
+
+  xm2[hn] = mpn_lshift (xm2, xp + k*n, hn, k*shift);    /* top (short) coefficient */
+  if (k & 1)
+    mpn_add (tp, tp, n+1, xm2, hn+1);
+  else
+    mpn_add (xp2, xp2, n+1, xm2, hn+1);
+#endif /* !HAVE_NATIVE_mpn_addlsh_n */
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg)
+    mpn_sub_n (xm2, tp, xp2, n + 1);
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
+  ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
+	  xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
+  ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
+	  xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));
+
+  return neg;
+}
diff --git a/mpn/generic/toom_eval_pm2rexp.c b/mpn/generic/toom_eval_pm2rexp.c
new file mode 100644
index 0000000..6cd62fb
--- /dev/null
+++ b/mpn/generic/toom_eval_pm2rexp.c
@@ -0,0 +1,101 @@
+/* mpn_toom_eval_pm2rexp -- Evaluate a polynomial in +2^-k and -2^-k
+
+   Contributed to the GNU project by Marco Bodrato
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t /* {dst,n} += {src,n} << s; returns carry.  {ws,n} is scratch. */
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift(ws,src,n,s);
+  return    __cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+/* Evaluates a polynomial of degree k >= 3. */
+int
+mpn_toom_eval_pm2rexp (mp_ptr rp, mp_ptr rm,
+		      unsigned int q, mp_srcptr ap, mp_size_t n, mp_size_t t,
+		      unsigned int s, mp_ptr ws)
+{
+  unsigned int i;
+  int neg;  /* ~0 when the odd-degree sum exceeds the even-degree sum, else 0 */
+  /* {ap,q*n+t} -> {rp,n+1} {rm,n+1} , with {ws, n+1}*/
+  ASSERT (n >= t);
+  ASSERT (s != 0); /* or _eval_pm1 should be used */
+  ASSERT (q > 1);
+  ASSERT (s*q < GMP_NUMB_BITS);
+  rp[n] = mpn_lshift(rp, ap, n, s*q);        /* a0 * 2^(s*q) */
+  ws[n] = mpn_lshift(ws, ap+n, n, s*(q-1));  /* a1 * 2^(s*(q-1)) */
+  if( (q & 1) != 0) {
+    ASSERT_NOCARRY(mpn_add(ws,ws,n+1,ap+n*q,t));         /* top coefficient: odd degree */
+    rp[n] += DO_mpn_addlsh_n(rp, ap+n*(q-1), n, s, rm);  /* rm used as scratch here */
+  } else {
+    ASSERT_NOCARRY(mpn_add(rp,rp,n+1,ap+n*q,t));
+  }
+  for(i=2; i<q-1; i++)
+  {
+    rp[n] += DO_mpn_addlsh_n(rp, ap+n*i, n, s*(q-i), rm);  /* even-degree coefficients */
+    i++;
+    ws[n] += DO_mpn_addlsh_n(ws, ap+n*i, n, s*(q-i), rm);  /* odd-degree coefficients */
+  };
+
+  neg = (mpn_cmp (rp, ws, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (rp, rm, ws, rp, n + 1);
+  else
+    mpn_add_n_sub_n (rp, rm, rp, ws, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg)
+    mpn_sub_n (rm, ws, rp, n + 1);
+  else
+    mpn_sub_n (rm, rp, ws, n + 1);
+
+  ASSERT_NOCARRY (mpn_add_n (rp, rp, ws, n + 1));
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  return neg;
+}
diff --git a/mpn/generic/toom_interpolate_12pts.c b/mpn/generic/toom_interpolate_12pts.c
new file mode 100644
index 0000000..6273466
--- /dev/null
+++ b/mpn/generic/toom_interpolate_12pts.c
@@ -0,0 +1,374 @@
+/* Interpolation for the algorithm Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2015, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented: Both sublsh_n(,,,20) should be corrected.
+#endif
+
+#if GMP_NUMB_BITS < 16
+#error Not implemented: divexact_by42525 needs splitting.
+#endif
+
+#if GMP_NUMB_BITS < 12
+#error Not implemented: Hard to adapt...
+#endif
+
+
+/* FIXME: tuneup should decide the best variant */
+#ifndef AORSMUL_FASTER_AORS_AORSLSH
+#define AORSMUL_FASTER_AORS_AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_AORS_2AORSLSH
+#define AORSMUL_FASTER_AORS_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_2AORSLSH
+#define AORSMUL_FASTER_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_3AORSLSH
+#define AORSMUL_FASTER_3AORSLSH 1
+#endif
+
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t /* {dst,n} -= {src,n} << s; returns borrow.  {ws,n} is scratch. */
+DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift(ws,src,n,s);
+  return    __cy + mpn_sub_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+#if !defined (AORSMUL_FASTER_2AORSLSH) && !defined (AORSMUL_FASTER_AORS_2AORSLSH)
+static mp_limb_t /* {dst,n} += {src,n} << s; returns carry.  {ws,n} is scratch. */
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift(ws,src,n,s);
+  return    __cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
+#else
+/* {dst,nd} -= {src,ns} >> s.  FIXME: not a correct general definition: assumes no borrow out */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
+do {									\
+  mp_limb_t __cy;							\
+  MPN_DECR_U (dst, nd, src[0] >> s);					\
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
+} while (0)
+#endif
+
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39) /* binary inverse of 9, for pi1_bdiv_q_1 */
+
+#define BINVERT_255 \
+  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8))) /* binary inverse of 255 */
+
+  /* FIXME: find some more general expressions for 2835^-1, 42525^-1 */
+#if GMP_LIMB_BITS == 32
+#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
+#else
+#if GMP_LIMB_BITS == 64
+#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
+#endif
+#endif
+
+#ifndef mpn_divexact_by255
+#if GMP_NUMB_BITS % 8 == 0 /* then 255 divides GMP_NUMB_MAX: bdiv_dbm1 applies */
+#define mpn_divexact_by255(dst,src,size) \
+  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
+#else
+#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
+#endif
+#endif
+#endif
+
+#ifndef mpn_divexact_by9x4 /* exact division by 36 = 9 << 2 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,2)
+#else
+#define mpn_divexact_by9x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<2)
+#endif
+#endif
+
+#ifndef mpn_divexact_by42525
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
+#define mpn_divexact_by42525(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,0)
+#else
+#define mpn_divexact_by42525(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525))
+#endif
+#endif
+
+#ifndef mpn_divexact_by2835x4 /* exact division by 11340 = 2835 << 2 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
+#define mpn_divexact_by2835x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,2)
+#else
+#define mpn_divexact_by2835x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<2)
+#endif
+#endif
+
+/* Interpolation for Toom-6.5 (or Toom-6), using the evaluation
+   points: infinity(6.5 only), +-4, +-2, +-1, +-1/4, +-1/2, 0. More precisely,
+   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
+   degree 11 (or 10), given the 12 (rsp. 11) values:
+
+     r0 = limit at infinity of f(x) / x^11,
+     r1 = f(4),f(-4),
+     r2 = f(2),f(-2),
+     r3 = f(1),f(-1),
+     r4 = f(1/4),f(-1/4),
+     r5 = f(1/2),f(-1/2),
+     r6 = f(0).
+
+   All couples of the form f(n),f(-n) must be already mixed with
+   toom_couple_handling(f(n),...,f(-n),...)
+
+   The result is stored in {pp, spt + 7*n (or 6*n)}.
+   At entry, r6 is stored at {pp, 2n},
+   r4 is stored at {pp + 3n, 3n + 1}.
+   r2 is stored at {pp + 7n, 3n + 1}.
+   r0 is stored at {pp +11n, spt}.
+
+   The other values are 3n+1 limbs each (with most significant limbs small).
+
+   Negative intermediate results are stored two-complemented.
+   Inputs are destroyed.
+*/
+
+void
+mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
+			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
+{
+  mp_limb_t cy;
+  mp_size_t n3;
+  mp_size_t n3p1;
+  n3 = 3 * n;
+  n3p1 = n3 + 1;
+
+#define   r4    (pp + n3)			/* 3n+1 */
+#define   r2    (pp + 7 * n)			/* 3n+1 */
+#define   r0    (pp +11 * n)			/* s+t <= 2*n */
+
+  /******************************* interpolation *****************************/
+  if (half != 0) {	/* subtract the contributions of r0 (the infinity point) */
+    cy = mpn_sub_n (r3, r3, r0, spt);
+    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
+
+    cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
+    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);
+
+    cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
+    MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
+  };
+
+  r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);	/* r4 -= r6*2^20, offset n limbs */
+  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
+#else
+  ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
+  mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
+  MP_PTR_SWAP(r1, wsi);
+#endif
+
+  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);	/* r5 -= r6*2^10, offset n limbs */
+  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
+#else
+  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
+  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
+  MP_PTR_SWAP(r5, wsi);
+#endif
+
+  r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);	/* r3 -= r6 (= {pp,2n}), offset n limbs */
+
+#if AORSMUL_FASTER_AORS_AORSLSH
+  mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative; 257 = 1 + 256 */
+#else
+  mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
+  DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
+#endif
+  /* A division by 2835x4 follows. Warning: the operand can be negative! */
+  mpn_divexact_by2835x4(r4, r4, n3p1);
+  if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
+    r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));	/* sign-extend a negative quotient */
+
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative; 60 = 64 - 4 */
+#else
+  DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
+  DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
+#endif
+  mpn_divexact_by255(r5, r5, n3p1);
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));	/* r2 -= r3*32 */
+
+#if AORSMUL_FASTER_3AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));	/* 100 = 64 + 32 + 4 */
+#else
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
+#endif
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));	/* r1 -= r3*512 */
+  mpn_divexact_by42525(r1, r1, n3p1);
+
+#if AORSMUL_FASTER_AORS_2AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));	/* 225 = 256 - 32 + 1 */
+#else
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
+  ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
+#endif
+  mpn_divexact_by9x4(r2, r2, n3p1);
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (r4, r2, r4, n3p1);
+  r4 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_sub_n (r4, r2, r4, n3p1);	/* r4 = (r2 - r4) / 2 */
+  ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
+#endif
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+  mpn_rsh1add_n (r5, r5, r1, n3p1);
+  r5 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_add_n (r5, r5, r1, n3p1);	/* r5 = (r5 + r1) / 2 */
+  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
+#endif
+
+  /* last interpolation steps... */
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
+  /* ... could be mixed with recomposition
+	||H-r5|M-r5|L-r5|   ||H-r1|M-r1|L-r1|
+  */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
+
+    summation scheme for remaining operations:
+    |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
+	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|
+  */
+
+  cy = mpn_add_n (pp + n, pp + n, r5, n);	/* add r5 at offset n limbs */
+  cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
+  cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);
+
+  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);	/* add r3 at offset 5n limbs */
+  cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
+  cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
+
+  pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);	/* add r1 at offset 9n limbs */
+  if (half) {
+    cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+    if (LIKELY (spt > n)) {
+      cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
+      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
+    }
+#else
+    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
+    if (LIKELY (spt > n)) {
+      cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
+      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
+    }
+#endif
+  } else {
+    ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
+  }
+
+#undef   r0
+#undef   r2
+#undef   r4
+}
diff --git a/mpn/generic/toom_interpolate_16pts.c b/mpn/generic/toom_interpolate_16pts.c
new file mode 100644
index 0000000..c1457be
--- /dev/null
+++ b/mpn/generic/toom_interpolate_16pts.c
@@ -0,0 +1,545 @@
+/* Interpolation for the algorithm Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2015, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
+#endif
+
+#if GMP_NUMB_BITS < 28
+#error Not implemented: divexact_by188513325 and _by182712915 will not work.
+#endif
+
+
+/* FIXME: tuneup should decide the best variant */
+#ifndef AORSMUL_FASTER_AORS_AORSLSH
+#define AORSMUL_FASTER_AORS_AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_AORS_2AORSLSH
+#define AORSMUL_FASTER_AORS_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_2AORSLSH
+#define AORSMUL_FASTER_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_3AORSLSH
+#define AORSMUL_FASTER_3AORSLSH 1
+#endif
+
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
+#else
+/* Substitute for a missing native mpn_sublsh_n: subtract {src,n},
+   shifted left by s bits, from {dst,n} in place, using {ws,n} as
+   scratch for the shifted operand.  The return value is the borrow
+   from the subtraction plus the bits of src shifted out at the top,
+   matching the convention of the native routine wrapped above. */
+static mp_limb_t
+DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_submul_1 (dst, src, n, CNST_LIMB(1) << s);
+#else
+  mp_limb_t high;		/* limb shifted out at the top of src */
+
+  high = mpn_lshift (ws, src, n, s);
+  return high + mpn_sub_n (dst, dst, ws, n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+#if !defined (AORSMUL_FASTER_2AORSLSH) && !defined (AORSMUL_FASTER_AORS_2AORSLSH)
+/* Substitute for a missing native mpn_addlsh_n: add {src,n}, shifted
+   left by s bits, into {dst,n} in place, using {ws,n} as scratch for
+   the shifted operand.  Returns the carry from the addition plus the
+   bits of src shifted out at the top, matching the convention of the
+   native routine wrapped above. */
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_addmul_1 (dst, src, n, CNST_LIMB(1) << s);
+#else
+  mp_limb_t high;		/* limb shifted out at the top of src */
+
+  high = mpn_lshift (ws, src, n, s);
+  return high + mpn_add_n (dst, dst, ws, n);
+#endif
+}
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
+#else
+/* FIXME: This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
+do {									\
+  mp_limb_t __cy;							\
+  MPN_DECR_U (dst, nd, src[0] >> s);					\
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
+} while (0)
+#endif
+
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_255 \
+  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
+
+  /* FIXME: find some more general expressions for inverses */
+#if GMP_LIMB_BITS == 32
+#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
+#define BINVERT_182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x550659DB))
+#define BINVERT_188513325 (GMP_NUMB_MASK &	CNST_LIMB(0xFBC333A5))
+#define BINVERT_255x182712915L (GMP_NUMB_MASK &	CNST_LIMB(0x6FC4CB25))
+#define BINVERT_255x188513325L (GMP_NUMB_MASK &	CNST_LIMB(0x6864275B))
+#if GMP_NAIL_BITS == 0
+#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
+#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
+#else /* GMP_NAIL_BITS != 0 */
+#define BINVERT_255x182712915H \
+  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
+#define BINVERT_255x188513325H \
+  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
+#endif
+#else
+#if GMP_LIMB_BITS == 64
+#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
+#define BINVERT_255x182712915  (GMP_NUMB_MASK &	CNST_LIMB(0x1B649A076FC4CB25))
+#define BINVERT_255x188513325  (GMP_NUMB_MASK &	CNST_LIMB(0x06DB993A6864275B))
+#endif
+#endif
+
+#ifndef mpn_divexact_by255
+#if GMP_NUMB_BITS % 8 == 0
+#define mpn_divexact_by255(dst,src,size) \
+  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
+#else
+#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
+#endif
+#endif
+#endif
+
+#ifndef mpn_divexact_by255x4
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
+#else
+#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
+#endif
+#endif
+
+#ifndef mpn_divexact_by9x16
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
+#else
+#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
+#endif
+#endif
+
+#ifndef mpn_divexact_by42525x16
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
+#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
+#else
+#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
+#endif
+#endif
+
+#ifndef mpn_divexact_by2835x64
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
+#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
+#else
+#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
+#endif
+#endif
+
+#ifndef  mpn_divexact_by255x182712915
+#if GMP_NUMB_BITS < 36
+#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
+/* FIXME: use mpn_bdiv_q_2_pi2 */
+#endif
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
+#define mpn_divexact_by255x182712915(dst,src,size)				\
+  do {										\
+    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0);	\
+    mpn_divexact_by255(dst,dst,size);						\
+  } while(0)
+#else
+#define mpn_divexact_by255x182712915(dst,src,size)	\
+  do {							\
+    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));	\
+    mpn_divexact_by255(dst,dst,size);			\
+  } while(0)
+#endif
+#else /* GMP_NUMB_BITS > 35 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
+#define mpn_divexact_by255x182712915(dst,src,size) \
+  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
+#else
+#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
+#endif
+#endif /* GMP_NUMB_BITS >?< 36 */
+#endif
+
+#ifndef  mpn_divexact_by255x188513325
+#if GMP_NUMB_BITS < 36
+#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
+/* FIXME: use mpn_bdiv_q_1_pi2 */
+#endif
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
+#define mpn_divexact_by255x188513325(dst,src,size)			\
+  do {									\
+    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0);	\
+    mpn_divexact_by255(dst,dst,size);					\
+  } while(0)
+#else
+#define mpn_divexact_by255x188513325(dst,src,size)	\
+  do {							\
+    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));	\
+    mpn_divexact_by255(dst,dst,size);			\
+  } while(0)
+#endif
+#else /* GMP_NUMB_BITS > 35 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
+#define mpn_divexact_by255x188513325(dst,src,size) \
+  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
+#else
+#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
+#endif
+#endif /* GMP_NUMB_BITS >?< 36 */
+#endif
+
+/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
+   points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
+   +-1/8, 0. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
+   14), given the 16 (rsp. 15) values:
+
+     r0 = limit at infinity of f(x) / x^15,
+     r1 = f(8),f(-8),
+     r2 = f(4),f(-4),
+     r3 = f(2),f(-2),
+     r4 = f(1),f(-1),
+     r5 = f(1/4),f(-1/4),
+     r6 = f(1/2),f(-1/2),
+     r7 = f(1/8),f(-1/8),
+     r8 = f(0).
+
+   All couples of the form f(n),f(-n) must be already mixed with
+   toom_couple_handling(f(n),...,f(-n),...)
+
+   The result is stored in {pp, spt + 7*n (or 8*n)}.
+   At entry, r8 is stored at {pp, 2n},
+   r6 is stored at {pp + 3n, 3n + 1}.
+   r4 is stored at {pp + 7n, 3n + 1}.
+   r2 is stored at {pp +11n, 3n + 1}.
+   r0 is stored at {pp +15n, spt}.
+
+   The other values are 3n+1 limbs each (with most significant limbs small).
+
+   Negative intermediate results are stored two-complemented.
+   Inputs are destroyed.
+*/
+
+void
+mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
+			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
+{
+  mp_limb_t cy;
+  mp_size_t n3;
+  mp_size_t n3p1;
+  n3 = 3 * n;
+  n3p1 = n3 + 1;
+
+  /* The samples in even position (r6, r4, r2) and the leading
+     coefficient r0 already live inside the result area {pp,...};
+     only r1, r3, r5, r7 arrive in separate buffers.  wsi is scratch
+     used by the shift fall-back helpers. */
+#define   r6    (pp + n3)			/* 3n+1 */
+#define   r4    (pp + 7 * n)			/* 3n+1 */
+#define   r2    (pp +11 * n)			/* 3n+1 */
+#define   r0    (pp +15 * n)			/* s+t <= 2*n */
+
+  ASSERT( spt <= 2 * n );
+  /******************************* interpolation *****************************/
+  /* When the extra degree-15 coefficient r0 is present (half != 0),
+     remove its point-dependent shifted contribution from every other
+     sample before solving the linear system. */
+  if( half != 0) {
+    cy = mpn_sub_n (r4, r4, r0, spt);
+    MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
+
+    cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
+    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
+
+    cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
+    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
+
+    cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
+#if BIT_CORRECTION
+    cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
+		    n3p1 - spt - BIT_CORRECTION, cy);
+    ASSERT (BIT_CORRECTION > 0 || cy == 0);
+    /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
+    /* Temporarily park a sentinel in the limb past r7 so the borrow
+       from the following subrsh cannot run off the buffer; the saved
+       limb is restored right below. */
+    cy = r7[n3p1];
+    r7[n3p1] = 0x80;
+#else
+    MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
+#endif
+    DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
+#if BIT_CORRECTION
+    /* FIXME: assumes r7[n3p1] is writable. */
+    ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
+    r7[n3p1] = cy;
+#endif
+  };
+
+  /* Remove the contribution of r8 (the value at 0, stored at {pp,2n})
+     from each remaining couple, then combine each couple of samples
+     f(x), f(-x) into their sum and difference. */
+  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
+  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
+#else
+  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
+  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
+  MP_PTR_SWAP(r5, wsi);
+#endif
+
+  r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
+  DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
+#else
+  ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
+  mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
+  MP_PTR_SWAP(r3, wsi);
+#endif
+
+  cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
+#if BIT_CORRECTION
+  /* Narrow limbs: apply the right shift by 6 to r1 by hand, limb by
+     limb, since DO_mpn_subrsh cannot express it here. */
+  MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
+  cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
+  cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
+  ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
+#else
+  r7[n3] -= cy;
+  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
+#endif
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
+#else
+  mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
+  mpn_add_n (r1, r1, r7, n3p1);  /* if BIT_CORRECTION != 0, can give a carry. */
+  MP_PTR_SWAP(r7, wsi);
+#endif
+
+  r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
+
+  /* Gaussian-elimination steps: cancel known multiples of other rows,
+     then divide by the resulting (exact) constant factors. */
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
+#else
+  DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
+#endif
+
+  mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
+#if AORSMUL_FASTER_3AORSLSH
+  mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
+#else
+  DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
+#endif
+  mpn_divexact_by255x188513325(r7, r7, n3p1);
+
+  mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
+  /* A division by 2835x64 follows. Warning: the operand can be negative! */
+  mpn_divexact_by2835x64(r5, r5, n3p1);
+  /* The dividend may have been negative (two's complement), so sign
+     extend the top limb of the quotient. */
+  if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
+    r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
+
+#if AORSMUL_FASTER_AORS_AORSLSH
+  mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
+#else
+  mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
+  DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
+#endif
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
+#else
+  DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
+  DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
+#endif
+  /* A division by 255x4 follows. Warning: the operand can be negative! */
+  mpn_divexact_by255x4(r6, r6, n3p1);
+  /* Sign extend the quotient, as above. */
+  if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
+    r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
+
+  /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
+  DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
+  mpn_submul_1 (r1, r2, n3p1, 1428);
+  mpn_submul_1 (r1, r3, n3p1, 112896);
+  mpn_divexact_by255x182712915(r1, r1, n3p1);
+
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
+  mpn_divexact_by42525x16(r2, r2, n3p1);
+
+#if AORSMUL_FASTER_AORS_2AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
+#else
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
+  ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
+#endif
+  ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
+  mpn_divexact_by9x16(r3, r3, n3p1);
+
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
+
+  /* Final halving steps: (a+b)/2 and (a-b)/2 pairs; the masking of
+     the top limb discards the bit shifted in by rsh1add/rsh1sub. */
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+  mpn_rsh1add_n (r6, r2, r6, n3p1);
+  r6 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_add_n (r6, r2, r6, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
+#endif
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (r5, r3, r5, n3p1);
+  r5 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_sub_n (r5, r3, r5, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
+#endif
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
+
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+  mpn_rsh1add_n (r7, r1, r7, n3p1);
+  r7 [n3p1 - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_add_n (r7, r1, r7, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
+#endif
+  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
+
+  /* last interpolation steps... */
+  /* ... could be mixed with recomposition
+	||H-r7|M-r7|L-r7|   ||H-r5|M-r5|L-r5|
+  */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
+
+    summation scheme for remaining operations:
+    |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
+	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|   ||H r7|M r7|L r7|
+  */
+
+  /* Add each odd coefficient (r7, r5, r3, r1) at its position in pp,
+     propagating the carry into the following limbs. */
+  cy = mpn_add_n (pp + n, pp + n, r7, n);
+  cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r7 + 2 * n, n + 1, cy);
+  cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
+
+  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
+  cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
+  cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
+
+  pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
+  cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
+  cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
+
+  /* The top coefficient only partially overlaps the tail of pp when
+     the product is short (spt <= n and/or half == 0). */
+  pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
+  if ( half ) {
+    cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+    if(LIKELY(spt > n)) {
+      cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
+      MPN_INCR_U (pp + 16 * n, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
+    }
+#else
+    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
+    if(LIKELY(spt > n)) {
+      cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
+      MPN_INCR_U (pp + 16 * n, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
+    }
+#endif
+  } else {
+    ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
+  }
+
+#undef   r0
+#undef   r2
+#undef   r4
+#undef   r6
+}
diff --git a/mpn/generic/toom_interpolate_5pts.c b/mpn/generic/toom_interpolate_5pts.c
new file mode 100644
index 0000000..466ab85
--- /dev/null
+++ b/mpn/generic/toom_interpolate_5pts.c
@@ -0,0 +1,198 @@
+/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
+
+   Contributed to the GNU project by Robert Harley.
+   Improvements by Paul Zimmermann and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Interpolate the five sampled values and recompose the product.
+   On entry (see the layout comments below):
+     {c, 2k}        v0   = f(0),
+     {c+2k, 2k+1}   v1   = f(1),
+     {vm1, 2k+1}    vm1  = f(-1), stored negated iff sa != 0,
+     {v2, 2k+1}     v2   = f(2),
+     {c+4k+1, 2r-1} high part of vinf = f(inf); its lowest limb is
+                    passed separately in vinf0 because c[4k] still
+                    holds the top limb of v1 at this point.
+   k is the block size and twor (= 2r) the length of vinf.  The
+   recomposed result overwrites {c, 4k + twor}; the vm1 and v2
+   buffers are destroyed. */
+void
+mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
+			   mp_size_t k, mp_size_t twor, int sa,
+			   mp_limb_t vinf0)
+{
+  mp_limb_t cy, saved;
+  mp_size_t twok;
+  mp_size_t kk1;
+  mp_ptr c1, v1, c3, vinf;
+
+  twok = k + k;
+  kk1 = twok + 1;
+
+  c1 = c  + k;
+  v1 = c1 + k;
+  c3 = v1 + k;
+  vinf = c3 + k;
+
+#define v0 (c)
+  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
+     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
+  */
+  if (sa)
+    ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
+  else
+    ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
+
+  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
+						      /* (5 3 1 1 0)*/
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
+
+  /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
+     tm1 >= 0                                         (0  1 0  1 0)
+     No carry comes out from {v1, kk1} +/- {vm1, kk1},
+     and the division by two is exact.
+     If (sa!=0) the sign of vm1 is negative */
+  if (sa)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (vm1, v1, vm1, kk1);
+#else
+      ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
+      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
+#else
+      ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
+      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+#endif
+    }
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
+
+  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
+     t1 >= 0
+  */
+  vinf[0] -= mpn_sub_n (v1, v1, c, twok);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
+
+  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
+     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
+  */
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (v2, v2, v1, kk1);
+#else
+  ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
+  ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
+#endif
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
+
+  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
+     result is v1 >= 0
+  */
+  ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
+
+  /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
+  cy = mpn_add_n (c1, c1, vm1, kk1);
+  MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
+  /* Memory allocated for vm1 is now free, it can be recycled ...*/
+
+  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
+     result is v2 >= 0 */
+  saved = vinf[0];       /* Remember v1's highest limb (will be overwritten). */
+  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
+#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
+  cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
+#else
+  /* Overwrite unused vm1 */
+  cy = mpn_lshift (vm1, vinf, twor, 1);
+  cy += mpn_sub_n (v2, v2, vm1, twor);
+#endif
+  MPN_DECR_U (v2 + twor, kk1 - twor, cy);
+
+  /* Current matrix is
+     [1 0 0 0 0; vinf
+      0 1 0 0 0; v2
+      1 0 1 0 0; v1
+      0 1 0 1 0; vm1
+      0 0 0 0 1] v0
+     Some values already are in-place (we added vm1 in the correct position)
+     | vinf|  v1 |  v0 |
+	      | vm1 |
+     One still is in a separated area
+	| +v2 |
+     We have to compute v1-=vinf; vm1 -= v2,
+	   |-vinf|
+	      | -v2 |
+     By carefully reordering the operations we can avoid computing twice
+     the sum of the high half of v2 plus the low half of vinf.
+  */
+
+  /* Add the high half of t2 in {vinf} */
+  if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
+    cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
+    MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
+  } else { /* triggered only by very unbalanced cases like
+	      (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
+    ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
+  }
+  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
+     result is >= 0 */
+  /* Side effect: we also subtracted (high half) vm1 -= v2 */
+  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
+  vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
+  vinf[0] = saved;                     /* Restore v1's top limb.               */
+  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last limbs.       */
+
+  /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
+     Operate only on the low half.
+  */
+  cy = mpn_sub_n (c1, c1, v2, k);
+  MPN_DECR_U (v1, kk1, cy);
+
+  /********************* Beginning the final phase **********************/
+
+  /* Most of the recomposition was done */
+
+  /* add t2 in {c+3k, ...}, but only the low half */
+  cy = mpn_add_n (c3, c3, v2, k);
+  vinf[0] += cy;
+  ASSERT(vinf[0] >= cy); /* No carry */
+  MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
+
+#undef v0
+}
diff --git a/mpn/generic/toom_interpolate_6pts.c b/mpn/generic/toom_interpolate_6pts.c
new file mode 100644
index 0000000..eb23661
--- /dev/null
+++ b/mpn/generic/toom_interpolate_6pts.c
@@ -0,0 +1,241 @@
+/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+/* Interpolation for Toom-3.5, using the evaluation points: infinity,
+   1, -1, 2, -2. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
+   six values
+
+     w5 = f(0),
+     w4 = f(-1),
+     w3 = f(1)
+     w2 = f(-2),
+     w1 = f(2),
+     w0 = limit at infinity of f(x) / x^5,
+
+   The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
+   {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
+   {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
+   significant limbs small). f(-1) and f(-2) may be negative, signs
+   determined by the flag bits. All intermediate results are positive.
+   Inputs are destroyed.
+
+   Interpolation sequence was taken from the paper: "Integer and
+   Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
+   Some slight variations were introduced: adaptation to "gmp
+   instruction set", and a final saving of an operation by interlacing
+   interpolation and recomposition phases.
+*/
+
+void
+mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
+			   mp_ptr w4, mp_ptr w2, mp_ptr w1,
+			   mp_size_t w0n)
+{
+  /* Follows the interpolation sequence described in the block comment
+     above; the last steps are interleaved with the recomposition of
+     the result to save operations. */
+  mp_limb_t cy;
+  /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
+  mp_limb_t cy4, cy6, embankment;
+
+  ASSERT( n > 0 );
+  ASSERT( 2*n >= w0n && w0n > 0 );
+
+#define w5  pp					/* 2n   */
+#define w3  (pp + 2 * n)			/* 2n+1 */
+#define w0  (pp + 5 * n)			/* w0n  */
+
+  /* Interpolate with sequence:
+     W2 =(W1 - W2)>>2
+     W1 =(W1 - W5)>>1
+     W1 =(W1 - W2)>>1
+     W4 =(W3 - W4)>>1
+     W2 =(W2 - W4)/3
+     W3 = W3 - W4 - W5
+     W1 =(W1 - W3)/3
+     // Last steps are mixed with recomposition...
+     W2 = W2 - W0<<2
+     W4 = W4 - W2
+     W3 = W3 - W1
+     W2 = W2 - W0
+  */
+
+  /* W2 =(W1 - W2)>>2 */
+  /* The add/sub choice undoes the sign under which f(-2) was stored
+     (toom6_vm2_neg flag). */
+  if (flags & toom6_vm2_neg)
+    mpn_add_n (w2, w1, w2, 2 * n + 1);
+  else
+    mpn_sub_n (w2, w1, w2, 2 * n + 1);
+  mpn_rshift (w2, w2, 2 * n + 1, 2);
+
+  /* W1 =(W1 - W5)>>1 */
+  w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
+  mpn_rshift (w1, w1, 2 * n + 1, 1);
+
+  /* W1 =(W1 - W2)>>1 */
+#if HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
+#else
+  mpn_sub_n (w1, w1, w2, 2 * n + 1);
+  mpn_rshift (w1, w1, 2 * n + 1, 1);
+#endif
+
+  /* W4 =(W3 - W4)>>1 */
+  /* Likewise, the branch undoes the sign of f(-1) (toom6_vm1_neg). */
+  if (flags & toom6_vm1_neg)
+    {
+#if HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
+#else
+      mpn_add_n (w4, w3, w4, 2 * n + 1);
+      mpn_rshift (w4, w4, 2 * n + 1, 1);
+#endif
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
+#else
+      mpn_sub_n (w4, w3, w4, 2 * n + 1);
+      mpn_rshift (w4, w4, 2 * n + 1, 1);
+#endif
+    }
+
+  /* W2 =(W2 - W4)/3 */
+  mpn_sub_n (w2, w2, w4, 2 * n + 1);
+  mpn_divexact_by3 (w2, w2, 2 * n + 1);
+
+  /* W3 = W3 - W4 - W5 */
+  mpn_sub_n (w3, w3, w4, 2 * n + 1);
+  w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
+
+  /* W1 =(W1 - W3)/3 */
+  mpn_sub_n (w1, w1, w3, 2 * n + 1);
+  mpn_divexact_by3 (w1, w1, 2 * n + 1);
+
+  /*
+    [1 0 0 0 0 0;
+     0 1 0 0 0 0;
+     1 0 1 0 0 0;
+     0 1 0 1 0 0;
+     1 0 1 0 1 0;
+     0 0 0 0 0 1]
+
+    pp[] prior to operations:
+     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
+
+    summation scheme for remaining operations:
+     |______________5|n_____4|n_____3|n_____2|n______|n______|pp
+     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
+				    || H w4  | L w4  |
+		    || H w2  | L w2  |
+	    || H w1  | L w1  |
+			    ||-H w1  |-L w1  |
+		     |-H w0  |-L w0 ||-H w2  |-L w2  |
+  */
+  cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
+  MPN_INCR_U (pp + 3 * n + 1, n, cy);
+
+  /* W2 -= W0<<2 */
+#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+  cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
+#else
+  cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
+#endif
+#else
+  /* {W4,2*n+1} is now free and can be overwritten. */
+  cy = mpn_lshift(w4, w0, w0n, 2);
+  cy+= mpn_sub_n(w2, w2, w4, w0n);
+#endif
+  MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
+
+  /* W4L = W4L - W2L */
+  cy = mpn_sub_n (pp + n, pp + n, w2, n);
+  MPN_DECR_U (w3, 2 * n + 1, cy);
+
+  /* W3H = W3H + W2L */
+  cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
+  /* W1L + W2H */
+  cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
+  MPN_INCR_U (w1 + n, n + 1, cy);
+
+  /* W0 = W0 + W1H */
+  if (LIKELY (w0n > n))
+    cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
+  else
+    cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
+
+  /*
+    summation scheme for the next operation:
+     |...____5|n_____4|n_____3|n_____2|n______|n______|pp
+     |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
+		     ...-w0___|-w1_w2 |
+  */
+  /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
+  cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
+
+  /* embankment is a "dirty trick" to avoid carry/borrow propagation
+     beyond allocated memory */
+  embankment = w0[w0n - 1] - 1;
+  w0[w0n - 1] = 1;
+  /* cy4, cy6 and cy are deferred carries/borrows, resolved only now
+     that the embankment guarantees they cannot run off the buffer. */
+  if (LIKELY (w0n > n)) {
+    if (cy4 > cy6)
+      MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
+    else
+      MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
+    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
+    MPN_INCR_U (w0 + n, w0n - n, cy6);
+  } else {
+    MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
+    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
+  }
+  w0[w0n - 1] += embankment;
+
+#undef w5
+#undef w3
+#undef w0
+
+}
diff --git a/mpn/generic/toom_interpolate_7pts.c b/mpn/generic/toom_interpolate_7pts.c
new file mode 100644
index 0000000..167c45b
--- /dev/null
+++ b/mpn/generic/toom_interpolate_7pts.c
@@ -0,0 +1,274 @@
+/* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
+
+   Contributed to the GNU project by Niels Möller.
+   Improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2009, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_15 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
+
+/* For the various mpn_divexact_byN here, fall back to using either
+   mpn_pi1_bdiv_q_1 or mpn_divexact_1.  The former has less overhead and is
+   much faster if it is native.  For now, since mpn_divexact_1 is native on
+   several platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
+   mpn_pi1_bdiv_q_1 unconditionally.  FIXME.  */
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+#ifndef mpn_divexact_by9
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,9,BINVERT_9,0)
+#else
+#define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
+#endif
+#endif
+
+#ifndef mpn_divexact_by15
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by15(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,15,BINVERT_15,0)
+#else
+#define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
+#endif
+#endif
+
+/* Interpolation for toom4, using the evaluation points 0, infinity,
+   1, -1, 2, -2, 1/2. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
+   seven values
+
+     w0 = f(0),
+     w1 = f(-2),
+     w2 = f(1),
+     w3 = f(-1),
+     w4 = f(2)
+     w5 = 64 * f(1/2)
+     w6 = limit at infinity of f(x) / x^6,
+
+   The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
+   w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
+   w6n }. The other values are 2n + 1 limbs each (with most
+   significant limbs small). f(-1) and f(-2) may be negative, signs
+   determined by the flag bits. Inputs are destroyed.
+
+   Needs (2*n + 1) limbs of temporary storage.
+*/
+
+void
+mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
+			   mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
+			   mp_size_t w6n, mp_ptr tp)
+{
+  mp_size_t m;
+  mp_limb_t cy;
+
+  m = 2*n + 1;
+#define w0 rp
+#define w2 (rp + 2*n)
+#define w6 (rp + 6*n)
+
+  ASSERT (w6n > 0);
+  ASSERT (w6n <= 2*n);
+
+  /* Using formulas similar to Marco Bodrato's
+
+     W5 = W5 + W4
+     W1 =(W4 - W1)/2
+     W4 = W4 - W0
+     W4 =(W4 - W1)/4 - W6*16
+     W3 =(W2 - W3)/2
+     W2 = W2 - W3
+
+     W5 = W5 - W2*65      May be negative.
+     W2 = W2 - W6 - W0
+     W5 =(W5 + W2*45)/2   Now >= 0 again.
+     W4 =(W4 - W2)/3
+     W2 = W2 - W4
+
+     W1 = W5 - W1         May be negative.
+     W5 =(W5 - W3*8)/9
+     W3 = W3 - W5
+     W1 =(W1/15 + W5)/2   Now >= 0 again.
+     W5 = W5 - W1
+
+     where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
+	   W4 = f(2), W5 = f(1/2), W6 = f(oo),
+
+     Note that most intermediate results are positive; the ones that
+     may be negative are represented in two's complement. We must
+     never shift right a value that may be negative, since that would
+     invalidate the sign bit. On the other hand, divexact by odd
+     numbers work fine with two's complement.
+  */
+
+  mpn_add_n (w5, w5, w4, m);
+  if (flags & toom7_w1_neg)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w1, w1, w4, m);
+#else
+      mpn_add_n (w1, w1, w4, m);  ASSERT (!(w1[0] & 1));
+      mpn_rshift (w1, w1, m, 1);
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w1, w4, w1, m);
+#else
+      mpn_sub_n (w1, w4, w1, m);  ASSERT (!(w1[0] & 1));
+      mpn_rshift (w1, w1, m, 1);
+#endif
+    }
+  mpn_sub (w4, w4, m, w0, 2*n);
+  mpn_sub_n (w4, w4, w1, m);  ASSERT (!(w4[0] & 3));
+  mpn_rshift (w4, w4, m, 2); /* w4>=0 */
+
+  tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
+  mpn_sub (w4, w4, m, tp, w6n+1);
+
+  if (flags & toom7_w3_neg)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w3, w3, w2, m);
+#else
+      mpn_add_n (w3, w3, w2, m);  ASSERT (!(w3[0] & 1));
+      mpn_rshift (w3, w3, m, 1);
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w3, w2, w3, m);
+#else
+      mpn_sub_n (w3, w2, w3, m);  ASSERT (!(w3[0] & 1));
+      mpn_rshift (w3, w3, m, 1);
+#endif
+    }
+
+  mpn_sub_n (w2, w2, w3, m);
+
+  mpn_submul_1 (w5, w2, m, 65);
+  mpn_sub (w2, w2, m, w6, w6n);
+  mpn_sub (w2, w2, m, w0, 2*n);
+
+  mpn_addmul_1 (w5, w2, m, 45);  ASSERT (!(w5[0] & 1));
+  mpn_rshift (w5, w5, m, 1);
+  mpn_sub_n (w4, w4, w2, m);
+
+  mpn_divexact_by3 (w4, w4, m);
+  mpn_sub_n (w2, w2, w4, m);
+
+  mpn_sub_n (w1, w5, w1, m);
+  mpn_lshift (tp, w3, m, 3);
+  mpn_sub_n (w5, w5, tp, m);
+  mpn_divexact_by9 (w5, w5, m);
+  mpn_sub_n (w3, w3, w5, m);
+
+  mpn_divexact_by15 (w1, w1, m);
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+  mpn_rsh1add_n (w1, w1, w5, m);
+  w1[m - 1] &= GMP_NUMB_MASK >> 1;
+#else
+  mpn_add_n (w1, w1, w5, m);  ASSERT (!(w1[0] & 1));
+  mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
+#endif
+
+  mpn_sub_n (w5, w5, w1, m);
+
+  /* These bounds are valid for the 4x4 polynomial product of toom44,
+   * and they are conservative for toom53 and toom62. */
+  ASSERT (w1[2*n] < 2);
+  ASSERT (w2[2*n] < 3);
+  ASSERT (w3[2*n] < 4);
+  ASSERT (w4[2*n] < 3);
+  ASSERT (w5[2*n] < 2);
+
+  /* Addition chain. Note carries and the 2n'th limbs that need to be
+   * added in.
+   *
+   * Special care is needed for w2[2n] and the corresponding carry,
+   * since the "simple" way of adding it all together would overwrite
+   * the limb at w2[2*n] and rp[4*n] (same location) with the sum of
+   * the high half of w3 and the low half of w4.
+   *
+   *         7    6    5    4    3    2    1    0
+   *    |    |    |    |    |    |    |    |    |
+   *                  ||w3 (2n+1)|
+   *             ||w4 (2n+1)|
+   *        ||w5 (2n+1)|        ||w1 (2n+1)|
+   *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |  (share storage with r)
+   *  -----------------------------------------------
+   *  r |    |    |    |    |    |    |    |    |
+   *        c7   c6   c5   c4   c3                 Carries to propagate
+   */
+
+  cy = mpn_add_n (rp + n, rp + n, w1, m);
+  MPN_INCR_U (w2 + n + 1, n , cy);
+  cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
+  MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
+  cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
+  MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
+  cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
+  MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
+  if (w6n > n + 1)
+    {
+      cy = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1);
+      MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, cy);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
+#if WANT_ASSERT
+      {
+	mp_size_t i;
+	for (i = w6n; i <= n; i++)
+	  ASSERT (w5[n + i] == 0);
+      }
+#endif
+    }
+}
diff --git a/mpn/generic/toom_interpolate_8pts.c b/mpn/generic/toom_interpolate_8pts.c
new file mode 100644
index 0000000..5e65fab
--- /dev/null
+++ b/mpn/generic/toom_interpolate_8pts.c
@@ -0,0 +1,211 @@
+/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+#define BINVERT_15 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
+
+#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
+
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+#ifndef mpn_divexact_by45
+#if GMP_NUMB_BITS % 12 == 0
+#define mpn_divexact_by45(dst,src,size) \
+  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
+#else
+#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
+#endif
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
+#else
+#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t __cy;
+  __cy = mpn_lshift (ws,src,n,s);
+  return __cy + mpn_sub_n (dst,dst,ws,n);
+#endif
+}
+#endif
+
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
+#else
+/* This is not a correct definition; it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
+do {									\
+  mp_limb_t __cy;							\
+  MPN_DECR_U (dst, nd, src[0] >> s);					\
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
+} while (0)
+#endif
+
+/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
+   points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
+   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
+   degree 7 (or 6), given the 8 (rsp. 7) values:
+
+     r1 = limit at infinity of f(x) / x^7,
+     r2 = f(4),
+     r3 = f(-4),
+     r4 = f(2),
+     r5 = f(-2),
+     r6 = f(1),
+     r7 = f(-1),
+     r8 = f(0).
+
+   All couples of the form f(n),f(-n) must be already mixed with
+   toom_couple_handling(f(n),...,f(-n),...)
+
+   The result is stored in {pp, spt + 7*n (or 6*n)}.
+   At entry, r8 is stored at {pp, 2n},
+   r5 is stored at {pp + 3n, 3n + 1}.
+
+   The other values are 2n+... limbs each (with most significant limbs small).
+
+   All intermediate results are positive.
+   Inputs are destroyed.
+*/
+
+void
+mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
+			   mp_ptr r3, mp_ptr r7,
+			   mp_size_t spt, mp_ptr ws)
+{
+  mp_limb_signed_t cy;
+  mp_ptr r5, r1;
+  r5 = (pp + 3 * n);			/* 3n+1 */
+  r1 = (pp + 7 * n);			/* spt */
+
+  /******************************* interpolation *****************************/
+
+  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
+  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
+  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
+
+  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
+  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
+  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
+
+  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
+  cy = mpn_sub_n (r7, r7, r1, spt);
+  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
+  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
+
+  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
+
+  mpn_divexact_by45 (r3, r3, 3 * n + 1);
+
+  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
+
+  /* last interpolation steps... */
+  /* ... are mixed with recomposition */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
+
+    summation scheme for remaining operations:
+     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
+     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
+	  ||_H r3|_M r3|_L*r3|
+				  ||_H_r7|_M_r7|_L_r7|
+		      ||-H r3|-M r3|-L*r3|
+				  ||-H*r5|-M_r5|-L_r5|
+  */
+
+  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
+  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
+  if (cy > 0) {
+    MPN_INCR_U (r7 + n, 2*n + 1, 1);
+    cy = 0;
+  }
+
+  cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
+  MPN_DECR_U (r7 + 2*n, n + 1, cy);
+
+  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
+  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
+  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
+  if (UNLIKELY(0 > cy))
+    MPN_DECR_U (r5 + n + 1, 2*n, 1);
+  else
+    MPN_INCR_U (r5 + n + 1, 2*n, cy);
+
+  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
+
+  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
+  MPN_INCR_U (r3 + 2*n, n + 1, cy);
+  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
+  if (LIKELY(spt != n))
+    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
+  else
+    ASSERT (r3[3*n] + cy == 0);
+}
diff --git a/mpn/generic/trialdiv.c b/mpn/generic/trialdiv.c
new file mode 100644
index 0000000..65e089f
--- /dev/null
+++ b/mpn/generic/trialdiv.c
@@ -0,0 +1,131 @@
+/* mpn_trialdiv -- find small factors of an mpn number using trial division.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/*
+   This function finds the first (smallest) factor represented in
+   trialdivtab.h.  It does not stop the factoring effort just because it has
+   reached some sensible limit, such as the square root of the input number.
+
+   The caller can limit the factoring effort by passing NPRIMES.  The function
+   will then divide until that limit, or perhaps a few primes more.  A position
+   which only mpn_trialdiv can make sense of is returned in the WHERE
+   parameter.  It can be used for restarting the factoring effort; the first
+   call should pass 0 here.
+
+   Input:        1. A non-negative number T = {tp,tn}
+                 2. NPRIMES as described above,
+                 3. *WHERE as described above.
+   Output:       1. *WHERE updated as described above.
+                 2. Return value is non-zero if we found a factor, else zero
+                    To get the actual prime factor, compute the mod B inverse
+                    of the return value.
+*/
+
+#include "gmp-impl.h"
+
+struct gmp_primes_dtab {
+  mp_limb_t binv;
+  mp_limb_t lim;
+};
+
+struct gmp_primes_ptab {
+  mp_limb_t ppp;	/* primes, multiplied together */
+  mp_limb_t cps[7];	/* ppp values pre-computed for mpn_mod_1s_4p */
+  gmp_uint_least32_t idx:24;	/* index of first prime in dtab */
+  gmp_uint_least32_t np :8;	/* number of primes related to this entry */
+};
+
+
+static const struct gmp_primes_dtab gmp_primes_dtab[] =
+{
+#define WANT_dtab
+#define P(p,inv,lim) {inv,lim}
+#include "trialdivtab.h"
+#undef WANT_dtab
+#undef P
+  {0,0}
+};
+
+static const struct gmp_primes_ptab gmp_primes_ptab[] =
+{
+#define WANT_ptab
+#include "trialdivtab.h"
+#undef WANT_ptab
+};
+
+#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
+
+/* FIXME: We could optimize out one of the outer loop conditions if we
+   had a final ptab entry with a huge np field.  */
+mp_limb_t
+mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where)
+{
+  mp_limb_t ppp;
+  const mp_limb_t *cps;
+  const struct gmp_primes_dtab *dp;
+  long i, j, idx, np;
+  mp_limb_t r, q;
+
+  ASSERT (tn >= 1);
+
+  for (i = *where; i < PTAB_LINES; i++)
+    {
+      ppp = gmp_primes_ptab[i].ppp;
+      cps = gmp_primes_ptab[i].cps;
+
+      r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
+
+      idx = gmp_primes_ptab[i].idx;
+      np = gmp_primes_ptab[i].np;
+
+      /* Check divisibility by individual primes.  */
+      dp = &gmp_primes_dtab[idx] + np;
+      for (j = -np; j < 0; j++)
+	{
+	  q = r * dp[j].binv;
+	  if (q <= dp[j].lim)
+	    {
+	      *where = i;
+	      return dp[j].binv;
+	    }
+	}
+
+      nprimes -= np;
+      if (nprimes <= 0)
+	return 0;
+    }
+  return 0;
+}
diff --git a/mpn/generic/udiv_w_sdiv.c b/mpn/generic/udiv_w_sdiv.c
new file mode 100644
index 0000000..7907135
--- /dev/null
+++ b/mpn/generic/udiv_w_sdiv.c
@@ -0,0 +1,141 @@
+/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
+   division.
+
+   Contributed by Peter L. Montgomery.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY SAFE
+   TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE
+   GNU MP RELEASE.
+
+
+Copyright 1992, 1994, 1996, 2000, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
+{
+  mp_limb_t q, r;
+  mp_limb_t c0, c1, b1;
+
+  ASSERT (d != 0);
+  ASSERT (a1 < d);
+
+  if ((mp_limb_signed_t) d >= 0)
+    {
+      if (a1 < d - a1 - (a0 >> (GMP_LIMB_BITS - 1)))
+	{
+	  /* dividend, divisor, and quotient are nonnegative */
+	  sdiv_qrnnd (q, r, a1, a0, d);
+	}
+      else
+	{
+	  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
+	  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (GMP_LIMB_BITS - 1));
+	  /* Divide (c1*2^32 + c0) by d */
+	  sdiv_qrnnd (q, r, c1, c0, d);
+	  /* Add 2^31 to quotient */
+	  q += (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
+	}
+    }
+  else
+    {
+      b1 = d >> 1;			/* d/2, between 2^30 and 2^31 - 1 */
+      c1 = a1 >> 1;			/* A/2 */
+      c0 = (a1 << (GMP_LIMB_BITS - 1)) + (a0 >> 1);
+
+      if (a1 < b1)			/* A < 2^32*b1, so A/2 < 2^31*b1 */
+	{
+	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+	  r = 2*r + (a0 & 1);		/* Remainder from A/(2*b1) */
+	  if ((d & 1) != 0)
+	    {
+	      if (r >= q)
+		r = r - q;
+	      else if (q - r <= d)
+		{
+		  r = r - q + d;
+		  q--;
+		}
+	      else
+		{
+		  r = r - q + 2*d;
+		  q -= 2;
+		}
+	    }
+	}
+      else if (c1 < b1)			/* So 2^31 <= (A/2)/b1 < 2^32 */
+	{
+	  c1 = (b1 - 1) - c1;
+	  c0 = ~c0;			/* logical NOT */
+
+	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+	  q = ~q;			/* (A/2)/b1 */
+	  r = (b1 - 1) - r;
+
+	  r = 2*r + (a0 & 1);		/* A/(2*b1) */
+
+	  if ((d & 1) != 0)
+	    {
+	      if (r >= q)
+		r = r - q;
+	      else if (q - r <= d)
+		{
+		  r = r - q + d;
+		  q--;
+		}
+	      else
+		{
+		  r = r - q + 2*d;
+		  q -= 2;
+		}
+	    }
+	}
+      else				/* Implies c1 = b1 */
+	{				/* Hence a1 = d - 1 = 2*b1 - 1 */
+	  if (a0 >= -d)
+	    {
+	      q = -CNST_LIMB(1);
+	      r = a0 + d;
+	    }
+	  else
+	    {
+	      q = -CNST_LIMB(2);
+	      r = a0 + 2*d;
+	    }
+	}
+    }
+
+  *rp = r;
+  return q;
+}
diff --git a/mpn/generic/zero.c b/mpn/generic/zero.c
new file mode 100644
index 0000000..1a05453
--- /dev/null
+++ b/mpn/generic/zero.c
@@ -0,0 +1,41 @@
+/* mpn_zero
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpn_zero (mp_ptr rp, mp_size_t n)
+{
+  mp_size_t i;
+
+  rp += n;
+  for (i = -n; i != 0; i++)
+    rp[i] = 0;
+}
diff --git a/mpn/generic/zero_p.c b/mpn/generic/zero_p.c
new file mode 100644
index 0000000..c92f9b8
--- /dev/null
+++ b/mpn/generic/zero_p.c
@@ -0,0 +1,33 @@
+/* mpn_zero_p (x,xsize) -- Return 1 if X is zero, 0 if it is non-zero.
+
+Copyright 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_zero_p 1
+
+#include "gmp-impl.h"
diff --git a/mpn/x86/aors_n.asm b/mpn/x86/aors_n.asm
new file mode 100644
index 0000000..1f97b39
--- /dev/null
+++ b/mpn/x86/aors_n.asm
@@ -0,0 +1,202 @@
+dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl  Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C     cycles/limb
+C P5	3.375
+C P6	3.125
+C K6	3.5
+C K7	2.25
+C P4	8.75
+
+
+ifdef(`OPERATION_add_n',`
+	define(M4_inst,        adcl)
+	define(M4_function_n,  mpn_add_n)
+	define(M4_function_nc, mpn_add_nc)
+
+',`ifdef(`OPERATION_sub_n',`
+	define(M4_inst,        sbbl)
+	define(M4_function_n,  mpn_sub_n)
+	define(M4_function_nc, mpn_sub_nc)
+
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C	                    mp_size_t size, mp_limb_t carry);
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+	TEXT
+	ALIGN(8)
+
+PROLOGUE(M4_function_nc)
+deflit(`FRAME',0)
+
+	pushl	%edi		FRAME_pushl()
+	pushl	%esi		FRAME_pushl()
+
+	movl	PARAM_DST,%edi
+	movl	PARAM_SRC1,%esi
+	movl	PARAM_SRC2,%edx
+	movl	PARAM_SIZE,%ecx
+
+	movl	%ecx,%eax
+	shrl	$3,%ecx			C compute count for unrolled loop
+	negl	%eax
+	andl	$7,%eax			C get index where to start loop
+	jz	L(oopgo)		C necessary special case for 0
+	incl	%ecx			C adjust loop count
+	shll	$2,%eax			C adjustment for pointers...
+	subl	%eax,%edi		C ... since they are offset ...
+	subl	%eax,%esi		C ... by a constant when we ...
+	subl	%eax,%edx		C ... enter the loop
+	shrl	$2,%eax			C restore previous value
+
+ifdef(`PIC',`
+	C Calculate start address in loop for PIC.  Due to limitations in
+	C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal
+	call	L(0a)
+L(0a):	leal	(%eax,%eax,8),%eax
+	addl	(%esp),%eax
+	addl	$L(oop)-L(0a)-3,%eax
+	addl	$4,%esp
+',`
+	C Calculate start address in loop for non-PIC.
+	leal	L(oop)-3(%eax,%eax,8),%eax
+')
+
+	C These lines initialize carry from the 5th parameter.  Should be
+	C possible to simplify.
+	pushl	%ebp		FRAME_pushl()
+	movl	PARAM_CARRY,%ebp
+	shrl	%ebp			C shift bit 0 into carry
+	popl	%ebp		FRAME_popl()
+
+	jmp	*%eax			C jump into loop
+
+EPILOGUE()
+
+
+	ALIGN(16)
+PROLOGUE(M4_function_n)
+deflit(`FRAME',0)
+
+	pushl	%edi		FRAME_pushl()
+	pushl	%esi		FRAME_pushl()
+
+	movl	PARAM_DST,%edi
+	movl	PARAM_SRC1,%esi
+	movl	PARAM_SRC2,%edx
+	movl	PARAM_SIZE,%ecx
+
+	movl	%ecx,%eax
+	shrl	$3,%ecx			C compute count for unrolled loop
+	negl	%eax
+	andl	$7,%eax			C get index where to start loop
+	jz	L(oop)			C necessary special case for 0
+	incl	%ecx			C adjust loop count
+	shll	$2,%eax			C adjustment for pointers...
+	subl	%eax,%edi		C ... since they are offset ...
+	subl	%eax,%esi		C ... by a constant when we ...
+	subl	%eax,%edx		C ... enter the loop
+	shrl	$2,%eax			C restore previous value
+
+ifdef(`PIC',`
+	C Calculate start address in loop for PIC.  Due to limitations in
+	C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
+	call	L(0b)
+L(0b):	leal	(%eax,%eax,8),%eax
+	addl	(%esp),%eax
+	addl	$L(oop)-L(0b)-3,%eax
+	addl	$4,%esp
+',`
+	C Calculate start address in loop for non-PIC.
+	leal	L(oop)-3(%eax,%eax,8),%eax
+')
+	jmp	*%eax			C jump into loop
+
+L(oopgo):
+	pushl	%ebp		FRAME_pushl()
+	movl	PARAM_CARRY,%ebp
+	shrl	%ebp			C shift bit 0 into carry
+	popl	%ebp		FRAME_popl()
+
+	ALIGN(16)
+L(oop):	movl	(%esi),%eax
+	M4_inst	(%edx),%eax
+	movl	%eax,(%edi)
+	movl	4(%esi),%eax
+	M4_inst	4(%edx),%eax
+	movl	%eax,4(%edi)
+	movl	8(%esi),%eax
+	M4_inst	8(%edx),%eax
+	movl	%eax,8(%edi)
+	movl	12(%esi),%eax
+	M4_inst	12(%edx),%eax
+	movl	%eax,12(%edi)
+	movl	16(%esi),%eax
+	M4_inst	16(%edx),%eax
+	movl	%eax,16(%edi)
+	movl	20(%esi),%eax
+	M4_inst	20(%edx),%eax
+	movl	%eax,20(%edi)
+	movl	24(%esi),%eax
+	M4_inst	24(%edx),%eax
+	movl	%eax,24(%edi)
+	movl	28(%esi),%eax
+	M4_inst	28(%edx),%eax
+	movl	%eax,28(%edi)
+	leal	32(%edi),%edi
+	leal	32(%esi),%esi
+	leal	32(%edx),%edx
+	decl	%ecx
+	jnz	L(oop)
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl	%esi
+	popl	%edi
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/aorsmul_1.asm b/mpn/x86/aorsmul_1.asm
new file mode 100644
index 0000000..ee7b774
--- /dev/null
+++ b/mpn/x86/aorsmul_1.asm
@@ -0,0 +1,214 @@
+dnl  x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
+dnl  limb and add the result to a second limb vector.
+
+dnl  Copyright 1992, 1994, 1997, 1999-2002, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C			    cycles/limb
+C P5				
+C P6 model 0-8,10-12		
+C P6 model 9  (Banias)		
+C P6 model 13 (Dothan)		
+C P4 model 0  (Willamette)	
+C P4 model 1  (?)		
+C P4 model 2  (Northwood)	
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom
+C AMD K6			
+C AMD K7			
+C AMD K8			 3.875
+C AMD K10
+
+
+ifdef(`OPERATION_addmul_1',`
+      define(ADDSUB,        addl)
+      define(M4_function_1,  mpn_addmul_1)
+      define(M4_function_1c, mpn_addmul_1c)
+
+',`ifdef(`OPERATION_submul_1',`
+      define(ADDSUB,        subl)
+      define(M4_function_1,  mpn_submul_1)
+
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1 mpn_addmul_1c)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                          mp_limb_t mult);
+
+define(PARAM_CARRY,      `FRAME+20(%esp)')
+define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
+define(PARAM_SIZE,       `FRAME+12(%esp)')
+define(PARAM_SRC,        `FRAME+8(%esp)')
+define(PARAM_DST,        `FRAME+4(%esp)')
+
+	TEXT
+	ALIGN(32)
+PROLOGUE(M4_function_1)
+deflit(`FRAME',0)
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+deflit(`FRAME',16)
+
+	movl	PARAM_SRC, %esi
+	movl	PARAM_SIZE, %ecx
+	movl	PARAM_DST, %edi
+
+	movl	(%esi), %eax		C src low limb
+	mull	PARAM_MULTIPLIER	C edx:eax = src[0] * multiplier
+
+	testb	$1, %cl			C dispatch on size mod 2
+	jnz	L(bx1)
+
+L(bx0):	movl	%eax, %ebx		C size even
+	movl	%edx, %ebp
+	shrl	$2, %ecx		C carry out is bit 1 of size
+	jnc	L(lo0)
+
+L(b10):	leal	-8(%esi), %esi		C size == 2 mod 4
+	leal	-8(%edi), %edi
+	incl	%ecx
+	jmp	L(lo2)
+
+L(bx1):	movl	%eax, %ebp		C size odd
+	movl	%edx, %ebx
+	shrl	$2, %ecx
+	jc	L(b11)
+
+L(b01):	leal	4(%edi), %edi		C size == 1 mod 4
+	jz	L(end)			C size == 1, no loop needed
+	leal	4(%esi), %esi
+	jmp	L(top)
+
+L(b11):	leal	-4(%esi), %esi		C size == 3 mod 4
+	leal	-4(%edi), %edi
+	incl	%ecx
+	jmp	L(lo3)
+
+	ALIGN(16)
+L(top):	movl	(%esi), %eax		C main loop, 4 limbs/iteration
+	mull	PARAM_MULTIPLIER
+	ADDSUB	%ebp, -4(%edi)		C apply previous low product limb to dst
+	adcl	%eax, %ebx		C accumulate new low product limb
+	movl	$0, %ebp
+	adcl	%edx, %ebp		C carry limb for the next step
+L(lo0):	movl	4(%esi), %eax
+	mull	PARAM_MULTIPLIER
+	ADDSUB	%ebx, (%edi)
+	adcl	%eax, %ebp
+	movl	$0, %ebx
+	adcl	%edx, %ebx
+L(lo3):	movl	8(%esi), %eax
+	mull	PARAM_MULTIPLIER
+	ADDSUB	%ebp, 4(%edi)
+	adcl	%eax, %ebx
+	movl	$0, %ebp
+	adcl	%edx, %ebp
+L(lo2):	movl	12(%esi), %eax
+	mull	PARAM_MULTIPLIER
+	ADDSUB	%ebx, 8(%edi)
+	adcl	%eax, %ebp
+	movl	$0, %ebx
+	adcl	%edx, %ebx
+
+	leal	16(%esi), %esi
+	leal	16(%edi), %edi
+	decl	%ecx
+	jnz	L(top)
+
+L(end):	xor	%eax, %eax
+	ADDSUB	%ebp, -4(%edi)		C apply final low product limb
+	adcl	%ebx, %eax		C return value: the carry-out limb
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+EPILOGUE()
+ifdef(`OPERATION_addmul_1',`
+	ALIGN(32)
+PROLOGUE(M4_function_1c)
+deflit(`FRAME',0)
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+deflit(`FRAME',16)
+
+	movl	PARAM_SRC, %esi
+	movl	PARAM_SIZE, %ecx
+	movl	PARAM_DST, %edi
+
+	movl	(%esi), %eax		C src low limb
+	mull	PARAM_MULTIPLIER	C edx:eax = src[0] * multiplier
+
+	testb	$1, %cl			C dispatch on size mod 2
+	jnz	L(cx1)
+
+	movl	PARAM_CARRY, %ebx	C size even: fold carry-in into accumulator
+	xorl	%ebp, %ebp
+
+L(cx0):	addl	%eax, %ebx
+	adcl	%edx, %ebp
+	shrl	$2, %ecx		C carry out is bit 1 of size
+	jnc	L(lo0)			C join the M4_function_1 loop above
+
+L(c10):	leal	-8(%esi), %esi		C size == 2 mod 4
+	leal	-8(%edi), %edi
+	incl	%ecx
+	jmp	L(lo2)
+
+L(cx1):	movl	PARAM_CARRY, %ebp	C size odd: fold carry-in into accumulator
+	xorl	%ebx, %ebx
+
+	addl	%eax, %ebp
+	adcl	%edx, %ebx
+	shrl	$2, %ecx
+	jc	L(c11)
+
+L(c01):	leal	4(%edi), %edi		C size == 1 mod 4
+	jz	L(end)			C size == 1, straight to the tail
+	leal	4(%esi), %esi
+	jmp	L(top)
+
+L(c11):	leal	-4(%esi), %esi		C size == 3 mod 4
+	leal	-4(%edi), %edi
+	incl	%ecx
+	jmp	L(lo3)
+EPILOGUE()
+')
diff --git a/mpn/x86/bdiv_dbm1c.asm b/mpn/x86/bdiv_dbm1c.asm
new file mode 100644
index 0000000..291a783
--- /dev/null
+++ b/mpn/x86/bdiv_dbm1c.asm
@@ -0,0 +1,129 @@
+dnl  x86 mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C			    cycles/limb
+C P5
+C P6 model 0-8,10-12)
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)		 5.1
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)	13.67
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom
+C AMD K6
+C AMD K7			 3.5
+C AMD K8
+C AMD K10
+
+
+C TODO
+C  * Optimize for more x86 processors
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+	mov	16(%esp), %ecx		C d
+	push	%esi
+	mov	12(%esp), %esi		C ap
+	push	%edi
+	mov	12(%esp), %edi		C qp
+	push	%ebp
+	mov	24(%esp), %ebp		C n
+	push	%ebx
+
+	mov	(%esi), %eax		C ap[0]
+	mul	%ecx			C edx:eax = ap[0] * d
+	mov	36(%esp), %ebx		C fifth parameter, the initial carry
+	sub	%eax, %ebx		C subtract low product, borrow saved by sbb below
+	mov	%ebx, (%edi)
+	sbb	%edx, %ebx		C fold in high product plus borrow
+
+	mov	%ebp, %eax
+	and	$3, %eax		C dispatch on n mod 4
+	jz	L(b0)
+	cmp	$2, %eax
+	jc	L(b1)
+	jz	L(b2)
+
+L(b3):	lea	-8(%esi), %esi		C n == 3 mod 4
+	lea	8(%edi), %edi
+	add	$-3, %ebp
+	jmp	L(3)
+
+L(b0):	mov	4(%esi), %eax		C n == 0 mod 4
+	lea	-4(%esi), %esi
+	lea	12(%edi), %edi
+	add	$-4, %ebp
+	jmp	L(0)
+
+L(b2):	mov	4(%esi), %eax		C n == 2 mod 4
+	lea	4(%esi), %esi
+	lea	4(%edi), %edi
+	add	$-2, %ebp
+	jmp	L(2)
+
+	ALIGN(8)
+L(top):	mov	4(%esi), %eax		C main loop, 4 limbs/iteration
+	mul	%ecx
+	lea	16(%edi), %edi
+	sub	%eax, %ebx
+	mov	8(%esi), %eax
+	mov	%ebx, -12(%edi)
+	sbb	%edx, %ebx
+L(0):	mul	%ecx
+	sub	%eax, %ebx
+	mov	%ebx, -8(%edi)
+	sbb	%edx, %ebx
+L(3):	mov	12(%esi), %eax
+	mul	%ecx
+	sub	%eax, %ebx
+	mov	%ebx, -4(%edi)
+	mov	16(%esi), %eax
+	lea	16(%esi), %esi
+	sbb	%edx, %ebx
+L(2):	mul	%ecx
+	sub	%eax, %ebx
+	mov	%ebx, 0(%edi)
+	sbb	%edx, %ebx
+L(b1):	add	$-4, %ebp
+	jns	L(top)
+
+	mov	%ebx, %eax		C return the running carry word
+	pop	%ebx
+	pop	%ebp
+	pop	%edi
+	pop	%esi
+	ret
+EPILOGUE()
diff --git a/mpn/x86/bdiv_q_1.asm b/mpn/x86/bdiv_q_1.asm
new file mode 100644
index 0000000..7e2a493
--- /dev/null
+++ b/mpn/x86/bdiv_q_1.asm
@@ -0,0 +1,208 @@
+dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.
+
+dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
+
+dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C     cycles/limb
+C P54    30.0
+C P55    29.0
+C P6     13.0 odd divisor, 12.0 even (strangely)
+C K6     14.0
+C K7     12.0
+C P4     42.0
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+
+defframe(PARAM_SHIFT,  24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space
+define(VAR_INVERSE,`PARAM_SRC')
+
+	TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C		    mp_limb_t inverse, int shift)
+
+	ALIGN(16)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_SHIFT, %ecx	C shift count (caller-supplied)
+	pushl	%ebp	FRAME_pushl()
+
+	movl	PARAM_INVERSE, %eax	C precomputed divisor inverse
+	movl	PARAM_SIZE, %ebp
+	pushl	%ebx	FRAME_pushl()
+L(common):				C entry shared with mpn_bdiv_q_1 below
+	pushl	%edi	FRAME_pushl()
+	pushl	%esi	FRAME_pushl()
+
+	movl	PARAM_SRC, %esi
+	movl	PARAM_DST, %edi
+
+	leal	(%esi,%ebp,4), %esi	C src end
+	leal	(%edi,%ebp,4), %edi	C dst end
+	negl	%ebp			C -size
+
+	movl	%eax, VAR_INVERSE
+	movl	(%esi,%ebp,4), %eax	C src[0]
+
+	xorl	%ebx, %ebx
+	xorl	%edx, %edx
+
+	incl	%ebp
+	jz	L(one)			C size == 1
+
+	movl	(%esi,%ebp,4), %edx	C src[1]
+
+	shrdl(	%cl, %edx, %eax)	C src[0] shifted right, bits from src[1]
+
+	movl	VAR_INVERSE, %edx
+	jmp	L(entry)
+
+
+	ALIGN(8)
+	nop	C k6 code alignment
+	nop
+L(top):
+	C eax	q
+	C ebx	carry bit, 0 or -1
+	C ecx	shift
+	C edx	carry limb
+	C esi	src end
+	C edi	dst end
+	C ebp	counter, limbs, negative
+
+	movl	-4(%esi,%ebp,4), %eax
+	subl	%ebx, %edx		C accumulate carry bit
+
+	movl	(%esi,%ebp,4), %ebx
+
+	shrdl(	%cl, %ebx, %eax)
+
+	subl	%edx, %eax		C apply carry limb
+	movl	VAR_INVERSE, %edx
+
+	sbbl	%ebx, %ebx		C save borrow as 0 or -1
+
+L(entry):
+	imull	%edx, %eax		C q = limb * inverse mod 2^32
+
+	movl	%eax, -4(%edi,%ebp,4)
+	movl	PARAM_DIVISOR, %edx
+
+	mull	%edx			C edx = high(q*d), the next carry limb
+
+	incl	%ebp
+	jnz	L(top)
+
+
+	movl	-4(%esi), %eax		C src high limb
+L(one):
+	shrl	%cl, %eax
+	popl	%esi	FRAME_popl()
+
+	addl	%ebx, %eax		C apply carry bit
+
+	subl	%edx, %eax		C apply carry limb
+
+	imull	VAR_INVERSE, %eax	C final quotient limb
+
+	movl	%eax, -4(%edi)
+
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+
+	ret
+
+EPILOGUE()
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C
+
+	ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_DIVISOR, %eax
+	pushl	%ebp	FRAME_pushl()
+
+	movl	$-1, %ecx		C shift count
+	movl	PARAM_SIZE, %ebp
+
+	pushl	%ebx	FRAME_pushl()
+
+L(strip_twos):				C count and strip low zero bits of d
+	incl	%ecx
+
+	shrl	%eax
+	jnc	L(strip_twos)
+
+	leal	1(%eax,%eax), %ebx	C d without twos
+	andl	$127, %eax		C d/2, 7 bits
+
+ifdef(`PIC',`
+	LEA(	binvert_limb_table, %edx)
+	movzbl	(%eax,%edx), %eax		C inv 8 bits
+',`
+	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
+')
+
+	leal	(%eax,%eax), %edx	C 2*inv
+	movl	%ebx, PARAM_DIVISOR	C d without twos
+	imull	%eax, %eax		C inv*inv
+	imull	%ebx, %eax		C inv*inv*d
+	subl	%eax, %edx		C inv = 2*inv - inv*inv*d, now 16 bits
+
+	leal	(%edx,%edx), %eax	C 2*inv
+	imull	%edx, %edx		C inv*inv
+	imull	%ebx, %edx		C inv*inv*d
+	subl	%edx, %eax		C inv = 2*inv - inv*inv*d, now 32 bits
+
+	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	pushl	%eax	FRAME_pushl()
+	imull	PARAM_DIVISOR, %eax
+	cmpl	$1, %eax
+	popl	%eax	FRAME_popl()')
+
+	jmp	L(common)		C shared tail in mpn_pi1_bdiv_q_1 above
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/cnd_aors_n.asm b/mpn/x86/cnd_aors_n.asm
new file mode 100644
index 0000000..f47786a
--- /dev/null
+++ b/mpn/x86/cnd_aors_n.asm
@@ -0,0 +1,124 @@
+dnl  X86 mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C			    cycles/limb
+C P5				 ?
+C P6 model 0-8,10-12		 ?
+C P6 model 9   (Banias)		 ?
+C P6 model 13  (Dothan)		 5.4
+C P4 model 0-1 (Willamette)	 ?
+C P4 model 2   (Northwood)	14.5
+C P4 model 3-4 (Prescott)	21
+C Intel atom			11
+C AMD K6			 ?
+C AMD K7			 3.4
+C AMD K8			 ?
+
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebp')
+define(`n',   `%ecx')
+define(`cnd', `20(%esp)')
+define(`cy',  `%edx')
+
+ifdef(`OPERATION_cnd_add_n', `
+	define(ADDSUB,	      add)
+	define(ADCSBB,	      adc)
+	define(func,	      mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+	define(ADDSUB,	      sub)
+	define(ADCSBB,	      sbb)
+	define(func,	      mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	add	$-16, %esp		C space to save the 4 callee-saved regs
+	mov	%ebp, (%esp)
+	mov	%ebx, 4(%esp)
+	mov	%esi, 8(%esp)
+	mov	%edi, 12(%esp)
+
+	C make cnd into a full mask
+	mov	cnd, %eax
+	neg	%eax			C sets carry iff cnd != 0
+	sbb	%eax, %eax		C 0 or -1 (all-ones) mask
+	mov	%eax, cnd
+
+	C load parameters into registers
+	mov	24(%esp), rp
+	mov	28(%esp), up
+	mov	32(%esp), vp
+	mov	36(%esp), n
+
+	mov	(vp), %eax		C first limb of each operand
+	mov	(up), %ebx
+
+	C put operand pointers just beyond their last limb
+	lea	(vp,n,4), vp
+	lea	(up,n,4), up
+	lea	-4(rp,n,4), rp
+	neg	n
+
+	and	cnd, %eax		C zero the v limb unless cnd was nonzero
+	ADDSUB	%eax, %ebx
+	sbb	cy, cy			C save carry/borrow as 0 or -1
+	inc	n
+	je	L(end)			C n == 1
+
+	ALIGN(16)
+L(top):	mov	(vp,n,4), %eax
+	and	cnd, %eax		C masked limb: 0 when condition false
+	mov	%ebx, (rp,n,4)
+	mov	(up,n,4), %ebx
+	add	cy, cy			C restore carry flag from saved 0/-1
+	ADCSBB	%eax, %ebx
+	sbb	cy, cy			C save carry flag again
+	inc	n
+	jne	L(top)
+
+L(end):	mov	%ebx, (rp)		C store the final limb
+	xor	%eax, %eax
+	sub	cy, %eax		C return value: carry/borrow, 0 or 1
+
+	mov	(%esp), %ebp
+	mov	4(%esp), %ebx
+	mov	8(%esp), %esi
+	mov	12(%esp), %edi
+	add	$16, %esp
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/copyd.asm b/mpn/x86/copyd.asm
new file mode 100644
index 0000000..e7f3d61
--- /dev/null
+++ b/mpn/x86/copyd.asm
@@ -0,0 +1,91 @@
+dnl  x86 mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 1999-2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C     cycles/limb  startup (approx)
+C P5	  1.0	      40
+C P6	  2.4	      70
+C K6	  1.0	      55
+C K7	  1.3	      75
+C P4	  2.6	     175
+C
+C (Startup time includes some function call overheads.)
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, working from high to low addresses.
+C
+C The code here is very generic and can be expected to be reasonable on all
+C the x86 family.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+	TEXT
+	ALIGN(32)
+
+PROLOGUE(mpn_copyd)
+	C eax	saved esi
+	C ebx
+	C ecx	counter
+	C edx	saved edi
+	C esi	src
+	C edi	dst
+	C ebp
+
+	movl	PARAM_SIZE, %ecx
+	movl	%esi, %eax		C save esi in a scratch reg, no push needed
+
+	movl	PARAM_SRC, %esi
+	movl	%edi, %edx		C save edi likewise
+
+	movl	PARAM_DST, %edi
+	leal	-4(%esi,%ecx,4), %esi	C &src[size-1]
+
+	leal	-4(%edi,%ecx,4), %edi	C &dst[size-1]
+
+	std				C direction flag set: copy high to low
+
+	rep
+	movsl
+
+	cld				C restore direction flag for callers
+
+	movl	%eax, %esi
+	movl	%edx, %edi
+
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/copyi.asm b/mpn/x86/copyi.asm
new file mode 100644
index 0000000..df0d5b1
--- /dev/null
+++ b/mpn/x86/copyi.asm
@@ -0,0 +1,99 @@
+dnl  x86 mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 1999-2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C     cycles/limb  startup (approx)
+C P5	  1.0	      35
+C P6	  0.75	      45
+C K6	  1.0	      30
+C K7	  1.3	      65
+C P4	  1.0	     120
+C
+C (Startup time includes some function call overheads.)
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, working from low to high addresses.
+C
+C The code here is very generic and can be expected to be reasonable on all
+C the x86 family.
+C
+C P6 -  An MMX based copy was tried, but was found to be slower than a rep
+C       movs in all cases.  The fastest MMX found was 0.8 cycles/limb (when
+C       fully aligned).  A rep movs seems to have a startup time of about 15
+C       cycles, but doing something special for small sizes could lead to a
+C       branch misprediction that would destroy any saving.  For now a plain
+C       rep movs seems ok.
+C
+C K62 - We used to have a big chunk of code doing an MMX copy at 0.56 c/l if
+C       aligned or a 1.0 rep movs if not.  But that seemed excessive since
+C       it only got an advantage half the time, and even then only showed it
+C       above 50 limbs or so.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+	TEXT
+	ALIGN(32)
+
+	C eax	saved esi
+	C ebx
+	C ecx	counter
+	C edx	saved edi
+	C esi	src
+	C edi	dst
+	C ebp
+
+PROLOGUE(mpn_copyi)
+
+	movl	PARAM_SIZE, %ecx
+	movl	%esi, %eax		C save esi in a scratch reg, no push needed
+
+	movl	PARAM_SRC, %esi
+	movl	%edi, %edx		C save edi likewise
+
+	movl	PARAM_DST, %edi
+
+	cld	C better safe than sorry, see mpn/x86/README
+
+	rep
+	movsl
+
+	movl	%eax, %esi		C restore the saved registers
+	movl	%edx, %edi
+
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/dive_1.asm b/mpn/x86/dive_1.asm
new file mode 100644
index 0000000..b9084e7
--- /dev/null
+++ b/mpn/x86/dive_1.asm
@@ -0,0 +1,190 @@
+dnl  x86 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C     cycles/limb
+C P54    30.0
+C P55    29.0
+C P6     13.0 odd divisor, 12.0 even (strangely)
+C K6     14.0
+C K7     12.0
+C P4     42.0
+
+
+C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C
+
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space
+define(VAR_INVERSE,`PARAM_SRC')
+
+	TEXT
+
+	ALIGN(16)
+PROLOGUE(mpn_divexact_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_DIVISOR, %eax
+	pushl	%ebp	FRAME_pushl()
+
+	movl	PARAM_SIZE, %ebp
+	pushl	%edi	FRAME_pushl()
+
+	pushl	%ebx	FRAME_pushl()
+	movl	$-1, %ecx		C shift count
+
+	pushl	%esi	FRAME_pushl()
+
+L(strip_twos):				C count and strip low zero bits of d
+	incl	%ecx
+
+	shrl	%eax
+	jnc	L(strip_twos)
+
+	leal	1(%eax,%eax), %ebx	C d without twos
+	andl	$127, %eax		C d/2, 7 bits
+
+ifdef(`PIC',`
+	LEA(	binvert_limb_table, %edx)
+	movzbl	(%eax,%edx), %eax		C inv 8 bits
+',`
+	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
+')
+
+	leal	(%eax,%eax), %edx	C 2*inv
+	movl	%ebx, PARAM_DIVISOR	C d without twos
+
+	imull	%eax, %eax		C inv*inv
+
+	movl	PARAM_SRC, %esi
+	movl	PARAM_DST, %edi
+
+	imull	%ebx, %eax		C inv*inv*d
+
+	subl	%eax, %edx		C inv = 2*inv - inv*inv*d, now 16 bits
+	leal	(%edx,%edx), %eax	C 2*inv
+
+	imull	%edx, %edx		C inv*inv
+
+	leal	(%esi,%ebp,4), %esi	C src end
+	leal	(%edi,%ebp,4), %edi	C dst end
+	negl	%ebp			C -size
+
+	imull	%ebx, %edx		C inv*inv*d
+
+	subl	%edx, %eax		C inv = 2*inv - inv*inv*d, now 32 bits
+
+	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	pushl	%eax	FRAME_pushl()
+	imull	PARAM_DIVISOR, %eax
+	cmpl	$1, %eax
+	popl	%eax	FRAME_popl()')
+
+	movl	%eax, VAR_INVERSE
+	movl	(%esi,%ebp,4), %eax	C src[0]
+
+	xorl	%ebx, %ebx
+	xorl	%edx, %edx
+
+	incl	%ebp
+	jz	L(one)			C size == 1
+
+	movl	(%esi,%ebp,4), %edx	C src[1]
+
+	shrdl(	%cl, %edx, %eax)	C src[0] shifted right, bits from src[1]
+
+	movl	VAR_INVERSE, %edx
+	jmp	L(entry)
+
+
+	ALIGN(8)
+	nop	C k6 code alignment
+	nop
+L(top):
+	C eax	q
+	C ebx	carry bit, 0 or -1
+	C ecx	shift
+	C edx	carry limb
+	C esi	src end
+	C edi	dst end
+	C ebp	counter, limbs, negative
+
+	movl	-4(%esi,%ebp,4), %eax
+	subl	%ebx, %edx		C accumulate carry bit
+
+	movl	(%esi,%ebp,4), %ebx
+
+	shrdl(	%cl, %ebx, %eax)
+
+	subl	%edx, %eax		C apply carry limb
+	movl	VAR_INVERSE, %edx
+
+	sbbl	%ebx, %ebx		C save borrow as 0 or -1
+
+L(entry):
+	imull	%edx, %eax		C q = limb * inverse mod 2^32
+
+	movl	%eax, -4(%edi,%ebp,4)
+	movl	PARAM_DIVISOR, %edx
+
+	mull	%edx			C edx = high(q*d), the next carry limb
+
+	incl	%ebp
+	jnz	L(top)
+
+
+	movl	-4(%esi), %eax		C src high limb
+L(one):
+	shrl	%cl, %eax
+	popl	%esi	FRAME_popl()
+
+	addl	%ebx, %eax		C apply carry bit
+	popl	%ebx	FRAME_popl()
+
+	subl	%edx, %eax		C apply carry limb
+
+	imull	VAR_INVERSE, %eax	C final quotient limb
+
+	movl	%eax, -4(%edi)
+
+	popl	%edi
+	popl	%ebp
+
+	ret
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/divrem_1.asm b/mpn/x86/divrem_1.asm
new file mode 100644
index 0000000..2d618f1
--- /dev/null
+++ b/mpn/x86/divrem_1.asm
@@ -0,0 +1,233 @@
+dnl  x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient.
+
+dnl  Copyright 1999-2003, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C       cycles/limb
+C 486   approx 43 maybe
+C P5        44
+C P6        39
+C P6MMX     39
+C K6        22
+C K7        42
+C P4        58
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                         mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C                          mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                          mp_limb_t carry);
+C
+C Divide src,size by divisor and store the quotient in dst+xsize,size.
+C Extend the division to fractional quotient limbs in dst,xsize.  Return the
+C remainder.  Either or both xsize and size can be 0.
+C
+C mpn_divrem_1c takes a carry parameter which is an initial high limb,
+C effectively one extra limb at the top of src,size.  Must have
+C carry<divisor.
+C
+C
+C Essentially the code is the same as the division based part of
+C mpn/generic/divrem_1.c, but has the advantage that we get the desired divl
+C instruction even when gcc is not being used (when longlong.h only has the
+C rather slow generic C udiv_qrnnd()).
+C
+C A test is done to see if the high limb is less than the divisor, and if so
+C one less div is done.  A div is between 20 and 40 cycles on the various
+C x86s, so assuming high<divisor about half the time, then this test saves
+C half that amount.  The branch misprediction penalty on each chip is less
+C than half a div.
+C
+C
+C Notes for P5:
+C
+C It might be thought that moving the load down to pair with the store would
+C save 1 cycle, but that doesn't seem to happen in practice, and in any case
+C would be a mere 2.2% saving, so it's hardly worth bothering about.
+C
+C A mul-by-inverse might be a possibility for P5, as done in
+C mpn/x86/pentium/mod_1.asm.  The number of auxiliary instructions required
+C is a hindrance, but there could be a 10-15% speedup available.
+C
+C
+C Notes for K6:
+C
+C K6 has its own version of this code, using loop and paying attention to
+C cache line boundary crossings.  The target 20 c/l can be had with the
+C decl+jnz of the present code by pairing up the load and store in the
+C loops.  But it's considered easier not to introduce complexity just for
+C that, but instead let k6 have its own code.
+C
+
+defframe(PARAM_CARRY,  24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE,   16)
+defframe(PARAM_SRC,    12)
+defframe(PARAM_XSIZE,  8)
+defframe(PARAM_DST,    4)
+
+	TEXT
+	ALIGN(16)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %ecx
+	pushl	%edi		FRAME_pushl()
+
+	movl	PARAM_SRC, %edi
+	pushl	%esi		FRAME_pushl()
+
+	movl	PARAM_DIVISOR, %esi
+	pushl	%ebx		FRAME_pushl()
+
+	movl	PARAM_DST, %ebx
+	pushl	%ebp		FRAME_pushl()
+
+	movl	PARAM_XSIZE, %ebp
+	orl	%ecx, %ecx		C test size == 0
+
+	movl	PARAM_CARRY, %edx	C carry limb is the initial remainder
+	jz	L(fraction)		C no integer part, only fraction limbs
+
+	leal	-4(%ebx,%ebp,4), %ebx	C dst one limb below integer part
+	jmp	L(integer_top)		C join the mpn_divrem_1 loop below
+
+EPILOGUE()
+
+
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %ecx
+	pushl	%edi		FRAME_pushl()
+
+	movl	PARAM_SRC, %edi
+	pushl	%esi		FRAME_pushl()
+
+	movl	PARAM_DIVISOR, %esi
+	orl	%ecx,%ecx		C test size == 0
+
+	jz	L(size_zero)
+	pushl	%ebx		FRAME_pushl()
+
+	movl	-4(%edi,%ecx,4), %eax	C src high limb
+	xorl	%edx, %edx		C initial remainder is zero
+
+	movl	PARAM_DST, %ebx
+	pushl	%ebp		FRAME_pushl()
+
+	movl	PARAM_XSIZE, %ebp
+	cmpl	%esi, %eax		C high limb vs divisor
+
+	leal	-4(%ebx,%ebp,4), %ebx	C dst one limb below integer part
+	jae	L(integer_entry)
+
+
+	C high<divisor, so high of dst is zero, and avoid one div
+
+	movl	%edx, (%ebx,%ecx,4)
+	decl	%ecx
+
+	movl	%eax, %edx		C high limb becomes the remainder
+	jz	L(fraction)
+
+
+L(integer_top):
+	C eax	scratch (quotient)
+	C ebx	dst+4*xsize-4
+	C ecx	counter
+	C edx	scratch (remainder)
+	C esi	divisor
+	C edi	src
+	C ebp	xsize
+
+	movl	-4(%edi,%ecx,4), %eax
+L(integer_entry):
+
+	divl	%esi			C edx:eax / divisor; eax=q, edx=r
+
+	movl	%eax, (%ebx,%ecx,4)
+	decl	%ecx
+	jnz	L(integer_top)
+
+
+L(fraction):
+	orl	%ebp, %ecx		C counter = xsize; also test xsize == 0
+	jz	L(done)
+
+	movl	PARAM_DST, %ebx
+
+
+L(fraction_top):
+	C eax	scratch (quotient)
+	C ebx	dst
+	C ecx	counter
+	C edx	scratch (remainder)
+	C esi	divisor
+	C edi
+	C ebp
+
+	xorl	%eax, %eax		C fraction limbs divide remainder:0
+
+	divl	%esi
+
+	movl	%eax, -4(%ebx,%ecx,4)
+	decl	%ecx
+	jnz	L(fraction_top)
+
+
+L(done):
+	popl	%ebp
+	movl	%edx, %eax		C return the final remainder
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+
+L(size_zero):
+deflit(`FRAME',8)
+	movl	PARAM_XSIZE, %ecx
+	xorl	%eax, %eax		C quotient limbs and return value all zero
+
+	movl	PARAM_DST, %edi
+
+	cld	C better safe than sorry, see mpn/x86/README
+
+	rep
+	stosl
+
+	popl	%esi
+	popl	%edi
+	ret
+EPILOGUE()
diff --git a/mpn/x86/divrem_2.asm b/mpn/x86/divrem_2.asm
new file mode 100644
index 0000000..8da72c7
--- /dev/null
+++ b/mpn/x86/divrem_2.asm
@@ -0,0 +1,199 @@
+dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C		norm	frac
+C 486
+C P5
+C P6-13		29.2
+C P6-15		*26
+C K6
+C K7		22
+C K8		*19
+C P4-f1
+C P4-f2		*65
+C P4-f3
+C P4-f4		*72
+
+C A star means numbers not updated for the latest version of the code.
+
+
+C TODO
+C  * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
+C  * The loop has not been carefully tuned.  We should at the very least do
+C    some local insn swapping.
+C  * The code outside the main loop is what gcc generated.  Clean up!
+C  * Clean up stack slot usage.
+
+C INPUT PARAMETERS
+C qp
+C fn
+C up_param
+C un_param
+C dp
+
+
+C eax ebx ecx edx esi edi ebp
+C         cnt         qp
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_divrem_2)
+	C Divide {up,un} by the normalized 2-limb divisor {dp,2}, also
+	C producing fn fraction limbs.  Quotient limbs go to qp; the
+	C 2-limb remainder is written back at up[0],up[1].  The return
+	C value ("msl" slot, 32(%esp)) is the extra high quotient limb,
+	C 0 or 1.
+	push	%ebp
+	push	%edi
+	push	%esi
+	push	%ebx
+	sub	$36, %esp
+	mov	68(%esp), %ecx		C un
+	mov	72(%esp), %esi		C dp
+	movl	$0, 32(%esp)		C msl = 0
+	lea	0(,%ecx,4), %edi
+	add	64(%esp), %edi		C up
+	mov	(%esi), %ebx		C d0
+	mov	4(%esi), %eax		C d1
+	mov	%ebx, 20(%esp)		C save d0
+	sub	$12, %edi		C edi = &up[un-3]
+	mov	%eax, 24(%esp)		C save d1
+	mov	%edi, 12(%esp)
+	mov	8(%edi), %ebx		C n2 = up[un-1]
+	mov	4(%edi), %ebp		C n1 = up[un-2]
+	cmp	%eax, %ebx
+	jb	L(8)			C n2 < d1: no initial adjustment
+	seta	%dl
+	cmp	20(%esp), %ebp
+	setae	%al
+	orb	%dl, %al		C "orb" form to placate Sun tools
+	jne	L(35)			C {n2,n1} >= {d1,d0}: high q limb is 1
+L(8):
+	mov	60(%esp), %esi		C fn
+	lea	-3(%esi,%ecx), %edi	C edi = un+fn-3, top quotient index
+	test	%edi, %edi
+	js	L(9)			C no quotient limbs to compute
+
+	C Compute di, an inverse of the divisor high limb d1, for the
+	C 3/2 division steps of the main loop; the estimate from divl is
+	C refined by the adjustment loop at L(36).
+	mov	24(%esp), %edx
+	mov	$-1, %esi
+	mov	%esi, %eax
+	mov	%esi, %ecx
+	not	%edx
+	divl	24(%esp)
+	mov	%eax, %esi
+	imul	24(%esp), %eax
+	mov	%eax, (%esp)
+	mov	%esi, %eax
+	mull	20(%esp)
+	mov	(%esp), %eax
+	add	20(%esp), %eax
+	adc	$0, %ecx
+	add	%eax, %edx
+	adc	$0, %ecx
+	mov	%ecx, %eax
+	js	L(32)
+L(36):	dec	%esi			C step di down until the product fits
+	sub	24(%esp), %edx
+	sbb	$0, %eax
+	jns	L(36)
+L(32):
+	mov	%esi, 16(%esp)		C di
+	mov	%edi, %ecx		C un
+	mov	12(%esp), %esi		C up
+	mov	24(%esp), %eax
+	neg	%eax
+	mov	%eax, 4(%esp)		C -d1
+	ALIGN(16)
+	nop
+
+C eax ebx ecx edx esi edi ebp  0    4   8   12  16  20  24  28  32   56  60
+C     n2  un      up      n1   q0  -d1          di  d0  d1      msl  qp  fn
+
+L(loop):
+	mov	16(%esp), %eax		C di
+	mul	%ebx
+	add	%ebp, %eax
+	mov	%eax, (%esp)		C q0
+	adc	%ebx, %edx
+	mov	%edx, %edi		C q
+	imul	4(%esp), %edx
+	mov	20(%esp), %eax
+	lea	(%edx, %ebp), %ebx	C n1 -= ...
+	mul	%edi
+	xor	%ebp, %ebp
+	cmp	60(%esp), %ecx
+	jl	L(19)			C fraction step: use 0, don't read up[]
+	mov	(%esi), %ebp
+	sub	$4, %esi
+L(19):	sub	20(%esp), %ebp
+	sbb	24(%esp), %ebx
+	sub	%eax, %ebp
+	sbb	%edx, %ebx
+	mov	20(%esp), %eax		C d1
+	inc	%edi
+	xor	%edx, %edx
+	cmp	(%esp), %ebx
+	adc	$-1, %edx		C mask
+	add	%edx, %edi		C q--
+	and	%edx, %eax		C d0 or 0
+	and	24(%esp), %edx		C d1 or 0
+	add	%eax, %ebp
+	adc	%edx, %ebx
+	cmp	24(%esp), %ebx
+	jae	L(fix)
+L(bck):	mov	56(%esp), %edx
+	mov	%edi, (%edx, %ecx, 4)	C store quotient limb at qp[ecx]
+	dec	%ecx
+	jns	L(loop)
+
+L(9):	mov	64(%esp), %esi		C up
+	mov	%ebp, (%esi)		C store 2-limb remainder at up[0..1]
+	mov	%ebx, 4(%esi)
+	mov	32(%esp), %eax		C return msl
+	add	$36, %esp
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	pop	%ebp
+	ret
+
+	C Rare case: remainder still >= divisor, so the quotient limb
+	C was one too small; increment it and subtract the divisor once.
+L(fix):	seta	%dl
+	cmp	20(%esp), %ebp
+	setae	%al
+	orb	%dl, %al		C "orb" form to placate Sun tools
+	je	L(bck)
+	inc	%edi
+	sub	20(%esp), %ebp
+	sbb	24(%esp), %ebx
+	jmp	L(bck)
+
+L(35):	sub	20(%esp), %ebp		C subtract divisor once up front
+	sbb	24(%esp), %ebx
+	movl	$1, 32(%esp)		C msl = 1
+	jmp	L(8)
+EPILOGUE()
diff --git a/mpn/x86/gcd_11.asm b/mpn/x86/gcd_11.asm
new file mode 100644
index 0000000..699996e
--- /dev/null
+++ b/mpn/x86/gcd_11.asm
@@ -0,0 +1,126 @@
+dnl  x86 mpn_gcd_11 optimised for processors with slow BSF.
+
+dnl  Based on C version.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+dnl  Rudimentary code for x86-32, i.e. for CPUs without cmov.  Also, the bsf
+dnl  instruction is assumed to be so slow it is useless.  Instead a table is
+dnl  used.
+dnl
+dnl  The loop benefits from OoO, in-order CPUs might want a different loop.
+dnl  The ebx and ecx registers could be combined if the assigment of ecx were
+dnl  postponed until ebx died, but that would at least hurt in-order CPUs.
+
+C	     cycles/bit (approx)
+C AMD K7	 ?
+C AMD K8,K9	 ?
+C AMD K10	 ?
+C AMD bd1	 ?
+C AMD bd2	 ?
+C AMD bd3	 ?
+C AMD bd4	 ?
+C AMD bt1	 ?
+C AMD bt2	 ?
+C AMD zn1	 ?
+C AMD zn2	 ?
+C Intel P4-2	 ?
+C Intel P4-3/4	 ?
+C Intel P6/13	 ?
+C Intel CNR	 ?
+C Intel NHM	 ?
+C Intel SBR	 ?
+C Intel IBR	 ?
+C Intel HWL	 ?
+C Intel BWL	 ?
+C Intel SKL	 ?
+C Intel atom	 ?
+C Intel SLM	 ?
+C Intel GLM	 ?
+C Intel GLM+	 ?
+C VIA nano	 ?
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+deflit(MAXSHIFT, 6)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+DEF_OBJECT(ctz_table,64)
+	.byte	MAXSHIFT
+forloop(i,1,MASK,
+`	.byte	m4_count_trailing_zeros(i)
+')
+END_OBJECT(ctz_table)
+
+define(`u0',    `%eax')
+define(`v0',    `%edx')
+
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_11)
+	C GCD of single limbs u0 and v0 by binary subtraction, stripping
+	C trailing zeros of |u-v| via ctz_table instead of bsf.
+	C NOTE(review): presumably both inputs are odd and non-zero, per
+	C the usual mpn_gcd_11 contract -- confirm against callers.
+	push	%edi
+	push	%esi
+	push	%ebx
+
+	mov	16(%esp), u0
+	mov	20(%esp), v0
+	LEAL(	ctz_table, %esi)
+	sub	v0, u0			C u = u - v		0
+	jz	L(end)
+
+	ALIGN(16)
+L(top):	sbb	%ebx, %ebx		C mask			1
+	mov	u0, %edi		C			1
+	mov	u0, %ecx		C			1
+	and	%ebx, %edi		C			2
+	xor	%ebx, u0		C			2
+	add	%edi, v0		C v = min(u.v)		3
+	sub	%ebx, u0		C u = |u - v|		3
+L(mid):	and	$MASK, %ecx		C			2
+	movzbl	(%esi,%ecx), %ecx	C			3
+	jz	L(shift_alot)		C flags still from "and": low bits all 0
+	shr	%cl, u0			C			4
+	sub	v0, u0			C u = u - v		0,5
+	jnz	L(top)
+
+L(end):	mov	v0, %eax		C u reached 0, v holds the gcd
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	ret
+
+L(shift_alot):
+	shr	$MAXSHIFT, u0
+	mov	u0, %ecx		C re-examine the remaining low bits
+	jmp	L(mid)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/gmp-mparam.h b/mpn/x86/gmp-mparam.h
new file mode 100644
index 0000000..2cb1984
--- /dev/null
+++ b/mpn/x86/gmp-mparam.h
@@ -0,0 +1,38 @@
+/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+
+/* Generic x86 mpn_divexact_1 is faster than generic x86 mpn_divrem_1 on all
+   of p5, p6, k6 and k7, so use it always.  It's probably slower on 386 and
+   486, but that's too bad.  */
+#define DIVEXACT_1_THRESHOLD  0
diff --git a/mpn/x86/lshift.asm b/mpn/x86/lshift.asm
new file mode 100644
index 0000000..bdfc987
--- /dev/null
+++ b/mpn/x86/lshift.asm
@@ -0,0 +1,106 @@
+dnl  x86 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C     cycles/limb
+C P54	 7.5
+C P55	 7.0
+C P6	 2.5
+C K6	 4.5
+C K7	 5.0
+C P4	14.5
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+	TEXT
+	ALIGN(8)
+PROLOGUE(mpn_lshift)
+
+	C Shift {src,size} left by shift bits, store at dst, and return
+	C the bits shifted out of the most significant limb.  Works from
+	C the high limb downwards; the main loop is two-way unrolled and
+	C entered in the middle for odd remaining counts.
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+deflit(`FRAME',12)
+
+	movl	PARAM_DST,%edi
+	movl	PARAM_SRC,%esi
+	movl	PARAM_SIZE,%edx
+	movl	PARAM_SHIFT,%ecx
+
+	subl	$4,%esi			C adjust src
+
+	movl	(%esi,%edx,4),%ebx	C read most significant limb
+	xorl	%eax,%eax
+	shldl(	%cl, %ebx, %eax)	C compute carry limb
+	decl	%edx
+	jz	L(end)			C size==1
+	pushl	%eax			C push carry limb onto stack
+	testb	$1,%dl
+	jnz	L(1)			C enter loop in the middle
+	movl	%ebx,%eax
+
+	ALIGN(8)
+L(oop):	movl	(%esi,%edx,4),%ebx	C load next lower limb
+	shldl(	%cl, %ebx, %eax)	C compute result limb
+	movl	%eax,(%edi,%edx,4)	C store it
+	decl	%edx
+L(1):	movl	(%esi,%edx,4),%eax
+	shldl(	%cl, %eax, %ebx)
+	movl	%ebx,(%edi,%edx,4)
+	decl	%edx
+	jnz	L(oop)
+
+	shll	%cl,%eax		C compute least significant limb
+	movl	%eax,(%edi)		C store it
+
+	popl	%eax			C pop carry limb
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+L(end):	shll	%cl,%ebx		C compute least significant limb
+	movl	%ebx,(%edi)		C store it
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/mod_34lsub1.asm b/mpn/x86/mod_34lsub1.asm
new file mode 100644
index 0000000..5a6c653
--- /dev/null
+++ b/mpn/x86/mod_34lsub1.asm
@@ -0,0 +1,183 @@
+dnl  Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.
+
+dnl  Copyright 2000-2002, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C      cycles/limb
+C P5	  3.0
+C P6	  3.66
+C K6	  3.0
+C K7	  1.3
+C P4	  9
+
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C
+
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC,  4)
+
+dnl  re-use parameter space
+define(SAVE_EBX, `PARAM_SRC')
+
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_mod_34lsub1)
+deflit(`FRAME',0)
+
+	C Fold the limbs of {src,size} into three byte-offset
+	C accumulators (one per limb position mod 3) and combine them,
+	C yielding a value congruent to src modulo 2^24-1.  NOTE(review):
+	C the result is presumably not fully reduced below 2^24-1 --
+	C callers are expected to finish the reduction; confirm usage.
+
+	movl	PARAM_SIZE, %ecx
+	movl	PARAM_SRC, %edx
+
+	subl	$2, %ecx
+	ja	L(three_or_more)
+
+	movl	(%edx), %eax
+	jb	L(one)			C size==1: return src[0] unchanged
+
+	movl	4(%edx), %ecx
+	movl	%eax, %edx
+	shrl	$24, %eax		C src[0] low
+
+	andl	$0xFFFFFF, %edx		C src[0] high
+	addl	%edx, %eax
+	movl	%ecx, %edx
+
+	andl	$0xFFFF, %ecx
+	shrl	$16, %edx		C src[1] high
+	addl	%edx, %eax
+
+	shll	$8, %ecx		C src[1] low
+	addl	%ecx, %eax
+
+L(one):
+	ret
+
+
+L(three_or_more):
+	C eax
+	C ebx
+	C ecx	size-2
+	C edx	src
+	C esi
+	C edi
+	C ebp
+
+	movl	%ebx, SAVE_EBX		C and arrange 16-byte loop alignment
+	xorl	%ebx, %ebx
+
+	pushl	%esi	FRAME_pushl()
+	xorl	%esi, %esi
+
+	pushl	%edi	FRAME_pushl()
+	xorl	%eax, %eax		C and clear carry flag
+
+
+	C offset 0x40 here
+L(top):
+	C eax	acc 0mod3
+	C ebx	acc 1mod3
+	C ecx	counter, limbs
+	C edx	src
+	C esi	acc 2mod3
+	C edi
+	C ebp
+
+	leal	12(%edx), %edx
+	leal	-2(%ecx), %ecx		C with the decl below, ecx -= 3
+
+	adcl	-12(%edx), %eax		C carry from last add wraps around
+	adcl	-8(%edx), %ebx
+	adcl	-4(%edx), %esi
+
+	decl	%ecx
+	jg	L(top)
+
+
+	C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
+
+	movl	$0xFFFFFFFF, %edi
+	incl	%ecx
+	js	L(combine)
+
+	adcl	(%edx), %eax
+	movl	$0xFFFFFF00, %edi
+	decl	%ecx
+	js	L(combine)
+
+	adcl	4(%edx), %ebx
+	movl	$0xFFFF0000, %edi
+
+
+L(combine):
+	C eax	acc 0mod3
+	C ebx	acc 1mod3
+	C ecx
+	C edx
+	C esi	acc 2mod3
+	C edi	mask
+	C ebp
+
+	sbbl	%ecx, %ecx		C carry
+	movl	%eax, %edx		C 0mod3
+
+	shrl	$24, %eax		C 0mod3 high
+	andl	%edi, %ecx		C carry masked
+
+	subl	%ecx, %eax		C apply carry
+	movl	%ebx, %edi		C 1mod3
+
+	shrl	$16, %ebx		C 1mod3 high
+	andl	$0x00FFFFFF, %edx	C 0mod3 low
+
+	addl	%edx, %eax		C apply 0mod3 low
+	andl	$0xFFFF, %edi
+
+	shll	$8, %edi		C 1mod3 low
+	addl	%ebx, %eax		C apply 1mod3 high
+
+	addl	%edi, %eax		C apply 1mod3 low
+	movl	%esi, %edx		C 2mod3
+
+	shrl	$8, %esi		C 2mod3 high
+	andl	$0xFF, %edx		C 2mod3 low
+
+	shll	$16, %edx		C 2mod3 low
+	addl	%esi, %eax		C apply 2mod3 high
+
+	addl	%edx, %eax		C apply 2mod3 low
+	popl	%edi	FRAME_popl()
+
+	movl	SAVE_EBX, %ebx
+	popl	%esi	FRAME_popl()
+
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/mul_1.asm b/mpn/x86/mul_1.asm
new file mode 100644
index 0000000..e760ccb
--- /dev/null
+++ b/mpn/x86/mul_1.asm
@@ -0,0 +1,140 @@
+dnl  x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
+dnl  with a limb and store the result in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1997-2002, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C			    cycles/limb
+C P5				12.5
+C P6 model 0-8,10-12		 5.5
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)		 5.25
+C P4 model 0  (Willamette)	19.0
+C P4 model 1  (?)		19.0
+C P4 model 2  (Northwood)	19.0
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C AMD K6			10.5
+C AMD K7			 4.5
+C AMD K8
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t multiplier);
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+	TEXT
+	ALIGN(8)
+PROLOGUE(mpn_mul_1)
+deflit(`FRAME',0)
+
+	C dst[i] = src[i] * multiplier for i < size, returning the final
+	C carry limb.  size mod 4 limbs are handled one at a time in
+	C L(oop0), then the rest four at a time in L(oop).
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+deflit(`FRAME',16)
+
+	movl	PARAM_DST,%edi
+	movl	PARAM_SRC,%esi
+	movl	PARAM_SIZE,%ecx
+
+	xorl	%ebx,%ebx		C cylimb = 0
+	andl	$3,%ecx			C size mod 4
+	jz	L(end0)
+
+L(oop0):
+	movl	(%esi),%eax
+	mull	PARAM_MULTIPLIER
+	leal	4(%esi),%esi
+	addl	%ebx,%eax
+	movl	$0,%ebx
+	adcl	%ebx,%edx
+	movl	%eax,(%edi)
+	movl	%edx,%ebx	C propagate carry into cylimb
+
+	leal	4(%edi),%edi
+	decl	%ecx
+	jnz	L(oop0)
+
+L(end0):
+	movl	PARAM_SIZE,%ecx
+	shrl	$2,%ecx			C size / 4
+	jz	L(end)
+
+
+	ALIGN(8)
+L(oop):	movl	(%esi),%eax
+	mull	PARAM_MULTIPLIER
+	addl	%eax,%ebx
+	movl	$0,%ebp
+	adcl	%edx,%ebp
+
+	movl	4(%esi),%eax
+	mull	PARAM_MULTIPLIER
+	movl	%ebx,(%edi)
+	addl	%eax,%ebp	C new lo + cylimb
+	movl	$0,%ebx
+	adcl	%edx,%ebx
+
+	movl	8(%esi),%eax
+	mull	PARAM_MULTIPLIER
+	movl	%ebp,4(%edi)
+	addl	%eax,%ebx	C new lo + cylimb
+	movl	$0,%ebp
+	adcl	%edx,%ebp
+
+	movl	12(%esi),%eax
+	mull	PARAM_MULTIPLIER
+	movl	%ebx,8(%edi)
+	addl	%eax,%ebp	C new lo + cylimb
+	movl	$0,%ebx
+	adcl	%edx,%ebx
+
+	movl	%ebp,12(%edi)
+
+	leal	16(%esi),%esi
+	leal	16(%edi),%edi
+	decl	%ecx
+	jnz	L(oop)
+
+L(end):	movl	%ebx,%eax		C return the carry limb
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/mul_basecase.asm b/mpn/x86/mul_basecase.asm
new file mode 100644
index 0000000..82a2980
--- /dev/null
+++ b/mpn/x86/mul_basecase.asm
@@ -0,0 +1,223 @@
+dnl  x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
+dnl  in a third limb vector.
+
+dnl  Copyright 1996-2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C     cycles/crossproduct
+C P5	  15
+C P6	   7.5
+C K6	  12.5
+C K7	   5.5
+C P4	  24
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xsize,
+C                        mp_srcptr yp, mp_size_t ysize);
+C
+C This was written in haste since the Pentium optimized code that was used
+C for all x86 machines was slow for the Pentium II.  This code would benefit
+C from some cleanup.
+C
+C To shave off some percentage of the run-time, one should make 4 variants
+C of the Louter loop, for the four different outcomes of un mod 4.  That
+C would avoid Loop0 altogether.  Code expansion would be > 4-fold for that
+C part of the function, but since it is not very large, that would be
+C acceptable.
+C
+C The mul loop (at L(oopM)) might need some tweaking.  Its current speed is
+C unknown.
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP,   16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP,   8)
+defframe(PARAM_WP,   4)
+
+defframe(VAR_MULTIPLIER, -4)
+defframe(VAR_COUNTER,    -8)
+deflit(VAR_STACK_SPACE,  8)
+
+	TEXT
+	ALIGN(8)
+
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+	C wp[] = xp[] * yp[]: one mul_1-style pass for yp[0] (L(oopM)),
+	C then one addmul_1-style pass per remaining y limb (L(outer)),
+	C with the current y limb kept in the VAR_MULTIPLIER stack slot.
+
+	subl	$VAR_STACK_SPACE,%esp
+	pushl	%esi
+	pushl	%ebp
+	pushl	%edi
+deflit(`FRAME',eval(VAR_STACK_SPACE+12))
+
+	movl	PARAM_XP,%esi
+	movl	PARAM_WP,%edi
+	movl	PARAM_YP,%ebp
+
+	movl	(%esi),%eax		C load xp[0]
+	mull	(%ebp)			C multiply by yp[0]
+	movl	%eax,(%edi)		C store to wp[0]
+	movl	PARAM_XSIZE,%ecx	C xsize
+	decl	%ecx			C If xsize = 1, ysize = 1 too
+	jz	L(done)
+
+	pushl	%ebx
+FRAME_pushl()
+	movl	%edx,%ebx
+
+	leal	4(%esi),%esi
+	leal	4(%edi),%edi
+
+L(oopM):
+	movl	(%esi),%eax		C load next limb at xp[j]
+	leal	4(%esi),%esi
+	mull	(%ebp)
+	addl	%ebx,%eax
+	movl	%edx,%ebx
+	adcl	$0,%ebx
+	movl	%eax,(%edi)
+	leal	4(%edi),%edi
+	decl	%ecx
+	jnz	L(oopM)
+
+	movl	%ebx,(%edi)		C most significant limb of product
+	addl	$4,%edi			C increment wp
+	movl	PARAM_XSIZE,%eax
+	shll	$2,%eax
+	subl	%eax,%edi		C rewind wp and xp by xsize limbs
+	subl	%eax,%esi
+
+	movl	PARAM_YSIZE,%eax	C ysize
+	decl	%eax
+	jz	L(skip)			C ysize==1, product is complete
+	movl	%eax,VAR_COUNTER	C set index i to ysize
+
+L(outer):
+	movl	PARAM_YP,%ebp		C yp
+	addl	$4,%ebp			C make ebp point to next v limb
+	movl	%ebp,PARAM_YP
+	movl	(%ebp),%eax		C copy y limb ...
+	movl	%eax,VAR_MULTIPLIER	C ... to stack slot
+	movl	PARAM_XSIZE,%ecx
+
+	xorl	%ebx,%ebx		C cylimb = 0
+	andl	$3,%ecx			C xsize mod 4 limbs done one by one
+	jz	L(end0)
+
+L(oop0):
+	movl	(%esi),%eax
+	mull	VAR_MULTIPLIER
+	leal	4(%esi),%esi
+	addl	%ebx,%eax
+	movl	$0,%ebx
+	adcl	%ebx,%edx
+	addl	%eax,(%edi)
+	adcl	%edx,%ebx		C propagate carry into cylimb
+
+	leal	4(%edi),%edi
+	decl	%ecx
+	jnz	L(oop0)
+
+L(end0):
+	movl	PARAM_XSIZE,%ecx
+	shrl	$2,%ecx			C remaining xsize/4 quads
+	jz	L(endX)
+
+	ALIGN(8)
+L(oopX):
+	movl	(%esi),%eax
+	mull	VAR_MULTIPLIER
+	addl	%eax,%ebx
+	movl	$0,%ebp
+	adcl	%edx,%ebp
+
+	movl	4(%esi),%eax
+	mull	VAR_MULTIPLIER
+	addl	%ebx,(%edi)
+	adcl	%eax,%ebp	C new lo + cylimb
+	movl	$0,%ebx
+	adcl	%edx,%ebx
+
+	movl	8(%esi),%eax
+	mull	VAR_MULTIPLIER
+	addl	%ebp,4(%edi)
+	adcl	%eax,%ebx	C new lo + cylimb
+	movl	$0,%ebp
+	adcl	%edx,%ebp
+
+	movl	12(%esi),%eax
+	mull	VAR_MULTIPLIER
+	addl	%ebx,8(%edi)
+	adcl	%eax,%ebp	C new lo + cylimb
+	movl	$0,%ebx
+	adcl	%edx,%ebx
+
+	addl	%ebp,12(%edi)
+	adcl	$0,%ebx		C propagate carry into cylimb
+
+	leal	16(%esi),%esi
+	leal	16(%edi),%edi
+	decl	%ecx
+	jnz	L(oopX)
+
+L(endX):
+	movl	%ebx,(%edi)
+	addl	$4,%edi
+
+	C we incremented wp and xp in the loop above; compensate
+	movl	PARAM_XSIZE,%eax
+	shll	$2,%eax
+	subl	%eax,%edi
+	subl	%eax,%esi
+
+	movl	VAR_COUNTER,%eax
+	decl	%eax
+	movl	%eax,VAR_COUNTER
+	jnz	L(outer)
+
+L(skip):
+	popl	%ebx
+	popl	%edi
+	popl	%ebp
+	popl	%esi
+	addl	$8,%esp
+	ret
+
+L(done):
+	movl	%edx,4(%edi)	   C store to wp[1]
+	popl	%edi
+	popl	%ebp
+	popl	%esi
+	addl	$8,%esp
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/rshift.asm b/mpn/x86/rshift.asm
new file mode 100644
index 0000000..dd5b7ce
--- /dev/null
+++ b/mpn/x86/rshift.asm
@@ -0,0 +1,108 @@
+dnl  x86 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C     cycles/limb
+C P54	 7.5
+C P55	 7.0
+C P6	 2.5
+C K6	 4.5
+C K7	 5.0
+C P4	16.5
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+	TEXT
+	ALIGN(8)
+PROLOGUE(mpn_rshift)
+
+	C Shift {src,size} right by shift bits, store at dst, and return
+	C the bits shifted out of the least significant limb.  Works from
+	C the low limb upwards (negative index counting to zero); the
+	C main loop is two-way unrolled and entered in the middle for odd
+	C remaining counts.
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+deflit(`FRAME',12)
+
+	movl	PARAM_DST,%edi
+	movl	PARAM_SRC,%esi
+	movl	PARAM_SIZE,%edx
+	movl	PARAM_SHIFT,%ecx
+
+	leal	-4(%edi,%edx,4),%edi
+	leal	(%esi,%edx,4),%esi
+	negl	%edx
+
+	movl	(%esi,%edx,4),%ebx	C read least significant limb
+	xorl	%eax,%eax
+	shrdl(	%cl, %ebx, %eax)	C compute carry limb
+	incl	%edx
+	jz	L(end)			C size==1
+	pushl	%eax			C push carry limb onto stack
+	testb	$1,%dl
+	jnz	L(1)			C enter loop in the middle
+	movl	%ebx,%eax
+
+	ALIGN(8)
+L(oop):	movl	(%esi,%edx,4),%ebx	C load next higher limb
+	shrdl(	%cl, %ebx, %eax)	C compute result limb
+	movl	%eax,(%edi,%edx,4)	C store it
+	incl	%edx
+L(1):	movl	(%esi,%edx,4),%eax
+	shrdl(	%cl, %eax, %ebx)
+	movl	%ebx,(%edi,%edx,4)
+	incl	%edx
+	jnz	L(oop)
+
+	shrl	%cl,%eax		C compute most significant limb
+	movl	%eax,(%edi)		C store it
+
+	popl	%eax			C pop carry limb
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+L(end):	shrl	%cl,%ebx		C compute most significant limb
+	movl	%ebx,(%edi)		C store it
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/sec_tabselect.asm b/mpn/x86/sec_tabselect.asm
new file mode 100644
index 0000000..a71ff16
--- /dev/null
+++ b/mpn/x86/sec_tabselect.asm
@@ -0,0 +1,106 @@
+dnl  x86 mpn_sec_tabselect.
+
+dnl  Copyright 2011, 2021 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C			    cycles/limb
+C P5				 ?
+C P6 model 0-8,10-12		 ?
+C P6 model 9  (Banias)		 ?
+C P6 model 13 (Dothan)		 ?
+C P4 model 0  (Willamette)	 ?
+C P4 model 1  (?)		 ?
+C P4 model 2  (Northwood)	 4.5
+C P4 model 3  (Prescott)	 ?
+C P4 model 4  (Nocona)		 ?
+C Intel Atom			 ?
+C AMD K6			 ?
+C AMD K7			 3.4
+C AMD K8			 ?
+C AMD K10			 ?
+
+C NOTES
+C  * This has not been tuned for any specific processor.  Its speed should not
+C    be too bad, though.
+C  * Using SSE2 could result in many-fold speedup.
+
+C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp',     `%edi')
+define(`tp',     `%esi')
+define(`n',      `%ebx')
+define(`nents',  `32(%esp)')
+define(`which',  `36(%esp)')
+
+define(`i',      `%ebp')
+define(`mask',   `%ecx')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_sec_tabselect)
+	C Side-channel silent table select: copy entry number "which"
+	C (n limbs each) of the nents entries at tp into rp.  Every entry
+	C is read and conditionally merged under a mask, so the memory
+	C access pattern is independent of "which".
+	push	%edi
+	push	%esi
+	push	%ebx
+	push	%ebp
+	mov	20(%esp), rp
+	mov	24(%esp), tp
+	mov	28(%esp), n
+
+	lea	(rp,n,4), rp
+	lea	(tp,n,4), tp
+L(outer):
+	subl	$1, which
+	sbb	mask, mask		C ~0 exactly when which hits -1, else 0
+
+	mov	n, i
+	neg	i			C i counts -n .. -1
+
+	ALIGN(16)
+L(top):	mov	(tp,i,4), %eax
+	mov	(rp,i,4), %edx
+	xor	%edx, %eax
+	and	mask, %eax		C keep tp data only under the mask
+	xor	%edx, %eax		C eax = mask ? tp[i] : rp[i]
+	mov	%eax, (rp,i,4)
+	inc	i
+	js	L(top)
+
+L(end):	lea	(tp,n,4), tp		C advance to the next table entry
+	decl	nents
+	jne	L(outer)
+
+L(outer_end):
+	pop	%ebp
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	ret
+EPILOGUE()
diff --git a/mpn/x86/sqr_basecase.asm b/mpn/x86/sqr_basecase.asm
new file mode 100644
index 0000000..7027f99
--- /dev/null
+++ b/mpn/x86/sqr_basecase.asm
@@ -0,0 +1,359 @@
+dnl  x86 generic mpn_sqr_basecase -- square an mpn number.
+
+dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+include(`config.m4')
+
+
+C     cycles/crossproduct  cycles/triangleproduct
+C P5
+C P6
+C K6
+C K7
+C P4
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the size is
+C small.
+C
+C The mul1 loop is not unrolled like mul_1.asm, it doesn't seem worth the
+C code size to do so here.
+C
+C Enhancements:
+C
+C The addmul loop here is also not unrolled like aorsmul_1.asm and
+C mul_basecase.asm are.  Perhaps it should be done.  It'd add to the
+C complexity, but if it's worth doing in the other places then it should be
+C worthwhile here.
+C
+C A fully-unrolled style like other sqr_basecase.asm versions (k6, k7, p6)
+C might be worth considering.  That'd add quite a bit to the code size, but
+C only as much as is used would be dragged into L1 cache.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+	TEXT
+	ALIGN(8)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %edx
+
+	movl	PARAM_SRC, %eax
+
+	cmpl	$2, %edx
+	movl	PARAM_DST, %ecx
+
+	je	L(two_limbs)
+	ja	L(three_or_more)
+
+
+C -----------------------------------------------------------------------------
+C one limb only
+	C eax	src
+	C ebx
+	C ecx	dst
+	C edx
+
+	movl	(%eax), %eax
+	mull	%eax
+	movl	%eax, (%ecx)
+	movl	%edx, 4(%ecx)
+	ret
+
+
+C -----------------------------------------------------------------------------
+	ALIGN(8)
+L(two_limbs):
+	C eax	src
+	C ebx
+	C ecx	dst
+	C edx
+
+	pushl	%ebx
+	pushl	%ebp
+
+	movl	%eax, %ebx
+	movl	(%eax), %eax
+
+	mull	%eax		C src[0]^2
+
+	pushl	%esi
+	pushl	%edi
+
+	movl	%edx, %esi	C dst[1]
+	movl	%eax, (%ecx)	C dst[0]
+
+	movl	4(%ebx), %eax
+	mull	%eax		C src[1]^2
+
+	movl	%eax, %edi	C dst[2]
+	movl	%edx, %ebp	C dst[3]
+
+	movl	(%ebx), %eax
+	mull	4(%ebx)		C src[0]*src[1]
+
+	addl	%eax, %esi
+
+	adcl	%edx, %edi
+
+	adcl	$0, %ebp
+	addl	%esi, %eax
+
+	adcl	%edi, %edx
+	movl	%eax, 4(%ecx)
+
+	adcl	$0, %ebp
+
+	movl	%edx, 8(%ecx)
+	movl	%ebp, 12(%ecx)
+
+	popl	%edi
+	popl	%esi
+
+	popl	%ebp
+	popl	%ebx
+
+	ret
+
+
+C -----------------------------------------------------------------------------
+	ALIGN(8)
+L(three_or_more):
+deflit(`FRAME',0)
+	C eax	src
+	C ebx
+	C ecx	dst
+	C edx	size
+
+	pushl	%ebx	FRAME_pushl()
+	pushl	%edi	FRAME_pushl()
+
+	pushl	%esi	FRAME_pushl()
+	pushl	%ebp	FRAME_pushl()
+
+	leal	(%ecx,%edx,4), %edi	C &dst[size], end of this mul1
+	leal	(%eax,%edx,4), %esi	C &src[size]
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+
+	movl	(%eax), %ebp		C src[0], multiplier
+	movl	%edx, %ecx
+
+	negl	%ecx			C -size
+	xorl	%ebx, %ebx		C clear carry limb
+
+	incl	%ecx			C -(size-1)
+
+L(mul1):
+	C eax	scratch
+	C ebx	carry
+	C ecx	counter, limbs, negative
+	C edx	scratch
+	C esi	&src[size]
+	C edi	&dst[size]
+	C ebp	multiplier
+
+	movl	(%esi,%ecx,4), %eax
+	mull	%ebp
+	addl	%eax, %ebx
+	adcl	$0, %edx
+	movl	%ebx, (%edi,%ecx,4)
+	movl	%edx, %ebx
+	incl	%ecx
+	jnz	L(mul1)
+
+	movl	%ebx, (%edi)
+
+
+	C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for
+	C n=1..size-2.
+	C
+	C The last product src[size-2]*src[size-1], which is the end corner
+	C of the product triangle, is handled separately at the end to save
+	C looping overhead.  If size is 3 then it's only this that needs to
+	C be done.
+	C
+	C In the outer loop %esi is a constant, and %edi just advances by 1
+	C limb each time.  The size of the operation decreases by 1 limb
+	C each time.
+
+	C eax
+	C ebx	carry (needing carry flag added)
+	C ecx
+	C edx
+	C esi	&src[size]
+	C edi	&dst[size]
+	C ebp
+
+	movl	PARAM_SIZE, %ecx
+	subl	$3, %ecx
+	jz	L(corner)
+
+	negl	%ecx
+
+dnl  re-use parameter space
+define(VAR_OUTER,`PARAM_DST')
+
+L(outer):
+	C eax
+	C ebx
+	C ecx	outer loop counter, -(size-3) to -1
+	C edx
+	C esi	&src[size]
+	C edi	dst, pointing at stored carry limb of previous loop
+	C ebp
+
+	movl	%ecx, VAR_OUTER
+	addl	$4, %edi		C advance dst end
+
+	movl	-8(%esi,%ecx,4), %ebp	C next multiplier
+	subl	$1, %ecx
+
+	xorl	%ebx, %ebx		C initial carry limb
+
+L(inner):
+	C eax	scratch
+	C ebx	carry (needing carry flag added)
+	C ecx	counter, -n-1 to -1
+	C edx	scratch
+	C esi	&src[size]
+	C edi	dst end of this addmul
+	C ebp	multiplier
+
+	movl	(%esi,%ecx,4), %eax
+	mull	%ebp
+	addl	%ebx, %eax
+	adcl	$0, %edx
+	addl	%eax, (%edi,%ecx,4)
+	adcl	$0, %edx
+	movl	%edx, %ebx
+	addl	$1, %ecx
+	jl	L(inner)
+
+
+	movl	%ebx, (%edi)
+	movl	VAR_OUTER, %ecx
+	incl	%ecx
+	jnz	L(outer)
+
+
+L(corner):
+	C esi	&src[size]
+	C edi	&dst[2*size-3]
+
+	movl	-4(%esi), %eax
+	mull	-8(%esi)		C src[size-1]*src[size-2]
+	addl	%eax, 0(%edi)
+	adcl	$0, %edx
+	movl	%edx, 4(%edi)		C dst high limb
+
+
+C -----------------------------------------------------------------------------
+C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
+
+	movl	PARAM_SIZE, %eax
+	negl	%eax
+	addl	$1, %eax		C -(size-1) and clear carry
+
+L(lshift):
+	C eax	counter, negative
+	C ebx	next limb
+	C ecx
+	C edx
+	C esi
+	C edi	&dst[2*size-3]
+	C ebp
+
+	rcll	8(%edi,%eax,8)
+	rcll	12(%edi,%eax,8)
+	incl	%eax
+	jnz	L(lshift)
+
+
+	adcl	%eax, %eax		C high bit out
+	movl	%eax, 8(%edi)		C dst most significant limb
+
+
+C Now add in the squares on the diagonal, namely src[0]^2, src[1]^2, ...,
+C src[size-1]^2.  dst[0] hasn't been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+	movl	PARAM_SRC, %esi
+	movl	(%esi), %eax		C src[0]
+	mull	%eax			C src[0]^2
+
+	movl	PARAM_SIZE, %ecx
+	leal	(%esi,%ecx,4), %esi	C src end
+
+	negl	%ecx			C -size
+	movl	%edx, %ebx		C initial carry
+
+	movl	%eax, 12(%edi,%ecx,8)	C dst[0]
+	incl	%ecx			C -(size-1)
+
+L(diag):
+	C eax	scratch (low product)
+	C ebx	carry limb
+	C ecx	counter, -(size-1) to -1
+	C edx	scratch (high product)
+	C esi	&src[size]
+	C edi	&dst[2*size-3]
+	C ebp	scratch (fetched dst limbs)
+
+	movl	(%esi,%ecx,4), %eax
+	mull	%eax
+
+	addl	%ebx, 8(%edi,%ecx,8)
+	movl	%edx, %ebx
+
+	adcl	%eax, 12(%edi,%ecx,8)
+	adcl	$0, %ebx
+
+	incl	%ecx
+	jnz	L(diag)
+
+
+	addl	%ebx, 8(%edi)		C dst most significant limb
+
+	popl	%ebp
+	popl	%esi
+
+	popl	%edi
+	popl	%ebx
+
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86/udiv.asm b/mpn/x86/udiv.asm
new file mode 100644
index 0000000..85a68a8
--- /dev/null
+++ b/mpn/x86/udiv.asm
@@ -0,0 +1,52 @@
+dnl  x86 mpn_udiv_qrnnd -- 2 by 1 limb division
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C mp_limb_t mpn_udiv_qrnnd (mp_limb_t *remptr, mp_limb_t high, mp_limb_t low,
+C                           mp_limb_t divisor);
+
+defframe(PARAM_DIVISOR, 16)
+defframe(PARAM_LOW,     12)
+defframe(PARAM_HIGH,    8)
+defframe(PARAM_REMPTR,  4)
+
+	TEXT
+	ALIGN(8)
+PROLOGUE(mpn_udiv_qrnnd)
+deflit(`FRAME',0)
+	movl	PARAM_LOW, %eax
+	movl	PARAM_HIGH, %edx
+	divl	PARAM_DIVISOR
+	movl	PARAM_REMPTR, %ecx
+	movl	%edx, (%ecx)
+	ret
+EPILOGUE()
diff --git a/mpn/x86/umul.asm b/mpn/x86/umul.asm
new file mode 100644
index 0000000..0e703ac
--- /dev/null
+++ b/mpn/x86/umul.asm
@@ -0,0 +1,51 @@
+dnl  mpn_umul_ppmm -- 1x1->2 limb multiplication
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+
+defframe(PARAM_M2,    12)
+defframe(PARAM_M1,     8)
+defframe(PARAM_LOWPTR, 4)
+
+	TEXT
+	ALIGN(8)
+PROLOGUE(mpn_umul_ppmm)
+deflit(`FRAME',0)
+	movl	PARAM_LOWPTR, %ecx
+	movl	PARAM_M1, %eax
+	mull	PARAM_M2
+	movl	%eax, (%ecx)
+	movl	%edx, %eax
+	ret
+EPILOGUE()
diff --git a/mpn/x86/x86-defs.m4 b/mpn/x86/x86-defs.m4
new file mode 100644
index 0000000..81309b2
--- /dev/null
+++ b/mpn/x86/x86-defs.m4
@@ -0,0 +1,1024 @@
+divert(-1)
+
+dnl  m4 macros for x86 assembler.
+
+dnl  Copyright 1999-2003, 2007, 2010, 2012, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+dnl  Notes:
+dnl
+dnl  m4 isn't perfect for processing BSD style x86 assembler code, the main
+dnl  problems are,
+dnl
+dnl  1. Doing define(foo,123) and then using foo in an addressing mode like
+dnl     foo(%ebx) expands as a macro rather than a constant.  This is worked
+dnl     around by using deflit() from asm-defs.m4, instead of define().
+dnl
+dnl  2. Immediates in macro definitions need a space or `' to stop the $
+dnl     looking like a macro parameter.  For example,
+dnl
+dnl	        define(foo, `mov $ 123, %eax')
+dnl
+dnl     This is only a problem in macro definitions, not in ordinary text,
+dnl     and not in macro parameters like text passed to forloop() or ifdef().
+
+
+deflit(GMP_LIMB_BYTES, 4)
+
+
+dnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  We
+dnl  undefine PIC since we don't need to be position independent in this
+dnl  case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.
+
+ifdef(`DLL_EXPORT',`undefine(`PIC')')
+
+
+dnl  Usage: CPUVEC_FUNCS_LIST
+dnl
+dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
+dnl  order they appear in that structure.
+
+define(CPUVEC_FUNCS_LIST,
+``add_n',
+`addlsh1_n',
+`addlsh2_n',
+`addmul_1',
+`addmul_2',
+`bdiv_dbm1c',
+`cnd_add_n',
+`cnd_sub_n',
+`com',
+`copyd',
+`copyi',
+`divexact_1',
+`divrem_1',
+`gcd_11',
+`lshift',
+`lshiftc',
+`mod_1',
+`mod_1_1p',
+`mod_1_1p_cps',
+`mod_1s_2p',
+`mod_1s_2p_cps',
+`mod_1s_4p',
+`mod_1s_4p_cps',
+`mod_34lsub1',
+`modexact_1c_odd',
+`mul_1',
+`mul_basecase',
+`mullo_basecase',
+`preinv_divrem_1',
+`preinv_mod_1',
+`redc_1',
+`redc_2',
+`rshift',
+`sqr_basecase',
+`sub_n',
+`sublsh1_n',
+`submul_1'')
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  In the x86 code we use explicit TEXT and ALIGN() calls in the code,
+dnl  since different alignments are wanted in various circumstances.  So for
+dnl  instance,
+dnl
+dnl                  TEXT
+dnl                  ALIGN(16)
+dnl          PROLOGUE(mpn_add_n)
+dnl          ...
+dnl          EPILOGUE()
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+m4_assert_defined(`WANT_PROFILING')
+	`GLOBL	$1
+	TYPE($1,`function')
+	COFF_TYPE($1)
+$1:
+ifelse(WANT_PROFILING,`prof',      `	call_mcount')
+ifelse(WANT_PROFILING,`gprof',     `	call_mcount')
+ifelse(WANT_PROFILING,`instrument',`	call_instrument(enter)')
+')
+
+
+dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
+dnl
+dnl  Emit COFF style ".def ... .endef" type information for a function, when
+dnl  supported.  The argument should include any GSYM_PREFIX.
+dnl
+dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
+
+define(COFF_TYPE,
+m4_assert_numargs(1)
+m4_assert_defined(`HAVE_COFF_TYPE')
+`ifelse(HAVE_COFF_TYPE,yes,
+	`.def	$1
+	.scl	2
+	.type	32
+	.endef')')
+
+
+dnl  Usage: call_mcount
+dnl
+dnl  For `gprof' style profiling, %ebp is setup as a frame pointer.  None of
+dnl  the assembler routines use %ebp this way, so it's done only for the
+dnl  benefit of mcount.  glibc sysdeps/i386/i386-mcount.S shows how mcount
+dnl  gets the current function from (%esp) and the parent from 4(%ebp).
+dnl
+dnl  For `prof' style profiling gcc generates mcount calls without setting
+dnl  up %ebp, and the same is done here.
+
+define(`call_mcount',
+m4_assert_numargs(-1)
+m4_assert_defined(`WANT_PROFILING')
+m4_assert_defined(`MCOUNT_PIC_REG')
+m4_assert_defined(`MCOUNT_NONPIC_REG')
+m4_assert_defined(`MCOUNT_PIC_CALL')
+m4_assert_defined(`MCOUNT_NONPIC_CALL')
+`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
+`	DATA
+	ALIGN(4)
+L(mcount_data_`'mcount_counter):
+	W32	0
+	TEXT
+')dnl
+ifelse(WANT_PROFILING,`gprof',
+`	pushl	%ebp
+	movl	%esp, %ebp
+')dnl
+ifdef(`PIC',
+`	pushl	%ebx
+	call_movl_eip_to_ebx
+L(mcount_here_`'mcount_counter):
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
+ifelse(MCOUNT_PIC_REG,,,
+`	leal	L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
+MCOUNT_PIC_CALL
+	popl	%ebx
+',`dnl non-PIC
+ifelse(MCOUNT_NONPIC_REG,,,
+`	movl	`$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
+')dnl
+MCOUNT_NONPIC_CALL
+')dnl
+ifelse(WANT_PROFILING,`gprof',
+`	popl	%ebp
+')
+define(`mcount_counter',incr(mcount_counter))
+')
+
+define(mcount_counter,1)
+
+
+dnl  Usage: call_instrument(enter|exit)
+dnl
+dnl  Call __cyg_profile_func_enter or __cyg_profile_func_exit.
+dnl
+dnl  For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
+dnl  so %ebx is just setup for these calls.  It's a bit wasteful to repeat
+dnl  the setup for the exit call having done it earlier for the enter, but
+dnl  there's nowhere very convenient to hold %ebx through the length of a
+dnl  routine, in general.
+dnl
+dnl  For PIC, because instrument_current_function will be within the current
+dnl  object file we can get it just as an offset from %eip, there's no need
+dnl  to use the GOT.
+dnl
+dnl  No attempt is made to maintain the stack alignment gcc generates with
+dnl  -mpreferred-stack-boundary.  This wouldn't be hard, but it seems highly
+dnl  unlikely the instrumenting functions would be doing anything that'd
+dnl  benefit from alignment, in particular they're unlikely to be using
+dnl  doubles or long doubles on the stack.
+dnl
+dnl  The FRAME scheme is used to conveniently account for the register saves
+dnl  before accessing the return address.  Any previous value is saved and
+dnl  restored, since plenty of code keeps a value across a "ret" in the
+dnl  middle of a routine.
+
+define(call_instrument,
+m4_assert_numargs(1)
+`	pushdef(`FRAME',0)
+ifelse($1,exit,
+`	pushl	%eax	FRAME_pushl()	C return value
+')
+ifdef(`PIC',
+`	pushl	%ebx	FRAME_pushl()
+	call_movl_eip_to_ebx
+L(instrument_here_`'instrument_count):
+	movl	%ebx, %ecx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
+	C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
+	addl	$instrument_current_function-L(instrument_here_`'instrument_count), %ecx
+	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
+	pushl	%ecx				FRAME_pushl()	C this function
+	call	GSYM_PREFIX`'__cyg_profile_func_$1@PLT
+	addl	$`'8, %esp
+	popl	%ebx
+',
+`	C non-PIC
+	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
+	pushl	$instrument_current_function	FRAME_pushl()	C this function
+	call	GSYM_PREFIX`'__cyg_profile_func_$1
+	addl	$`'8, %esp
+')
+ifelse($1,exit,
+`	popl	%eax			C return value
+')
+	popdef(`FRAME')
+define(`instrument_count',incr(instrument_count))
+')
+define(instrument_count,1)
+
+
+dnl  Usage: instrument_current_function
+dnl
+dnl  Return the current function name for instrumenting purposes.  This is
+dnl  PROLOGUE_current_function, but it sticks at the first such name seen.
+dnl
+dnl  Sticking to the first name seen ensures that multiple-entrypoint
+dnl  functions like mpn_add_nc and mpn_add_n will make enter and exit calls
+dnl  giving the same function address.
+
+define(instrument_current_function,
+m4_assert_numargs(-1)
+`ifdef(`instrument_current_function_seen',
+`instrument_current_function_seen',
+`define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
+PROLOGUE_current_function')')
+
+
+dnl  Usage: call_movl_eip_to_ebx
+dnl
+dnl  Generate a call to L(movl_eip_to_ebx), and record the need for that
+dnl  routine.
+
+define(call_movl_eip_to_ebx,
+m4_assert_numargs(-1)
+`call	L(movl_eip_to_ebx)
+define(`movl_eip_to_ebx_needed',1)')
+
+dnl  Usage: generate_movl_eip_to_ebx
+dnl
+dnl  Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
+
+define(generate_movl_eip_to_ebx,
+m4_assert_numargs(-1)
+`ifelse(movl_eip_to_ebx_needed,1,
+`ifelse(movl_eip_to_ebx_done,1,,
+`L(movl_eip_to_ebx):
+	movl	(%esp), %ebx
+	ret_internal
+define(`movl_eip_to_ebx_done',1)
+')')')
+
+
+dnl  Usage: ret
+dnl
+dnl  Generate a "ret", but if doing instrumented profiling then call
+dnl  __cyg_profile_func_exit first.
+
+define(ret,
+m4_assert_numargs(-1)
+m4_assert_defined(`WANT_PROFILING')
+`ifelse(WANT_PROFILING,instrument,
+`ret_instrument',
+`ret_internal')
+generate_movl_eip_to_ebx
+')
+
+
+dnl  Usage: ret_internal
+dnl
+dnl  A plain "ret", without any __cyg_profile_func_exit call.  This can be
+dnl  used for a return which is internal to some function, such as when
+dnl  getting %eip for PIC.
+
+define(ret_internal,
+m4_assert_numargs(-1)
+``ret'')
+
+
+dnl  Usage: ret_instrument
+dnl
+dnl  Generate call to __cyg_profile_func_exit and then a ret.  If a ret has
+dnl  already been seen from this function then jump to that chunk of code,
+dnl  rather than emitting it again.
+
+define(ret_instrument,
+m4_assert_numargs(-1)
+`ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
+`jmp	L(instrument_exit_`'instrument_current_function)',
+`define(ret_instrument_seen_`'instrument_current_function,1)
+L(instrument_exit_`'instrument_current_function):
+call_instrument(exit)
+	ret_internal')')
+
+
+dnl  Usage: _GLOBAL_OFFSET_TABLE_
+dnl
+dnl  Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
+dnl  This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
+dnl  work with systems requiring an extra underscore such as OpenBSD.
+dnl
+dnl  deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
+dnl  out right, though that form doesn't work properly in gas (see
+dnl  mpn/x86/README).
+
+deflit(_GLOBAL_OFFSET_TABLE_,
+m4_assert_defined(`GOT_GSYM_PREFIX')
+`GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Various x86 macros.
+dnl
+
+
+dnl  Usage: ALIGN_OFFSET(bytes,offset)
+dnl
+dnl  Align to `offset' away from a multiple of `bytes'.
+dnl
+dnl  This is useful for testing, for example align to something very strict
+dnl  and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
+dnl
+dnl  Generally you wouldn't execute across the padding, but it's done with
+dnl  nop's so it'll work.
+
+define(ALIGN_OFFSET,
+m4_assert_numargs(2)
+`ALIGN($1)
+forloop(`i',1,$2,`	nop
+')')
+
+
+dnl  Usage: defframe(name,offset)
+dnl
+dnl  Make a definition like the following with which to access a parameter
+dnl  or variable on the stack.
+dnl
+dnl         define(name,`FRAME+offset(%esp)')
+dnl
+dnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
+dnl  byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
+dnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
+dnl  zero offset is wanted.
+dnl
+dnl  The new macro also gets a check that when it's used FRAME is actually
+dnl  defined, and that the final %esp offset isn't negative, which would
+dnl  mean an attempt to access something below the current %esp.
+dnl
+dnl  deflit() is used rather than a plain define(), so the new macro won't
+dnl  delete any following parenthesized expression.  name(%edi) will come
+dnl  out say as 16(%esp)(%edi).  This isn't valid assembler and should
+dnl  provoke an error, which is better than silently giving just 16(%esp).
+dnl
+dnl  See README for more on the suggested way to access the stack frame.
+
+define(defframe,
+m4_assert_numargs(2)
+`deflit(`$1',
+m4_assert_defined(`FRAME')
+`defframe_check_notbelow(`$1',$2,FRAME)dnl
+defframe_empty_if_zero(FRAME+($2))(%esp)')')
+
+dnl  Called: defframe_empty_if_zero(expression)
+define(defframe_empty_if_zero,
+m4_assert_numargs(1)
+`ifelse(defframe_empty_if_zero_disabled,1,
+`eval($1)',
+`m4_empty_if_zero($1)')')
+
+dnl  Called: defframe_check_notbelow(`name',offset,FRAME)
+define(defframe_check_notbelow,
+m4_assert_numargs(3)
+`ifelse(eval(($3)+($2)<0),1,
+`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
+')')')
+
+
+dnl  Usage: FRAME_pushl()
+dnl         FRAME_popl()
+dnl         FRAME_addl_esp(n)
+dnl         FRAME_subl_esp(n)
+dnl
+dnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
+dnl  %esp of n bytes.
+dnl
+dnl  Using these macros is completely optional.  Sometimes it makes more
+dnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's
+dnl  jumps and different sequences of FRAME values need to be used in
+dnl  different places.
+
+define(FRAME_pushl,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+4))')
+
+define(FRAME_popl,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-4))')
+
+define(FRAME_addl_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-($1)))')
+
+define(FRAME_subl_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+($1)))')
+
+
+dnl  Usage: defframe_pushl(name)
+dnl
+dnl  Do a combination FRAME_pushl() and a defframe() to name the stack
+dnl  location just pushed.  This should come after a pushl instruction.
+dnl  Putting it on the same line works and avoids lengthening the code.  For
+dnl  example,
+dnl
+dnl         pushl   %eax     defframe_pushl(VAR_COUNTER)
+dnl
+dnl  Notice the defframe() is done with an unquoted -FRAME thus giving its
+dnl  current value without tracking future changes.
+
+define(defframe_pushl,
+m4_assert_numargs(1)
+`FRAME_pushl()defframe(`$1',-FRAME)')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Assembler instruction macros.
+dnl
+
+
+dnl  Usage: emms_or_femms
+dnl         femms_available_p
+dnl
+dnl  femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
+dnl  femms instruction is available.  emms_or_femms expands to femms if
+dnl  available, or emms if not.
+dnl
+dnl  emms_or_femms is meant for use in the K6 directory where plain K6
+dnl  (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
+dnl  supported together.
+dnl
+dnl  On K7 femms is no longer faster and is just an alias for emms, so plain
+dnl  emms may as well be used.
+
+define(femms_available_p,
+m4_assert_numargs(-1)
+`m4_ifdef_anyof_p(
+	`HAVE_HOST_CPU_k62',
+	`HAVE_HOST_CPU_k63',
+	`HAVE_HOST_CPU_athlon')')
+
+define(emms_or_femms,
+m4_assert_numargs(-1)
+`ifelse(femms_available_p,1,`femms',`emms')')
+
+
+dnl  Usage: femms
+dnl
+dnl  Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
+dnl  following is a replacement using .byte.
+
+define(femms,
+m4_assert_numargs(-1)
+`.byte	15,14	C AMD 3DNow femms')
+
+
+dnl  Usage: jadcl0(op)
+dnl
+dnl  Generate a jnc/incl as a substitute for adcl $0,op.  Note this isn't an
+dnl  exact replacement, since it doesn't set the flags like adcl does.
+dnl
+dnl  This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
+dnl  mpn_sqr_basecase because on K6 an adcl is slow, the branch
+dnl  misprediction penalty is small, and the multiply algorithm used leads
+dnl  to a carry bit on average only 1/4 of the time.
+dnl
+dnl  jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
+dnl  for comparison.  For example,
+dnl
+dnl		define(`jadcl0_disabled',1)
+dnl
+dnl  When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
+dnl  the same size as an adcl.  This makes it possible to use the exact same
+dnl  computed jump code when testing the relative speed of the two.
+
+define(jadcl0,
+m4_assert_numargs(1)
+`ifelse(jadcl0_disabled,1,
+	`adcl	$`'0, $1',
+	`jnc	L(jadcl0_`'jadcl0_counter)
+	incl	$1
+L(jadcl0_`'jadcl0_counter):
+define(`jadcl0_counter',incr(jadcl0_counter))')')
+
+define(jadcl0_counter,1)
+
+
+dnl  Usage: x86_lookup(target, key,value, key,value, ...)
+dnl         x86_lookup_p(target, key,value, key,value, ...)
+dnl
+dnl  Look for `target' among the `key' parameters.
+dnl
+dnl  x86_lookup expands to the corresponding `value', or generates an error
+dnl  if `target' isn't found.
+dnl
+dnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.
+
+define(x86_lookup,
+m4_assert_numargs_range(1,999)
+`ifelse(eval($#<3),1,
+`m4_error(`unrecognised part of x86 instruction: $1
+')',
+`ifelse(`$1',`$2', `$3',
+`x86_lookup(`$1',shift(shift(shift($@))))')')')
+
+define(x86_lookup_p,
+m4_assert_numargs_range(1,999)
+`ifelse(eval($#<3),1, `0',
+`ifelse(`$1',`$2',    `1',
+`x86_lookup_p(`$1',shift(shift(shift($@))))')')')
+
+
+dnl  Usage: x86_opcode_reg32(reg)
+dnl         x86_opcode_reg32_p(reg)
+dnl
+dnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given
+dnl  32-bit register, eg. `%ebp' turns into 5.
+dnl
+dnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
+dnl  if not.
+
+define(x86_opcode_reg32,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_reg32_list)')
+
+define(x86_opcode_reg32_p,
+m4_assert_onearg()
+`x86_lookup_p(`$1',x86_opcode_reg32_list)')
+
+define(x86_opcode_reg32_list,
+``%eax',0,
+`%ecx',1,
+`%edx',2,
+`%ebx',3,
+`%esp',4,
+`%ebp',5,
+`%esi',6,
+`%edi',7')
+
+
+dnl  Usage: x86_opcode_tttn(cond)
+dnl
+dnl  Expand to the 4-bit "tttn" field value for the given x86 branch
+dnl  condition (like `c', `ae', etc).
+
+define(x86_opcode_tttn,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_tttn_list)')
+
+define(x86_opcode_tttn_list,
+``o',  0,
+`no',  1,
+`b',   2, `c',  2, `nae',2,
+`nb',  3, `nc', 3, `ae', 3,
+`e',   4, `z',  4,
+`ne',  5, `nz', 5,
+`be',  6, `na', 6,
+`nbe', 7, `a',  7,
+`s',   8,
+`ns',  9,
+`p',  10, `pe', 10, `npo',10,
+`np', 11, `npe',11, `po', 11,
+`l',  12, `nge',12,
+`nl', 13, `ge', 13,
+`le', 14, `ng', 14,
+`nle',15, `g',  15')
+
+
+dnl  Usage: cmovCC(%srcreg,%dstreg)
+dnl
+dnl  Emit a cmov instruction, using a .byte sequence, since various past
+dnl  versions of gas don't know cmov.  For example,
+dnl
+dnl         cmovz(  %eax, %ebx)
+dnl
+dnl  The source operand can only be a plain register.  (m4 code implementing
+dnl  full memory addressing modes exists, believe it or not, but isn't
+dnl  currently needed and isn't included.)
+dnl
+dnl  All the standard conditions are defined.  Attempting to use one without
+dnl  the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
+dnl  an error.  This protects against writing something old gas wouldn't
+dnl  understand.
+
+dnl  Called: define_cmov_many(cond,tttn,cond,tttn,...)
+define(define_cmov_many,
+`ifelse(m4_length(`$1'),0,,
+`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
+
+dnl  Called: define_cmov(cond,tttn)
+dnl  Emit basically define(cmov<cond>,`cmov_internal(<cond>,<ttn>,`$1',`$2')')
+define(define_cmov,
+m4_assert_numargs(2)
+`define(`cmov$1',
+m4_instruction_wrapper()
+m4_assert_numargs(2)
+`cmov_internal'(m4_doublequote($`'0),``$2'',dnl
+m4_doublequote($`'1),m4_doublequote($`'2)))')
+
+define_cmov_many(x86_opcode_tttn_list)
+
+dnl  Called: cmov_internal(name,tttn,src,dst)
+define(cmov_internal,
+m4_assert_numargs(4)
+`.byte	dnl
+15, dnl
+eval(64+$2), dnl
+eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
+	C `$1 $3, $4'')
+
+
+dnl  Usage: x86_opcode_regmmx(reg)
+dnl
+dnl  Validate the given mmx register, and return its number, 0 to 7.
+
+define(x86_opcode_regmmx,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_regmmx_list)')
+
+define(x86_opcode_regmmx_list,
+``%mm0',0,
+`%mm1',1,
+`%mm2',2,
+`%mm3',3,
+`%mm4',4,
+`%mm5',5,
+`%mm6',6,
+`%mm7',7')
+
+
+dnl  Usage: psadbw(%srcreg,%dstreg)
+dnl
+dnl  Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
+dnl  FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences.  For
+dnl  example,
+dnl
+dnl         psadbw( %mm1, %mm2)
+dnl
+dnl  Only register->register forms are supported here, which suffices for
+dnl  the current code.
+
+define(psadbw,
+m4_instruction_wrapper()
+m4_assert_numargs(2)
+`.byte 0x0f,0xf6,dnl
+eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
+	C `psadbw $1, $2'')
+
+
+dnl  Usage: Zdisp(inst,op,op,op)
+dnl
+dnl  Generate explicit .byte sequences if necessary to force a byte-sized
+dnl  zero displacement on an instruction.  For example,
+dnl
+dnl         Zdisp(  movl,   0,(%esi), %eax)
+dnl
+dnl  expands to
+dnl
+dnl                 .byte   139,70,0  C movl 0(%esi), %eax
+dnl
+dnl  If the displacement given isn't 0, then normal assembler code is
+dnl  generated.  For example,
+dnl
+dnl         Zdisp(  movl,   4,(%esi), %eax)
+dnl
+dnl  expands to
+dnl
+dnl                 movl    4(%esi), %eax
+dnl
+dnl  This means a single Zdisp() form can be used with an expression for the
+dnl  displacement, and .byte will be used only if necessary.  The
+dnl  displacement argument is eval()ed.
+dnl
+dnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is
+dnl  implemented with a table of instructions and encodings.  A new entry is
+dnl  needed for any different operation or registers.  The table is split
+dnl  into separate macros to avoid overflowing BSD m4 macro expansion space.
+
+define(Zdisp,
+m4_assert_numargs(4)
+`define(`Zdisp_found',0)dnl
+Zdisp_1($@)dnl
+Zdisp_2($@)dnl
+Zdisp_3($@)dnl
+Zdisp_4($@)dnl
+ifelse(Zdisp_found,0,
+`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
+')')')
+
+define(Zdisp_1,`dnl
+Zdisp_match( adcl, 0,(%edx), %eax,        `0x13,0x42,0x00',           $@)`'dnl
+Zdisp_match( adcl, 0,(%edx), %ebx,        `0x13,0x5a,0x00',           $@)`'dnl
+Zdisp_match( adcl, 0,(%edx), %esi,        `0x13,0x72,0x00',           $@)`'dnl
+Zdisp_match( addl, %ebx, 0,(%edi),        `0x01,0x5f,0x00',           $@)`'dnl
+Zdisp_match( addl, %ecx, 0,(%edi),        `0x01,0x4f,0x00',           $@)`'dnl
+Zdisp_match( addl, %esi, 0,(%edi),        `0x01,0x77,0x00',           $@)`'dnl
+Zdisp_match( sbbl, 0,(%edx), %eax,        `0x1b,0x42,0x00',           $@)`'dnl
+Zdisp_match( sbbl, 0,(%edx), %esi,        `0x1b,0x72,0x00',           $@)`'dnl
+Zdisp_match( subl, %ecx, 0,(%edi),        `0x29,0x4f,0x00',           $@)`'dnl
+Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
+Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
+Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax,  `0x13,0x44,0x8b,0x00',      $@)`'dnl
+Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax,  `0x1b,0x44,0x8b,0x00',      $@)`'dnl
+')
+define(Zdisp_2,`dnl
+Zdisp_match( movl, %eax, 0,(%edi),        `0x89,0x47,0x00',           $@)`'dnl
+Zdisp_match( movl, %ebx, 0,(%edi),        `0x89,0x5f,0x00',           $@)`'dnl
+Zdisp_match( movl, %esi, 0,(%edi),        `0x89,0x77,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%ebx), %eax,        `0x8b,0x43,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%ebx), %esi,        `0x8b,0x73,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%edx), %eax,        `0x8b,0x42,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%esi), %eax,        `0x8b,0x46,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl
+Zdisp_match( mov, 0,(%esi,%ecx,4), %eax,  `0x8b,0x44,0x8e,0x00',      $@)`'dnl
+Zdisp_match( mov, %eax, 0,(%edi,%ecx,4),  `0x89,0x44,0x8f,0x00',      $@)`'dnl
+')
+define(Zdisp_3,`dnl
+Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%edx), %mm0,        `0x0f,0x6f,0x42,0x00',      $@)`'dnl
+Zdisp_match( movq, 0,(%esi), %mm0,        `0x0f,0x6f,0x46,0x00',      $@)`'dnl
+Zdisp_match( movq, %mm0, 0,(%edi),        `0x0f,0x7f,0x47,0x00',      $@)`'dnl
+Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
+Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
+Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
+')
+define(Zdisp_4,`dnl
+Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
+Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
+Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
+Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
+')
+
+define(Zdisp_match,
+m4_assert_numargs(9)
+`ifelse(eval(m4_stringequal_p(`$1',`$6')
+	&& m4_stringequal_p(`$2',0)
+	&& m4_stringequal_p(`$3',`$8')
+	&& m4_stringequal_p(`$4',`$9')),1,
+`define(`Zdisp_found',1)dnl
+ifelse(eval(`$7'),0,
+`	.byte	$5  C `$1 0$3, $4'',
+`	$6	$7$8, $9')',
+
+`ifelse(eval(m4_stringequal_p(`$1',`$6')
+	&& m4_stringequal_p(`$2',`$7')
+	&& m4_stringequal_p(`$3',0)
+	&& m4_stringequal_p(`$4',`$9')),1,
+`define(`Zdisp_found',1)dnl
+ifelse(eval(`$8'),0,
+`	.byte	$5  C `$1 $2, 0$4'',
+`	$6	$7, $8$9')')')')
+
+
+dnl  Usage: shldl(count,src,dst)
+dnl         shrdl(count,src,dst)
+dnl         shldw(count,src,dst)
+dnl         shrdw(count,src,dst)
+dnl
+dnl  Generate a double-shift instruction, possibly omitting a %cl count
+dnl  parameter if that's what the assembler requires, as indicated by
+dnl  WANT_SHLDL_CL in config.m4.  For example,
+dnl
+dnl         shldl(  %cl, %eax, %ebx)
+dnl
+dnl  turns into either
+dnl
+dnl         shldl   %cl, %eax, %ebx
+dnl  or
+dnl         shldl   %eax, %ebx
+dnl
+dnl  Immediate counts are always passed through unchanged.  For example,
+dnl
+dnl         shrdl(  $2, %esi, %edi)
+dnl  becomes
+dnl         shrdl   $2, %esi, %edi
+dnl
+dnl
+dnl  If you forget to use the macro form "shldl( ...)" and instead write
+dnl  just a plain "shldl ...", an error results.  This ensures the necessary
+dnl  variant treatment of %cl isn't accidentally bypassed.
+
+define(define_shd_instruction,
+m4_assert_numargs(1)
+`define($1,
+m4_instruction_wrapper()
+m4_assert_numargs(3)
+`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
+m4_doublequote($`'2),m4_doublequote($`'3)))')
+
+dnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
+define_shd_instruction(shldl)
+define_shd_instruction(shrdl)
+define_shd_instruction(shldw)
+define_shd_instruction(shrdw)
+
+dnl  Called: shd_instruction(op,count,src,dst)
+define(shd_instruction,
+m4_assert_numargs(4)
+m4_assert_defined(`WANT_SHLDL_CL')
+`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
+``$1'	`$3', `$4'',
+``$1'	`$2', `$3', `$4'')')
+
+
+dnl  Usage: ASSERT([cond][,instructions])
+dnl
+dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl  flags condition to then be satisfied.  For example,
+dnl
+dnl         ASSERT(ne, `cmpl %eax, %ebx')
+dnl
+dnl  The instructions can be omitted to just assert a flags condition with
+dnl  no extra calculation.  For example,
+dnl
+dnl         ASSERT(nc)
+dnl
+dnl  When `instructions' is not empty, a pushf/popf is added to preserve the
+dnl  flags, but the instructions themselves must preserve any registers that
+dnl  matter.  FRAME is adjusted for the push and pop, so the instructions
+dnl  given can use defframe() stack variables.
+dnl
+dnl  The condition can be omitted to just output the given instructions when
+dnl  assertion checking is wanted.  In this case the pushf/popf is omitted.
+dnl  For example,
+dnl
+dnl         ASSERT(, `movl %eax, VAR_KEEPVAL')
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+m4_assert_defined(`WANT_ASSERT')
+`ifelse(WANT_ASSERT,1,
+`ifelse(`$1',,
+	`$2',
+	`C ASSERT
+ifelse(`$2',,,`	pushf	ifdef(`FRAME',`FRAME_pushl()')')
+	$2
+	j`$1'	L(ASSERT_ok`'ASSERT_counter)
+	ud2	C assertion failed
+L(ASSERT_ok`'ASSERT_counter):
+ifelse(`$2',,,`	popf	ifdef(`FRAME',`FRAME_popl()')')
+define(`ASSERT_counter',incr(ASSERT_counter))')')')
+
+define(ASSERT_counter,1)
+
+
+dnl  Usage: movl_text_address(label,register)
+dnl
+dnl  Get the address of a text segment label, using either a plain movl or a
+dnl  position-independent calculation, as necessary.  For example,
+dnl
+dnl         movl_text_address(L(foo),%eax)
+dnl
+dnl  This macro is only meant for use in ASSERT()s or when testing, since
+dnl  the PIC sequence it generates will want to be done with a ret balancing
+dnl  the call on CPUs with return address branch prediction.
+dnl
+dnl  The addl generated here has a backward reference to the label, and so
+dnl  won't suffer from the two forwards references bug in old gas (described
+dnl  in mpn/x86/README).
+
+define(movl_text_address,
+m4_assert_numargs(2)
+`ifdef(`PIC',
+	`call	L(movl_text_address_`'movl_text_address_counter)
+L(movl_text_address_`'movl_text_address_counter):
+	popl	$2	C %eip
+	addl	`$'$1-L(movl_text_address_`'movl_text_address_counter), $2
+define(`movl_text_address_counter',incr(movl_text_address_counter))',
+	`movl	`$'$1, $2')')
+
+define(movl_text_address_counter,1)
+
+
+dnl  Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
+dnl
+dnl  Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
+dnl  appropriate for nails in use or not.
+
+define(notl_or_xorl_GMP_NUMB_MASK,
+m4_assert_numargs(1)
+`ifelse(GMP_NAIL_BITS,0,
+`notl	`$1'',
+`xorl	$GMP_NUMB_MASK, `$1'')')
+
+
+dnl  Usage: LEA(symbol,reg)
+dnl  Usage: LEAL(symbol_local_to_file,reg)
+
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',`dnl
+ifelse(index(defn(`load_eip'), `$2'),-1,
+`m4append(`load_eip',
+`	TEXT
+	ALIGN(16)
+L(movl_eip_`'substr($2,1)):
+	movl	(%esp), $2
+	ret_internal
+')')dnl
+	call	L(movl_eip_`'substr($2,1))
+	addl	$_GLOBAL_OFFSET_TABLE_, $2
+	movl	$1@GOT($2), $2
+',`
+	movl	`$'$1, $2
+')')
+
+define(`LEAL',
+m4_assert_numargs(2)
+`ifdef(`PIC',`dnl
+ifelse(index(defn(`load_eip'), `$2'),-1,
+`m4append(`load_eip',
+`	TEXT
+	ALIGN(16)
+L(movl_eip_`'substr($2,1)):
+	movl	(%esp), $2
+	ret_internal
+')')dnl
+	call	L(movl_eip_`'substr($2,1))
+	addl	$_GLOBAL_OFFSET_TABLE_, $2
+	leal	$1@GOTOFF($2), $2
+',`
+	movl	`$'$1, $2
+')')
+
+dnl ASM_END
+
+define(`ASM_END',`load_eip')
+
+define(`load_eip', `')		dnl updated in LEA/LEAL
+
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+	`RODATA
+	ALIGN(ifelse($#,1,2,$2))
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1)
+`	SIZE(`$1',.-`$1')')
+
+dnl  Usage: CALL(funcname)
+dnl
+
+define(`CALL',
+m4_assert_numargs(1)
+`ifdef(`PIC',
+  `call	GSYM_PREFIX`'$1@PLT',
+  `call	GSYM_PREFIX`'$1')')
+
+ifdef(`PIC',
+`define(`PIC_WITH_EBX')',
+`undefine(`PIC_WITH_EBX')')
+
+divert`'dnl
diff --git a/mpn/x86_64/aorrlsh1_n.asm b/mpn/x86_64/aorrlsh1_n.asm
new file mode 100644
index 0000000..af317fd
--- /dev/null
+++ b/mpn/x86_64/aorrlsh1_n.asm
@@ -0,0 +1,170 @@
+dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl  Copyright 2003, 2005-2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C AMD K8,K9	 2
+C AMD K10	 2
+C AMD bd1	 ?
+C AMD bobcat	 ?
+C Intel P4	 13
+C Intel core2	 3.45
+C Intel NHM	 ?
+C Intel SBR	 ?
+C Intel atom	 ?
+C VIA nano	 ?
+
+
+C Sometimes speed degenerates, supposedly related to that some operand
+C alignments cause cache conflicts.
+
+C The speed is limited by decoding/issue bandwidth.  There are 22 instructions
+C in the loop, which corresponds to 22/(3*4) = 1.83 c/l.
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+ifdef(`OPERATION_addlsh1_n', `
+  define(ADDSUB,	add)
+  define(ADCSBB,	adc)
+  define(func,		mpn_addlsh1_n)')
+ifdef(`OPERATION_rsblsh1_n', `
+  define(ADDSUB,	sub)
+  define(ADCSBB,	sbb)
+  define(func,		mpn_rsblsh1_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	FUNC_ENTRY(4)
+	push	%rbp
+
+	mov	(vp), %r8
+	mov	R32(n), R32(%rax)
+	lea	(rp,n,8), rp
+	lea	(up,n,8), up
+	lea	(vp,n,8), vp
+	neg	n
+	xor	R32(%rbp), R32(%rbp)
+	and	$3, R32(%rax)
+	je	L(b00)
+	cmp	$2, R32(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	add	%r8, %r8
+	mov	8(vp,n,8), %r9
+	adc	%r9, %r9
+	mov	16(vp,n,8), %r10
+	adc	%r10, %r10
+	sbb	R32(%rax), R32(%rax)	C save scy
+	ADDSUB	(up,n,8), %r8
+	ADCSBB	8(up,n,8), %r9
+	mov	%r8, (rp,n,8)
+	mov	%r9, 8(rp,n,8)
+	ADCSBB	16(up,n,8), %r10
+	mov	%r10, 16(rp,n,8)
+	sbb	R32(%rbp), R32(%rbp)	C save acy
+	add	$3, n
+	jmp	L(ent)
+
+L(b10):	add	%r8, %r8
+	mov	8(vp,n,8), %r9
+	adc	%r9, %r9
+	sbb	R32(%rax), R32(%rax)	C save scy
+	ADDSUB	(up,n,8), %r8
+	ADCSBB	8(up,n,8), %r9
+	mov	%r8, (rp,n,8)
+	mov	%r9, 8(rp,n,8)
+	sbb	R32(%rbp), R32(%rbp)	C save acy
+	add	$2, n
+	jmp	L(ent)
+
+L(b01):	add	%r8, %r8
+	sbb	R32(%rax), R32(%rax)	C save scy
+	ADDSUB	(up,n,8), %r8
+	mov	%r8, (rp,n,8)
+	sbb	R32(%rbp), R32(%rbp)	C save acy
+	inc	n
+L(ent):	jns	L(end)
+
+	ALIGN(16)
+L(top):	add	R32(%rax), R32(%rax)	C restore scy
+
+	mov	(vp,n,8), %r8
+L(b00):	adc	%r8, %r8
+	mov	8(vp,n,8), %r9
+	adc	%r9, %r9
+	mov	16(vp,n,8), %r10
+	adc	%r10, %r10
+	mov	24(vp,n,8), %r11
+	adc	%r11, %r11
+
+	sbb	R32(%rax), R32(%rax)	C save scy
+	add	R32(%rbp), R32(%rbp)	C restore acy
+
+	ADCSBB	(up,n,8), %r8
+	nop				C Hammer speedup!
+	ADCSBB	8(up,n,8), %r9
+	mov	%r8, (rp,n,8)
+	mov	%r9, 8(rp,n,8)
+	ADCSBB	16(up,n,8), %r10
+	ADCSBB	24(up,n,8), %r11
+	mov	%r10, 16(rp,n,8)
+	mov	%r11, 24(rp,n,8)
+
+	sbb	R32(%rbp), R32(%rbp)	C save acy
+	add	$4, n
+	js	L(top)
+
+L(end):
+ifdef(`OPERATION_addlsh1_n',`
+	add	R32(%rbp), R32(%rax)
+	neg	R32(%rax)')
+ifdef(`OPERATION_rsblsh1_n',`
+	sub	R32(%rax), R32(%rbp)
+	movslq	R32(%rbp), %rax')
+
+	pop	%rbp
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorrlsh2_n.asm b/mpn/x86_64/aorrlsh2_n.asm
new file mode 100644
index 0000000..6c17830
--- /dev/null
+++ b/mpn/x86_64/aorrlsh2_n.asm
@@ -0,0 +1,53 @@
+dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009-2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+define(LSH, 2)
+define(RSH, 62)
+
+ifdef(`OPERATION_addlsh2_n',`
+  define(ADDSUB,	add)
+  define(ADCSBB,	adc)
+  define(func,		mpn_addlsh2_n)')
+ifdef(`OPERATION_rsblsh2_n',`
+  define(ADDSUB,	sub)
+  define(ADCSBB,	sbb)
+  define(func,		mpn_rsblsh2_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/aorrlshC_n.asm')
diff --git a/mpn/x86_64/aorrlshC_n.asm b/mpn/x86_64/aorrlshC_n.asm
new file mode 100644
index 0000000..de00154
--- /dev/null
+++ b/mpn/x86_64/aorrlshC_n.asm
@@ -0,0 +1,172 @@
+dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl  Copyright 2009-2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+C	     cycles/limb
+C AMD K8,K9	 2.1
+C AMD K10	 2.0
+C AMD bd1	~2.7
+C AMD bd2	~2.7
+C AMD bd3	 ?
+C AMD bd4	 ?
+C AMD zen	 2.0
+C AMD bt1	 3.3
+C AMD bt2	 3.0
+C Intel P4	 ?
+C Intel PNR	 3.0
+C Intel NHM	 2.75
+C Intel SBR	 2.55
+C Intel IBR	 2.49
+C Intel HWL	 2.25
+C Intel BWL	 1.89
+C Intel SKL	 1.90
+C Intel atom	 8.4
+C Intel SLM	 4.0
+C VIA nano	 ?
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`n',	`%rcx')
+
+define(M, eval(m4_lshift(1,LSH)))
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	FUNC_ENTRY(4)
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	mov	(vp), %r8
+	lea	(,%r8,M), %r12
+	shr	$RSH, %r8
+
+	mov	R32(n), R32(%rax)
+	lea	(rp,n,8), rp
+	lea	(up,n,8), up
+	lea	(vp,n,8), vp
+	neg	n
+	and	$3, R8(%rax)
+	je	L(b00)
+	cmp	$2, R8(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	mov	8(vp,n,8), %r10
+	lea	(%r8,%r10,M), %r14
+	shr	$RSH, %r10
+	mov	16(vp,n,8), %r11
+	lea	(%r10,%r11,M), %r15
+	shr	$RSH, %r11
+	ADDSUB	(up,n,8), %r12
+	ADCSBB	8(up,n,8), %r14
+	ADCSBB	16(up,n,8), %r15
+	sbb	R32(%rax), R32(%rax)		  C save carry for next
+	mov	%r12, (rp,n,8)
+	mov	%r14, 8(rp,n,8)
+	mov	%r15, 16(rp,n,8)
+	add	$3, n
+	js	L(top)
+	jmp	L(end)
+
+L(b01):	mov	%r8, %r11
+	ADDSUB	(up,n,8), %r12
+	sbb	R32(%rax), R32(%rax)		  C save carry for next
+	mov	%r12, (rp,n,8)
+	add	$1, n
+	js	L(top)
+	jmp	L(end)
+
+L(b10):	mov	8(vp,n,8), %r11
+	lea	(%r8,%r11,M), %r15
+	shr	$RSH, %r11
+	ADDSUB	(up,n,8), %r12
+	ADCSBB	8(up,n,8), %r15
+	sbb	R32(%rax), R32(%rax)		  C save carry for next
+	mov	%r12, (rp,n,8)
+	mov	%r15, 8(rp,n,8)
+	add	$2, n
+	js	L(top)
+	jmp	L(end)
+
+L(b00):	mov	8(vp,n,8), %r9
+	mov	16(vp,n,8), %r10
+	jmp	L(e00)
+
+	ALIGN(16)
+L(top):	mov	16(vp,n,8), %r10
+	mov	(vp,n,8), %r8
+	mov	8(vp,n,8), %r9
+	lea	(%r11,%r8,M), %r12
+	shr	$RSH, %r8
+L(e00):	lea	(%r8,%r9,M), %r13
+	shr	$RSH, %r9
+	mov	24(vp,n,8), %r11
+	lea	(%r9,%r10,M), %r14
+	shr	$RSH, %r10
+	lea	(%r10,%r11,M), %r15
+	shr	$RSH, %r11
+	add	R32(%rax), R32(%rax)		  C restore carry
+	ADCSBB	(up,n,8), %r12
+	ADCSBB	8(up,n,8), %r13
+	ADCSBB	16(up,n,8), %r14
+	ADCSBB	24(up,n,8), %r15
+	mov	%r12, (rp,n,8)
+	mov	%r13, 8(rp,n,8)
+	mov	%r14, 16(rp,n,8)
+	sbb	R32(%rax), R32(%rax)		  C save carry for next
+	mov	%r15, 24(rp,n,8)
+	add	$4, n
+	js	L(top)
+L(end):
+
+ifelse(ADDSUB,add,`
+	sub	R32(%r11), R32(%rax)
+	neg	R32(%rax)
+',`
+	add	R32(%r11), R32(%rax)
+	movslq	R32(%rax), %rax
+')
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorrlsh_n.asm b/mpn/x86_64/aorrlsh_n.asm
new file mode 100644
index 0000000..eb94d2f
--- /dev/null
+++ b/mpn/x86_64/aorrlsh_n.asm
@@ -0,0 +1,176 @@
+dnl  AMD64 mpn_addlsh_n and mpn_rsblsh_n.  R = V*2^k +- U.
+
+dnl  Copyright 2006, 2010-2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C AMD K8,K9	 3.1	< 3.85 for lshift + add_n
+C AMD K10	 3.1	< 3.85 for lshift + add_n
+C Intel P4	14.6	> 7.33 for lshift + add_n
+C Intel core2	 3.87	> 3.27 for lshift + add_n
+C Intel NHM	 4	> 3.75 for lshift + add_n
+C Intel SBR	(5.8)	> 3.46 for lshift + add_n
+C Intel atom	(7.75)	< 8.75 for lshift + add_n
+C VIA nano	 4.7	< 6.25 for lshift + add_n
+
+C This was written quickly and not optimized at all.  Surely one could get
+C closer to 3 c/l or perhaps even under 3 c/l.  Ideas:
+C   1) Use indexing to save the 3 LEA
+C   2) Write reasonable feed-in code
+C   3) Be more clever about register usage
+C   4) Unroll more, handling CL negation, carry save/restore cost much now
+C   5) Reschedule
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`n',	`%rcx')
+define(`cnt',	`%r8')
+
+ifdef(`OPERATION_addlsh_n',`
+  define(ADCSBB,       `adc')
+  define(func, mpn_addlsh_n)
+')
+ifdef(`OPERATION_rsblsh_n',`
+  define(ADCSBB,       `sbb')
+  define(func, mpn_rsblsh_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8d	')
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%rbp
+	push	%rbx
+
+	mov	n, %rax
+	xor	R32(%rbx), R32(%rbx)	C clear carry save register
+	mov	R32(%r8), R32(%rcx)	C shift count
+	xor	R32(%rbp), R32(%rbp)	C limb carry
+
+	mov	R32(%rax), R32(%r11)
+	and	$3, R32(%r11)
+	je	L(4)
+	sub	$1, R32(%r11)
+
+L(012):	mov	(vp), %r8
+	mov	%r8, %r12
+	shl	R8(%rcx), %r8
+	or	%rbp, %r8
+	neg	R8(%rcx)
+	mov	%r12, %rbp
+	shr	R8(%rcx), %rbp
+	neg	R8(%rcx)
+	add	R32(%rbx), R32(%rbx)
+	ADCSBB	(up), %r8
+	mov	%r8, (rp)
+	sbb	R32(%rbx), R32(%rbx)
+	lea	8(up), up
+	lea	8(vp), vp
+	lea	8(rp), rp
+	sub	$1, R32(%r11)
+	jnc	L(012)
+
+L(4):	sub	$4, %rax
+	jc	L(end)
+
+	ALIGN(16)
+L(top):	mov	(vp), %r8
+	mov	%r8, %r12
+	mov	8(vp), %r9
+	mov	%r9, %r13
+	mov	16(vp), %r10
+	mov	%r10, %r14
+	mov	24(vp), %r11
+
+	shl	R8(%rcx), %r8
+	shl	R8(%rcx), %r9
+	shl	R8(%rcx), %r10
+	or	%rbp, %r8
+	mov	%r11, %rbp
+	shl	R8(%rcx), %r11
+
+	neg	R8(%rcx)
+
+	shr	R8(%rcx), %r12
+	shr	R8(%rcx), %r13
+	shr	R8(%rcx), %r14
+	shr	R8(%rcx), %rbp		C used next iteration
+
+	or	%r12, %r9
+	or	%r13, %r10
+	or	%r14, %r11
+
+	neg	R8(%rcx)
+
+	add	R32(%rbx), R32(%rbx)	C restore carry flag
+
+	ADCSBB	(up), %r8
+	ADCSBB	8(up), %r9
+	ADCSBB	16(up), %r10
+	ADCSBB	24(up), %r11
+
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	mov	%r10, 16(rp)
+	mov	%r11, 24(rp)
+
+	sbb	R32(%rbx), R32(%rbx)	C save carry flag
+
+	lea	32(up), up
+	lea	32(vp), vp
+	lea	32(rp), rp
+
+	sub	$4, %rax
+	jnc	L(top)
+
+L(end):	add	R32(%rbx), R32(%rbx)
+	ADCSBB	$0, %rbp
+	mov	%rbp, %rax
+	pop	%rbx
+	pop	%rbp
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/aors_err1_n.asm b/mpn/x86_64/aors_err1_n.asm
new file mode 100644
index 0000000..5bc1e27
--- /dev/null
+++ b/mpn/x86_64/aors_err1_n.asm
@@ -0,0 +1,225 @@
+dnl  AMD64 mpn_add_err1_n, mpn_sub_err1_n
+
+dnl  Contributed by David Harvey.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 2.75 (degenerates to 3 c/l for some alignments)
+C AMD K10	 ?
+C Intel P4	 ?
+C Intel core2	 ?
+C Intel corei	 ?
+C Intel atom	 ?
+C VIA nano	 ?
+
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`ep',	`%rcx')
+define(`yp',	`%r8')
+define(`n',	`%r9')
+define(`cy_param',	`8(%rsp)')
+
+define(`el',	`%rbx')
+define(`eh',	`%rbp')
+define(`t0',	`%r10')
+define(`t1',	`%r11')
+define(`t2',	`%r12')
+define(`t3',	`%r13')
+define(`w0',	`%r14')
+define(`w1',	`%r15')
+
+ifdef(`OPERATION_add_err1_n', `
+	define(ADCSBB,	      adc)
+	define(func,	      mpn_add_err1_n)')
+ifdef(`OPERATION_sub_err1_n', `
+	define(ADCSBB,	      sbb)
+	define(func,	      mpn_sub_err1_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
+
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	mov	cy_param, %rax
+
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	lea	(up,n,8), up
+	lea	(vp,n,8), vp
+	lea	(rp,n,8), rp
+
+	mov	R32(n), R32(%r10)
+	and	$3, R32(%r10)
+	jz	L(0mod4)
+	cmp	$2, R32(%r10)
+	jc	L(1mod4)
+	jz	L(2mod4)
+L(3mod4):
+	xor	R32(el), R32(el)
+	xor	R32(eh), R32(eh)
+	xor	R32(t0), R32(t0)
+	xor	R32(t1), R32(t1)
+	lea	-24(yp,n,8), yp
+	neg	n
+
+	shr	$1, %al		   C restore carry
+	mov	(up,n,8), w0
+	mov	8(up,n,8), w1
+	ADCSBB	(vp,n,8), w0
+	mov	w0, (rp,n,8)
+	cmovc	16(yp), el
+	ADCSBB	8(vp,n,8), w1
+	mov	w1, 8(rp,n,8)
+	cmovc	8(yp), t0
+	mov	16(up,n,8), w0
+	ADCSBB	16(vp,n,8), w0
+	mov	w0, 16(rp,n,8)
+	cmovc	(yp), t1
+	setc	%al		   C save carry
+	add	t0, el
+	adc	$0, eh
+	add	t1, el
+	adc	$0, eh
+
+	add	$3, n
+	jnz	L(loop)
+	jmp	L(end)
+
+	ALIGN(16)
+L(0mod4):
+	xor	R32(el), R32(el)
+	xor	R32(eh), R32(eh)
+	lea	(yp,n,8), yp
+	neg	n
+	jmp	L(loop)
+
+	ALIGN(16)
+L(1mod4):
+	xor	R32(el), R32(el)
+	xor	R32(eh), R32(eh)
+	lea	-8(yp,n,8), yp
+	neg	n
+
+	shr	$1, %al		   C restore carry
+	mov	(up,n,8), w0
+	ADCSBB	(vp,n,8), w0
+	mov	w0, (rp,n,8)
+	cmovc	(yp), el
+	setc	%al		   C save carry
+
+	add	$1, n
+	jnz	L(loop)
+	jmp	L(end)
+
+	ALIGN(16)
+L(2mod4):
+	xor	R32(el), R32(el)
+	xor	R32(eh), R32(eh)
+	xor	R32(t0), R32(t0)
+	lea	-16(yp,n,8), yp
+	neg	n
+
+	shr	$1, %al		   C restore carry
+	mov	(up,n,8), w0
+	mov	8(up,n,8), w1
+	ADCSBB	(vp,n,8), w0
+	mov	w0, (rp,n,8)
+	cmovc	8(yp), el
+	ADCSBB	8(vp,n,8), w1
+	mov	w1, 8(rp,n,8)
+	cmovc	(yp), t0
+	setc	%al		   C save carry
+	add	t0, el
+	adc	$0, eh
+
+	add	$2, n
+	jnz	L(loop)
+	jmp	L(end)
+
+	ALIGN(32)
+L(loop):
+	shr	$1, %al		   C restore carry
+	mov	-8(yp), t0
+	mov	$0, R32(t3)
+	mov	(up,n,8), w0
+	mov	8(up,n,8), w1
+	ADCSBB	(vp,n,8), w0
+	cmovnc	t3, t0
+	ADCSBB	8(vp,n,8), w1
+	mov	-16(yp), t1
+	mov	w0, (rp,n,8)
+	mov	16(up,n,8), w0
+	mov	w1, 8(rp,n,8)
+	cmovnc	t3, t1
+	mov	-24(yp), t2
+	ADCSBB	16(vp,n,8), w0
+	cmovnc	t3, t2
+	mov	24(up,n,8), w1
+	ADCSBB	24(vp,n,8), w1
+	cmovc	-32(yp), t3
+	setc	%al		   C save carry
+	add	t0, el
+	adc	$0, eh
+	add	t1, el
+	adc	$0, eh
+	add	t2, el
+	adc	$0, eh
+	mov	w0, 16(rp,n,8)
+	add	t3, el
+	lea	-32(yp), yp
+	adc	$0, eh
+	mov	w1, 24(rp,n,8)
+	add	$4, n
+	jnz	L(loop)
+
+L(end):
+	mov	el, (ep)
+	mov	eh, 8(ep)
+
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/aors_err2_n.asm b/mpn/x86_64/aors_err2_n.asm
new file mode 100644
index 0000000..a222117
--- /dev/null
+++ b/mpn/x86_64/aors_err2_n.asm
@@ -0,0 +1,172 @@
+dnl  AMD64 mpn_add_err2_n, mpn_sub_err2_n
+
+dnl  Contributed by David Harvey.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 4.5
+C AMD K10	 ?
+C Intel P4	 ?
+C Intel core2	 6.9
+C Intel corei	 ?
+C Intel atom	 ?
+C VIA nano	 ?
+
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`ep',	`%rcx')
+define(`yp1',	`%r8')
+define(`yp2',   `%r9')
+define(`n_param',     `8(%rsp)')
+define(`cy_param',    `16(%rsp)')
+
+define(`cy1',   `%r14')
+define(`cy2',   `%rax')
+
+define(`n',     `%r10')
+
+define(`w',     `%rbx')
+define(`e1l',	`%rbp')
+define(`e1h',	`%r11')
+define(`e2l',	`%r12')
+define(`e2h',	`%r13')
+
+
+ifdef(`OPERATION_add_err2_n', `
+	define(ADCSBB,	      adc)
+	define(func,	      mpn_add_err2_n)')
+ifdef(`OPERATION_sub_err2_n', `
+	define(ADCSBB,	      sbb)
+	define(func,	      mpn_sub_err2_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err2_n mpn_sub_err2_n)
+
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	mov	cy_param, cy2
+	mov	n_param, n
+
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+
+	xor	R32(e1l), R32(e1l)
+	xor	R32(e1h), R32(e1h)
+	xor	R32(e2l), R32(e2l)
+	xor	R32(e2h), R32(e2h)
+
+	sub	yp1, yp2
+
+	lea	(rp,n,8), rp
+	lea	(up,n,8), up
+	lea	(vp,n,8), vp
+
+	test	$1, n
+	jnz	L(odd)
+
+	lea	-8(yp1,n,8), yp1
+	neg	n
+	jmp	L(top)
+
+	ALIGN(16)
+L(odd):
+	lea	-16(yp1,n,8), yp1
+	neg	n
+	shr	$1, cy2
+	mov	(up,n,8), w
+	ADCSBB	(vp,n,8), w
+	cmovc	8(yp1), e1l
+	cmovc	8(yp1,yp2), e2l
+	mov	w, (rp,n,8)
+	sbb	cy2, cy2
+	inc	n
+	jz	L(end)
+
+	ALIGN(16)
+L(top):
+	mov	(up,n,8), w
+	shr	$1, cy2		C restore carry
+	ADCSBB	(vp,n,8), w
+	mov	w, (rp,n,8)
+	sbb	cy1, cy1	C generate mask, preserve CF
+
+	mov	8(up,n,8), w
+	ADCSBB	8(vp,n,8), w
+	mov	w, 8(rp,n,8)
+	sbb	cy2, cy2	C generate mask, preserve CF
+
+	mov	(yp1), w	C (e1h:e1l) += cy1 * yp1 limb
+	and	cy1, w
+	add	w, e1l
+	adc	$0, e1h
+
+	and	(yp1,yp2), cy1	C (e2h:e2l) += cy1 * yp2 limb
+	add	cy1, e2l
+	adc	$0, e2h
+
+	mov	-8(yp1), w	C (e1h:e1l) += cy2 * next yp1 limb
+	and	cy2, w
+	add	w, e1l
+	adc	$0, e1h
+
+	mov	-8(yp1,yp2), w	C (e2h:e2l) += cy2 * next yp2 limb
+	and	cy2, w
+	add	w, e2l
+	adc	$0, e2h
+
+	add	$2, n
+	lea	-16(yp1), yp1
+	jnz	L(top)
+L(end):
+
+	mov	e1l, (ep)
+	mov	e1h, 8(ep)
+	mov	e2l, 16(ep)
+	mov	e2h, 24(ep)
+
+	and	$1, %eax	C return carry
+
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/aors_err3_n.asm b/mpn/x86_64/aors_err3_n.asm
new file mode 100644
index 0000000..5df1b46
--- /dev/null
+++ b/mpn/x86_64/aors_err3_n.asm
@@ -0,0 +1,156 @@
+dnl  AMD64 mpn_add_err3_n, mpn_sub_err3_n
+
+dnl  Contributed by David Harvey.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 7.0
+C AMD K10	 ?
+C Intel P4	 ?
+C Intel core2	 ?
+C Intel corei	 ?
+C Intel atom	 ?
+C VIA nano	 ?
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`ep',	`%rcx')
+define(`yp1',	`%r8')
+define(`yp2',   `%r9')
+define(`yp3_param',   `8(%rsp)')
+define(`n_param',     `16(%rsp)')
+define(`cy_param',    `24(%rsp)')
+
+define(`n',     `%r10')
+define(`yp3',   `%rcx')
+define(`t',     `%rbx')
+
+define(`e1l',	`%rbp')
+define(`e1h',	`%r11')
+define(`e2l',	`%r12')
+define(`e2h',	`%r13')
+define(`e3l',   `%r14')
+define(`e3h',   `%r15')
+
+
+
+ifdef(`OPERATION_add_err3_n', `
+	define(ADCSBB,	      adc)
+	define(func,	      mpn_add_err3_n)')
+ifdef(`OPERATION_sub_err3_n', `
+	define(ADCSBB,	      sbb)
+	define(func,	      mpn_sub_err3_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err3_n mpn_sub_err3_n)
+
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	mov	cy_param, %rax	C carry-in bit kept in %rax
+	mov	n_param, n	C move n out of its stack slot
+
+	push	%rbx		C save callee-saved regs used below
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	push	ep		C free %rcx so it can serve as yp3
+	mov	64(%rsp), yp3       C load from yp3_param
+
+	xor	R32(e1l), R32(e1l)	C zero all three 2-limb error accumulators
+	xor	R32(e1h), R32(e1h)
+	xor	R32(e2l), R32(e2l)
+	xor	R32(e2h), R32(e2h)
+	xor	R32(e3l), R32(e3l)
+	xor	R32(e3h), R32(e3h)
+
+	sub	yp1, yp2	C make yp2/yp3 relative to yp1, so that
+	sub	yp1, yp3	C ... (yp1,ypX) addresses ypX[]
+
+	lea	-8(yp1,n,8), yp1	C yp1 -> its top limb
+	lea	(rp,n,8), rp	C point rp,up,vp just past their vector ends
+	lea	(up,n,8), up
+	lea	(vp,n,8), vp
+	neg	n		C count upwards from -n towards zero
+
+	ALIGN(16)
+L(top):
+	shr	$1, %rax		C restore carry
+	mov	(up,n,8), %rax
+	ADCSBB	(vp,n,8), %rax
+	mov	%rax, (rp,n,8)
+	sbb	%rax, %rax		C save carry and generate mask
+
+	mov	(yp1), t	C (e1h:e1l) += carry * yp1 limb
+	and	%rax, t
+	add	t, e1l
+	adc	$0, e1h
+
+	mov	(yp1,yp2), t	C (e2h:e2l) += carry * yp2 limb
+	and	%rax, t
+	add	t, e2l
+	adc	$0, e2h
+
+	mov	(yp1,yp3), t	C (e3h:e3l) += carry * yp3 limb
+	and	%rax, t
+	add	t, e3l
+	adc	$0, e3h
+
+	lea	-8(yp1), yp1	C step the y pointers downwards
+	inc	n
+	jnz     L(top)
+
+L(end):
+	and	$1, %eax	C mask (0/-1) -> returned carry bit (0/1)
+	pop	ep		C recover ep (pushed above)
+
+	mov	e1l, (ep)	C store the three 2-limb error estimates
+	mov	e1h, 8(ep)
+	mov	e2l, 16(ep)
+	mov	e2h, 24(ep)
+	mov	e3l, 32(ep)
+	mov	e3h, 40(ep)
+
+	pop	%r15		C restore callee-saved regs
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/aors_n.asm b/mpn/x86_64/aors_n.asm
new file mode 100644
index 0000000..b63c250
--- /dev/null
+++ b/mpn/x86_64/aors_n.asm
@@ -0,0 +1,178 @@
+dnl  AMD64 mpn_add_n, mpn_sub_n
+
+dnl  Copyright 2003-2005, 2007, 2008, 2010-2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 1.5
+C AMD K10	 1.5
+C AMD bd1	 1.8
+C AMD bd2	 1.74
+C AMD bd3	 ?
+C AMD bd4	 1.78
+C AMD zen	 1.5
+C AMD bt1	 2.54
+C AMD bt2	 2.15
+C Intel P4	11.5
+C Intel core2	 4.9
+C Intel NHM	 5.53
+C Intel SBR	 1.59
+C Intel IBR	 1.55
+C Intel HWL	 1.44
+C Intel BWL	 1.14
+C Intel SKL	 1.21
+C Intel atom	 4
+C Intel SLM	 3
+C VIA nano	 3.25
+
+C The loop of this code is the result of running a code generation and
+C optimization tool suite written by David Harvey and Torbjorn Granlund.
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')	C rcx
+define(`up',	`%rsi')	C rdx
+define(`vp',	`%rdx')	C r8
+define(`n',	`%rcx')	C r9
+define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
+
+ifdef(`OPERATION_add_n', `
+	define(ADCSBB,	      adc)
+	define(func,	      mpn_add_n)
+	define(func_nc,	      mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+	define(ADCSBB,	      sbb)
+	define(func,	      mpn_sub_n)
+	define(func_nc,	      mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func_nc)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+	mov	R32(n), R32(%rax)	C keep a copy of n for the tail count
+	shr	$2, n			C n := number of 4-limb rounds
+	and	$3, R32(%rax)		C rax := n mod 4 (leftover limbs)
+	bt	$0, %r8			C cy flag <- carry parameter
+	jrcxz	L(lt4)			C no full round: shared tail code
+
+	mov	(up), %r8		C preload first two limbs
+	mov	8(up), %r9
+	dec	n			C dec leaves CF (carry-in) intact
+	jmp	L(mid)			C join the mpn_add_n/mpn_sub_n loop
+
+EPILOGUE()
+	ALIGN(16)
+PROLOGUE(func)
+	FUNC_ENTRY(4)
+	mov	R32(n), R32(%rax)	C keep a copy of n for the tail count
+	shr	$2, n			C n := number of 4-limb rounds
+	and	$3, R32(%rax)		C rax := n mod 4; also clears CF
+	jrcxz	L(lt4)			C no full round: tail code only
+
+	mov	(up), %r8		C preload first two limbs
+	mov	8(up), %r9
+	dec	n
+	jmp	L(mid)
+
+L(lt4):	dec	R32(%rax)	C 1-3 limbs remain; CF holds carry so far
+	mov	(up), %r8
+	jnz	L(2)
+	ADCSBB	(vp), %r8	C single remaining limb
+	mov	%r8, (rp)
+	adc	R32(%rax), R32(%rax)	C rax was 0: return the carry
+	FUNC_EXIT()
+	ret
+
+L(2):	dec	R32(%rax)	C dec preserves CF
+	mov	8(up), %r9
+	jnz	L(3)
+	ADCSBB	(vp), %r8	C two remaining limbs
+	ADCSBB	8(vp), %r9
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	adc	R32(%rax), R32(%rax)	C rax was 0: return the carry
+	FUNC_EXIT()
+	ret
+
+L(3):	mov	16(up), %r10	C three remaining limbs
+	ADCSBB	(vp), %r8
+	ADCSBB	8(vp), %r9
+	ADCSBB	16(vp), %r10
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	mov	%r10, 16(rp)
+	setc	R8(%rax)	C return the carry
+	FUNC_EXIT()
+	ret
+
+	ALIGN(16)
+L(top):	ADCSBB	(vp), %r8	C 4-way unrolled loop; CF carries between limbs
+	ADCSBB	8(vp), %r9
+	ADCSBB	16(vp), %r10
+	ADCSBB	24(vp), %r11
+	mov	%r8, (rp)
+	lea	32(up), up
+	mov	%r9, 8(rp)
+	mov	%r10, 16(rp)
+	dec	n		C dec leaves CF intact
+	mov	%r11, 24(rp)
+	lea	32(vp), vp
+	mov	(up), %r8	C preload next round's limbs
+	mov	8(up), %r9
+	lea	32(rp), rp
+L(mid):	mov	16(up), %r10
+	mov	24(up), %r11
+	jnz	L(top)
+
+L(end):	lea	32(up), up	C final group of 4 limbs
+	ADCSBB	(vp), %r8
+	ADCSBB	8(vp), %r9
+	ADCSBB	16(vp), %r10
+	ADCSBB	24(vp), %r11
+	lea	32(vp), vp
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	mov	%r10, 16(rp)
+	mov	%r11, 24(rp)
+	lea	32(rp), rp
+
+	inc	R32(%rax)	C test rax for zero without touching CF
+	dec	R32(%rax)
+	jnz	L(lt4)		C handle remaining 1-3 limbs
+	adc	R32(%rax), R32(%rax)	C rax was 0: return the carry
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorsmul_1.asm b/mpn/x86_64/aorsmul_1.asm
new file mode 100644
index 0000000..3e9705a
--- /dev/null
+++ b/mpn/x86_64/aorsmul_1.asm
@@ -0,0 +1,190 @@
+dnl  AMD64 mpn_addmul_1 and mpn_submul_1.
+
+dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 2.52
+C AMD K10	 2.51
+C AMD bd1	 4.43
+C AMD bd2	 5.03	 5.63
+C AMD bd3	 ?
+C AMD bd4	 ?
+C AMD zen	 ?
+C AMD bobcat	 6.20
+C AMD jaguar	 5.57	 6.56
+C Intel P4	14.9	17.1
+C Intel core2	 5.15
+C Intel NHM	 4.93
+C Intel SBR	 3.95
+C Intel IBR	 3.75
+C Intel HWL	 3.62
+C Intel BWL	 2.53
+C Intel SKL	 2.53
+C Intel atom	21.3
+C Intel SLM	 9.0
+C VIA nano	 5.0
+
+C The loop of this code is the result of running a code generation and
+C optimization tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * The loop is great, but the prologue and epilogue code was quickly written.
+C    Tune it!
+
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`vl',      `%rcx')   C r9
+
+define(`n',       `%r11')
+
+ifdef(`OPERATION_addmul_1',`
+      define(`ADDSUB',        `add')
+      define(`func',  `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+      define(`ADDSUB',        `sub')
+      define(`func',  `mpn_submul_1')
+')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+IFDOS(`	define(`up', ``%rsi'')	') dnl
+IFDOS(`	define(`rp', ``%rcx'')	') dnl
+IFDOS(`	define(`vl', ``%r9'')	') dnl
+IFDOS(`	define(`r9', ``rdi'')	') dnl
+IFDOS(`	define(`n',  ``%r8'')	') dnl
+IFDOS(`	define(`r8', ``r11'')	') dnl
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+
+IFDOS(``push	%rsi		'')
+IFDOS(``push	%rdi		'')
+IFDOS(``mov	%rdx, %rsi	'')
+
+	mov	(up), %rax		C read first u limb early
+	push	%rbx
+IFSTD(`	mov	n_param, %rbx   ')	C move away n from rdx, mul uses it
+IFDOS(`	mov	n, %rbx         ')
+	mul	vl			C rdx:rax = up[0] * vl
+IFSTD(`	mov	%rbx, n         ')
+
+	and	$3, R32(%rbx)		C dispatch on n mod 4
+	jz	L(b0)
+	cmp	$2, R32(%rbx)
+	jz	L(b2)
+	jg	L(b3)
+
+L(b1):	dec	n			C n mod 4 == 1 feed-in
+	jne	L(gt1)
+	ADDSUB	%rax, (rp)		C single-limb case
+	jmp	L(ret)
+L(gt1):	lea	8(up,n,8), up		C point up/rp for negative indexing
+	lea	-8(rp,n,8), rp
+	neg	n
+	xor	%r10, %r10		C clear accumulator regs
+	xor	R32(%rbx), R32(%rbx)
+	mov	%rax, %r9
+	mov	(up,n,8), %rax
+	mov	%rdx, %r8
+	jmp	L(L1)
+
+L(b0):	lea	(up,n,8), up		C n mod 4 == 0 feed-in
+	lea	-16(rp,n,8), rp
+	neg	n
+	xor	%r10, %r10
+	mov	%rax, %r8
+	mov	%rdx, %rbx
+	jmp	 L(L0)
+
+L(b3):	lea	-8(up,n,8), up		C n mod 4 == 3 feed-in
+	lea	-24(rp,n,8), rp
+	neg	n
+	mov	%rax, %rbx
+	mov	%rdx, %r10
+	jmp	L(L3)
+
+L(b2):	lea	-16(up,n,8), up		C n mod 4 == 2 feed-in
+	lea	-32(rp,n,8), rp
+	neg	n
+	xor	%r8, %r8
+	xor	R32(%rbx), R32(%rbx)
+	mov	%rax, %r10
+	mov	24(up,n,8), %rax
+	mov	%rdx, %r9
+	jmp	L(L2)
+
+	ALIGN(16)
+L(top):	ADDSUB	%r10, (rp,n,8)	C 4-way unrolled multiply-accumulate loop
+	adc	%rax, %r9
+	mov	(up,n,8), %rax
+	adc	%rdx, %r8
+	mov	$0, R32(%r10)
+L(L1):	mul	vl
+	ADDSUB	%r9, 8(rp,n,8)
+	adc	%rax, %r8
+	adc	%rdx, %rbx
+L(L0):	mov	8(up,n,8), %rax
+	mul	vl
+	ADDSUB	%r8, 16(rp,n,8)
+	adc	%rax, %rbx
+	adc	%rdx, %r10
+L(L3):	mov	16(up,n,8), %rax
+	mul	vl
+	ADDSUB	%rbx, 24(rp,n,8)
+	mov	$0, R32(%r8)		C zero
+	mov	%r8, %rbx		C zero
+	adc	%rax, %r10
+	mov	24(up,n,8), %rax
+	mov	%r8, %r9		C zero
+	adc	%rdx, %r9
+L(L2):	mul	vl
+	add	$4, n
+	js	 L(top)
+
+	ADDSUB	%r10, (rp,n,8)	C wind-down: apply the last two limbs
+	adc	%rax, %r9
+	adc	%r8, %rdx
+	ADDSUB	%r9, 8(rp,n,8)
+L(ret):	adc	$0, %rdx	C fold the final carry/borrow into rdx
+	mov	%rdx, %rax	C return the carry limb
+
+	pop	%rbx
+IFDOS(``pop	%rdi		'')
+IFDOS(``pop	%rsi		'')
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/bdiv_dbm1c.asm b/mpn/x86_64/bdiv_dbm1c.asm
new file mode 100644
index 0000000..bbfa9e0
--- /dev/null
+++ b/mpn/x86_64/bdiv_dbm1c.asm
@@ -0,0 +1,106 @@
+dnl  x86_64 mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 2.25
+C AMD K10	 2.25
+C Intel P4	12.5
+C Intel core2	 4
+C Intel NHM	 3.75
+C Intel SBR	 3.6
+C Intel atom	20
+C VIA nano	 4
+
+C TODO
+C  * Optimise feed-in code.
+
+C INPUT PARAMETERS
+define(`qp',	  `%rdi')
+define(`up',	  `%rsi')
+define(`n_param', `%rdx')
+define(`bd',	  `%rcx')
+define(`cy',	  `%r8')
+
+define(`n',       `%r9')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+	mov	(up), %rax	C read first u limb early
+	mov	n_param, n	C free rdx for mul
+	mov	R32(n_param), R32(%r11)	C keep a copy for the mod-4 dispatch
+	mul	bd		C rdx:rax = up[0] * bd
+	lea	(up,n,8), up	C point up/qp just past their vector ends
+	lea	(qp,n,8), qp
+	neg	n		C count upwards from -n towards zero
+	and	$3, R32(%r11)	C n mod 4 selects the loop entry point
+	jz	L(lo0)
+	lea	-4(n,%r11), n	C bias n so the loop's add $4 ends at zero
+	cmp	$2, R32(%r11)
+	jc	L(lo1)
+	jz	L(lo2)
+	jmp	L(lo3)
+
+	ALIGN(16)
+L(top):	mov	(up,n,8), %rax
+	mul	bd		C rdx:rax = next product
+L(lo0):	sub	%rax, %r8	C r8 (running value, starts as cy) -= low product
+	mov	%r8, (qp,n,8)
+	sbb	%rdx, %r8	C ... then -= high product and borrow
+	mov	8(up,n,8), %rax
+	mul	bd
+L(lo3):	sub	%rax, %r8
+	mov	%r8, 8(qp,n,8)
+	sbb	%rdx, %r8
+	mov	16(up,n,8), %rax
+	mul	bd
+L(lo2):	sub	%rax, %r8
+	mov	%r8, 16(qp,n,8)
+	sbb	%rdx, %r8
+	mov	24(up,n,8), %rax
+	mul	bd
+L(lo1):	sub	%rax, %r8
+	mov	%r8, 24(qp,n,8)
+	sbb	%rdx, %r8
+	add	$4, n
+	jnz	L(top)
+
+	mov	%r8, %rax	C return the final r8 value
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/bdiv_q_1.asm b/mpn/x86_64/bdiv_q_1.asm
new file mode 100644
index 0000000..a6b36e9
--- /dev/null
+++ b/mpn/x86_64/bdiv_q_1.asm
@@ -0,0 +1,195 @@
+dnl  AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl  Copyright 2001, 2002, 2004-2006, 2010-2012, 2017 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	    cycles/limb    cycles/limb
+C	       norm	       unorm
+C AMD K8,K9	11		11
+C AMD K10	11		11
+C AMD bull	13.5		14
+C AMD pile	14		15
+C AMD steam
+C AMD excavator
+C AMD bobcat	14		14
+C AMD jaguar	14.5		15
+C Intel P4	33		33
+C Intel core2	13.5		13.25
+C Intel NHM	14		14
+C Intel SBR	8		8.25
+C Intel IBR	7.75		7.85
+C Intel HWL	8		8
+C Intel BWL	8		8
+C Intel SKL	8		8
+C Intel atom	34		36
+C Intel SLM	13.7		13.5
+C VIA nano	19.25		19.25	needs re-measuring
+
+C INPUT PARAMETERS
+define(`rp',		`%rdi')
+define(`up',		`%rsi')
+define(`n',		`%rdx')
+define(`d',		`%rcx')
+define(`di',		`%r8')		C	just mpn_pi1_bdiv_q_1
+define(`ncnt',		`%r9')		C	just mpn_pi1_bdiv_q_1
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+	FUNC_ENTRY(4)
+	push	%rbx
+
+	mov	%rcx, %rax	C rax = divisor d
+	xor	R32(%rcx), R32(%rcx)	C ncnt count
+	mov	%rdx, %r10	C r10 = n
+
+	bt	$0, R32(%rax)
+	jnc	L(evn)			C skip bsf unless divisor is even
+
+L(odd):	mov	%rax, %rbx	C save odd divisor
+	shr	R32(%rax)
+	and	$127, R32(%rax)		C d/2, 7 bits
+
+	LEA(	binvert_limb_table, %rdx)	C table of 8-bit binary inverses
+
+	movzbl	(%rdx,%rax), R32(%rax)	C inv 8 bits
+
+	mov	%rbx, %r11		C d without twos
+
+	lea	(%rax,%rax), R32(%rdx)	C 2*inv
+	imul	R32(%rax), R32(%rax)	C inv*inv
+	imul	R32(%rbx), R32(%rax)	C inv*inv*d
+	sub	R32(%rax), R32(%rdx)	C inv = 2*inv - inv*inv*d, 16 bits
+
+	lea	(%rdx,%rdx), R32(%rax)	C 2*inv
+	imul	R32(%rdx), R32(%rdx)	C inv*inv
+	imul	R32(%rbx), R32(%rdx)	C inv*inv*d
+	sub	R32(%rdx), R32(%rax)	C inv = 2*inv - inv*inv*d, 32 bits
+
+	lea	(%rax,%rax), %r8	C 2*inv
+	imul	%rax, %rax		C inv*inv
+	imul	%rbx, %rax		C inv*inv*d
+	sub	%rax, %r8		C inv = 2*inv - inv*inv*d, 64 bits
+
+	jmp	L(pi1)			C continue in mpn_pi1_bdiv_q_1's body
+
+L(evn):	bsf	%rax, %rcx	C rcx = number of trailing zero bits of d
+	shr	R8(%rcx), %rax	C strip the factors of two from d
+	jmp	L(odd)
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+IFDOS(`	mov	64(%rsp), %r9	')
+	push	%rbx
+
+	mov	%rcx, %r11		C d
+	mov	%rdx, %r10		C n
+	mov	%r9, %rcx		C ncnt
+
+L(pi1):	mov	(up), %rax		C up[0]
+
+	dec	%r10
+	jz	L(one)			C n == 1 handled separately
+
+	lea	8(up,%r10,8), up	C up end
+	lea	(rp,%r10,8), rp		C rp end
+	neg	%r10			C -n
+
+	test	R32(%rcx), R32(%rcx)
+	jnz	L(unorm)		C branch if count != 0
+	xor	R32(%rbx), R32(%rbx)	C rbx = carry bit, initially clear
+	jmp	L(nent)
+
+	ALIGN(8)
+L(ntop):mul	%r11			C carry limb in rdx	0 10
+	mov	-8(up,%r10,8), %rax	C
+	sub	%rbx, %rax		C apply carry bit
+	setc	R8(%rbx)		C
+	sub	%rdx, %rax		C apply carry limb	5
+	adc	$0, R32(%rbx)		C			6
+L(nent):imul	%r8, %rax		C			6
+	mov	%rax, (rp,%r10,8)	C
+	inc	%r10			C
+	jnz	L(ntop)
+
+	mov	-8(up), %r9		C up high limb
+	jmp	L(com)
+
+L(unorm):
+	mov	(up,%r10,8), %r9	C up[1]
+	shr	R8(%rcx), %rax		C
+	neg	R32(%rcx)		C cnt := -cnt; next shift is by 64-cnt
+	shl	R8(%rcx), %r9		C
+	neg	R32(%rcx)		C restore cnt
+	or	%r9, %rax		C combine the two shifted halves
+	xor	R32(%rbx), R32(%rbx)	C rbx = carry bit, initially clear
+	jmp	L(uent)
+
+	ALIGN(8)
+L(utop):mul	%r11			C carry limb in rdx	0 10
+	mov	(up,%r10,8), %rax	C
+	shl	R8(%rcx), %rax		C
+	neg	R32(%rcx)		C cnt := -cnt; next shift is by 64-cnt
+	or	%r9, %rax		C combine with half shifted last round
+	sub	%rbx, %rax		C apply carry bit
+	setc	R8(%rbx)		C
+	sub	%rdx, %rax		C apply carry limb	5
+	adc	$0, R32(%rbx)		C			6
+L(uent):imul	%r8, %rax		C			6
+	mov	(up,%r10,8), %r9	C
+	shr	R8(%rcx), %r9		C
+	neg	R32(%rcx)		C restore cnt
+	mov	%rax, (rp,%r10,8)	C
+	inc	%r10			C
+	jnz	L(utop)
+
+L(com):	mul	%r11			C carry limb in rdx
+	sub	%rbx, %r9		C apply carry bit
+	sub	%rdx, %r9		C apply carry limb
+	imul	%r8, %r9		C last quotient limb
+	mov	%r9, (rp)
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+
+L(one):	shr	R8(%rcx), %rax	C single limb: shift, then multiply by inverse
+	imul	%r8, %rax
+	mov	%rax, (rp)
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/cnd_aors_n.asm b/mpn/x86_64/cnd_aors_n.asm
new file mode 100644
index 0000000..36b2e84
--- /dev/null
+++ b/mpn/x86_64/cnd_aors_n.asm
@@ -0,0 +1,183 @@
+dnl  AMD64 mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl  Copyright 2011-2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 2
+C AMD K10	 2
+C AMD bd1	 2.32
+C AMD bobcat	 3
+C Intel P4	13
+C Intel core2	 2.9
+C Intel NHM	 2.8
+C Intel SBR	 2.4
+C Intel atom	 5.33
+C VIA nano	 3
+
+C NOTES
+C  * It might seem natural to use the cmov insn here, but since this function
+C    is supposed to have the exact same execution pattern for cnd true and
+C    false, and since cmov's documentation is not clear about whether it
+C    actually reads both source operands and writes the register for a false
+C    condition, we cannot use it.
+C  * Two cases could be optimised: (1) cnd_add_n could use ADCSBB-from-memory
+C    to save one insn/limb, and (2) when up=rp cnd_add_n and cnd_sub_n could use
+C    ADCSBB-to-memory, again saving 1 insn/limb.
+C  * This runs optimally at decoder bandwidth on K10.  It has not been tuned
+C    for any other processor.
+
+C INPUT PARAMETERS
+define(`cnd',	`%rdi')	dnl rcx
+define(`rp',	`%rsi')	dnl rdx
+define(`up',	`%rdx')	dnl r8
+define(`vp',	`%rcx')	dnl r9
+define(`n',	`%r8')	dnl rsp+40
+
+ifdef(`OPERATION_cnd_add_n', `
+	define(ADDSUB,	      add)
+	define(ADCSBB,	      adc)
+	define(func,	      mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+	define(ADDSUB,	      sub)
+	define(ADCSBB,	      sbb)
+	define(func,	      mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), R32(%r8)')
+	push	%rbx		C save callee-saved regs used below
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+
+	neg	cnd		C CF set iff cnd was nonzero
+	sbb	cnd, cnd		C make cnd mask
+
+	lea	(vp,n,8), vp	C point vp,up,rp just past their vector ends
+	lea	(up,n,8), up
+	lea	(rp,n,8), rp
+
+	mov	R32(n), R32(%rax)
+	neg	n		C count upwards from -n towards zero
+	and	$3, R32(%rax)	C n mod 4 selects a feed-in path
+	jz	L(top)			C carry-save reg rax = 0 in this arc
+	cmp	$2, R32(%rax)
+	jc	L(b1)
+	jz	L(b2)
+
+L(b3):	mov	(vp,n,8), %r12	C n mod 4 == 3: do three limbs first
+	mov	8(vp,n,8), %r13
+	mov	16(vp,n,8), %r14
+	and	cnd, %r12	C mask v limbs: they become 0 when cnd mask is 0
+	mov	(up,n,8), %r10
+	and	cnd, %r13
+	mov	8(up,n,8), %rbx
+	and	cnd, %r14
+	mov	16(up,n,8), %rbp
+	ADDSUB	%r12, %r10
+	mov	%r10, (rp,n,8)
+	ADCSBB	%r13, %rbx
+	mov	%rbx, 8(rp,n,8)
+	ADCSBB	%r14, %rbp
+	mov	%rbp, 16(rp,n,8)
+	sbb	R32(%rax), R32(%rax)	C save carry
+	add	$3, n
+	js	L(top)
+	jmp	L(end)
+
+L(b2):	mov	(vp,n,8), %r12	C n mod 4 == 2: do two limbs first
+	mov	8(vp,n,8), %r13
+	mov	(up,n,8), %r10
+	and	cnd, %r12
+	mov	8(up,n,8), %rbx
+	and	cnd, %r13
+	ADDSUB	%r12, %r10
+	mov	%r10, (rp,n,8)
+	ADCSBB	%r13, %rbx
+	mov	%rbx, 8(rp,n,8)
+	sbb	R32(%rax), R32(%rax)	C save carry
+	add	$2, n
+	js	L(top)
+	jmp	L(end)
+
+L(b1):	mov	(vp,n,8), %r12	C n mod 4 == 1: do one limb first
+	mov	(up,n,8), %r10
+	and	cnd, %r12
+	ADDSUB	%r12, %r10
+	mov	%r10, (rp,n,8)
+	sbb	R32(%rax), R32(%rax)	C save carry
+	add	$1, n
+	jns	L(end)
+
+	ALIGN(16)
+L(top):	mov	(vp,n,8), %r12	C 4-way unrolled conditional add/sub loop
+	mov	8(vp,n,8), %r13
+	mov	16(vp,n,8), %r14
+	mov	24(vp,n,8), %r11
+	and	cnd, %r12	C mask v limbs as in the feed-in code
+	mov	(up,n,8), %r10
+	and	cnd, %r13
+	mov	8(up,n,8), %rbx
+	and	cnd, %r14
+	mov	16(up,n,8), %rbp
+	and	cnd, %r11
+	mov	24(up,n,8), %r9
+	add	R32(%rax), R32(%rax)	C restore carry
+	ADCSBB	%r12, %r10
+	mov	%r10, (rp,n,8)
+	ADCSBB	%r13, %rbx
+	mov	%rbx, 8(rp,n,8)
+	ADCSBB	%r14, %rbp
+	mov	%rbp, 16(rp,n,8)
+	ADCSBB	%r11, %r9
+	mov	%r9, 24(rp,n,8)
+	sbb	R32(%rax), R32(%rax)	C save carry
+	add	$4, n
+	js	L(top)
+
+L(end):	neg	R32(%rax)	C mask (0/-1) -> return value (0/1)
+	pop	%r14		C restore callee-saved regs
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/com.asm b/mpn/x86_64/com.asm
new file mode 100644
index 0000000..31bc4b6
--- /dev/null
+++ b/mpn/x86_64/com.asm
@@ -0,0 +1,95 @@
+dnl  AMD64 mpn_com.
+
+dnl  Copyright 2004-2006, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	    cycles/limb
+C AMD K8,K9	 1.25
+C AMD K10	 1.25
+C Intel P4	 2.78
+C Intel core2	 1.1
+C Intel corei	 1.5
+C Intel atom	 ?
+C VIA nano	 2
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_com)
+	FUNC_ENTRY(3)
+	movq	(up), %r8		C read first limb early
+	movl	R32(%rdx), R32(%rax)	C copy n for the mod-4 dispatch
+	leaq	(up,n,8), up		C point up/rp just past their vector ends
+	leaq	(rp,n,8), rp
+	negq	n			C count upwards from -n towards zero
+	andl	$3, R32(%rax)		C n mod 4 selects the entry point
+	je	L(b00)
+	cmpl	$2, R32(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	notq	%r8			C n mod 4 == 3 feed-in
+	movq	%r8, (rp,n,8)
+	decq	n
+	jmp	L(e11)
+L(b10):	addq	$-2, n			C n mod 4 == 2 feed-in
+	jmp	L(e10)
+	.byte	0x90,0x90,0x90,0x90,0x90,0x90	C six nops (code alignment)
+L(b01):	notq	%r8			C n mod 4 == 1 feed-in
+	movq	%r8, (rp,n,8)
+	incq	n
+	jz	L(ret)			C n was 1: done
+
+L(oop):	movq	(up,n,8), %r8	C complement four limbs per iteration
+L(b00):	movq	8(up,n,8), %r9
+	notq	%r8
+	notq	%r9
+	movq	%r8, (rp,n,8)
+	movq	%r9, 8(rp,n,8)
+L(e11):	movq	16(up,n,8), %r8
+L(e10):	movq	24(up,n,8), %r9
+	notq	%r8
+	notq	%r9
+	movq	%r8, 16(rp,n,8)
+	movq	%r9, 24(rp,n,8)
+	addq	$4, n
+	jnc	L(oop)
+L(ret):	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/copyd.asm b/mpn/x86_64/copyd.asm
new file mode 100644
index 0000000..c8f9d84
--- /dev/null
+++ b/mpn/x86_64/copyd.asm
@@ -0,0 +1,93 @@
+dnl  AMD64 mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 1
+C AMD K10	 1
+C AMD bd1	 1.36
+C AMD bobcat	 1.71
+C Intel P4	 2-3
+C Intel core2	 1
+C Intel NHM	 1
+C Intel SBR	 1
+C Intel atom	 2
+C VIA nano	 2
+
+
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
+
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(64)
+PROLOGUE(mpn_copyd)
+	lea	-8(up,n,8), up	C up -> highest source limb
+	lea	(rp,n,8), rp	C rp -> just past destination end
+	sub	$4, n
+	jc	L(end)		C fewer than 4 limbs: tail code only
+	nop
+
+L(top):	mov	(up), %rax	C copy four limbs per round, high to low
+	mov	-8(up), %r9
+	lea	-32(rp), rp
+	mov	-16(up), %r10
+	mov	-24(up), %r11
+	lea	-32(up), up
+	mov	%rax, 24(rp)
+	mov	%r9, 16(rp)
+	sub	$4, n
+	mov	%r10, 8(rp)
+	mov	%r11, (rp)
+	jnc	L(top)
+
+L(end):	shr	R32(n)	C low bit of n set -> one leftover limb
+	jnc	1f
+	mov	(up), %rax
+	mov	%rax, -8(rp)
+	lea	-8(rp), rp
+	lea	-8(up), up
+1:	shr	R32(n)	C next bit set -> two leftover limbs
+	jnc	1f
+	mov	(up), %rax
+	mov	-8(up), %r9
+	mov	%rax, -8(rp)
+	mov	%r9, -16(rp)
+1:	ret
+EPILOGUE()
diff --git a/mpn/x86_64/copyi.asm b/mpn/x86_64/copyi.asm
new file mode 100644
index 0000000..9258bc5
--- /dev/null
+++ b/mpn/x86_64/copyi.asm
@@ -0,0 +1,92 @@
+dnl  AMD64 mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 1
+C AMD K10	 1
+C AMD bd1	 1.36
+C AMD bobcat	 1.71
+C Intel P4	 2-3
+C Intel core2	 1
+C Intel NHM	 1
+C Intel SBR	 1
+C Intel atom	 2
+C VIA nano	 2
+
+
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
+
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(64)
+	.byte	0,0,0,0,0,0
+PROLOGUE(mpn_copyi)
+	lea	-8(rp), rp	C bias rp so the tail stores use 8(rp)/16(rp)
+	sub	$4, n
+	jc	L(end)		C fewer than 4 limbs: tail code only
+
+L(top):	mov	(up), %rax	C copy four limbs per round, low to high
+	mov	8(up), %r9
+	lea	32(rp), rp
+	mov	16(up), %r10
+	mov	24(up), %r11
+	lea	32(up), up
+	mov	%rax, -24(rp)
+	mov	%r9, -16(rp)
+	sub	$4, n
+	mov	%r10, -8(rp)
+	mov	%r11, (rp)
+	jnc	L(top)
+
+L(end):	shr	R32(n)	C low bit of n set -> one leftover limb
+	jnc	1f
+	mov	(up), %rax
+	mov	%rax, 8(rp)
+	lea	8(rp), rp
+	lea	8(up), up
+1:	shr	R32(n)	C next bit set -> two leftover limbs
+	jnc	1f
+	mov	(up), %rax
+	mov	8(up), %r9
+	mov	%rax, 8(rp)
+	mov	%r9, 16(rp)
+1:	ret
+EPILOGUE()
diff --git a/mpn/x86_64/darwin.m4 b/mpn/x86_64/darwin.m4
new file mode 100644
index 0000000..7771476
--- /dev/null
+++ b/mpn/x86_64/darwin.m4
@@ -0,0 +1,82 @@
+divert(-1)
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+define(`DARWIN')
+
+define(`LEA',`dnl
+ifdef(`PIC',
+	`lea	$1(%rip), $2'
+,
+	`movabs	`$'$1, $2')
+')
+
+dnl  Usage: CALL(funcname)
+dnl
+dnl  Simply override the definition in x86_64-defs.m4.
+
+define(`CALL',`call	GSYM_PREFIX`'$1')
+define(`TCALL',`jmp	GSYM_PREFIX`'$1')
+
+
+dnl  Usage: JUMPTABSECT
+dnl
+dnl  CAUTION: Do not put anything sensible here, like RODATA.  That works with
+dnl  some Darwin tool chains, but silently breaks with others.  (Note that
+dnl  putting jump tables in the text segment is a really poor idea for many PC
+dnl  processors, since they cannot cache the same thing in both L1D and L2I.)
+
+define(`JUMPTABSECT', `.text')
+
+
+dnl  Usage: JMPENT(targlabel,tablabel)
+
+define(`JMPENT',`dnl
+ifdef(`PIC',
+	`.set	$1_tmp, $1-$2
+	.long	$1_tmp'
+,
+	`.quad	$1'
+)')
+
+dnl  Target ABI macros.  For Darwin we override IFELF (and leave default for
+dnl  IFDOS and IFSTD).
+
+define(`IFELF',   `')
+
+
+dnl  Usage: PROTECT(symbol)
+dnl
+dnl  Used for private GMP symbols that should never be overridden by users.
+dnl  This can save reloc entries and improve shlib sharing as well as
+dnl  application startup times.
+
+define(`PROTECT',  `.private_extern $1')
+
+
+divert`'dnl
diff --git a/mpn/x86_64/div_qr_1n_pi1.asm b/mpn/x86_64/div_qr_1n_pi1.asm
new file mode 100644
index 0000000..66c4f32
--- /dev/null
+++ b/mpn/x86_64/div_qr_1n_pi1.asm
@@ -0,0 +1,247 @@
+dnl  x86-64 mpn_div_qr_1n_pi1
+dnl  -- Divide an mpn number by a normalized single-limb number,
+dnl     using a single-limb inverse.
+
+dnl  Contributed to the GNU project by Niels Möller
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C		c/l
+C AMD K8,K9	13
+C AMD K10	13
+C AMD bull	16.5
+C AMD pile	15
+C AMD steam	 ?
+C AMD bobcat	16
+C AMD jaguar	 ?
+C Intel P4	47	poor
+C Intel core	19.25
+C Intel NHM	18
+C Intel SBR	15	poor
+C Intel IBR	13
+C Intel HWL	11.7
+C Intel BWL	 ?
+C Intel atom	52	very poor
+C VIA nano	19
+
+
+C INPUT PARAMETERS
+define(`QP', `%rdi')
+define(`UP', `%rsi')
+define(`UN_INPUT', `%rdx')
+define(`U1', `%rcx')	C Also in %rax
+define(`D', `%r8')
+define(`DINV', `%r9')
+
+C Invariants
+define(`B2', `%rbp')
+define(`B2md', `%rbx')
+
+C Variables
+define(`UN', `%r8')	C Overlaps D input
+define(`T', `%r10')
+define(`U0', `%r11')
+define(`U2', `%r12')
+define(`Q0', `%r13')
+define(`Q1', `%r14')
+define(`Q2', `%r15')
+
+ABI_SUPPORT(STD64)
+
+	ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_div_qr_1n_pi1)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+IFDOS(`	mov	64(%rsp), %r9	')
+	dec	UN_INPUT
+	jnz	L(first)
+
+	C Just a single 2/1 division.
+	C T, U0 are allocated in scratch registers
+	lea	1(U1), T
+	mov	U1, %rax
+	mul	DINV
+	mov	(UP), U0
+	add	U0, %rax
+	adc	T, %rdx
+	mov	%rdx, T
+	imul	D, %rdx
+	sub	%rdx, U0
+	cmp	U0, %rax
+	lea	(U0, D), %rax
+	cmovnc	U0, %rax
+	sbb	$0, T
+	cmp	D, %rax
+	jc	L(single_div_done)
+	sub	D, %rax
+	add	$1, T
+L(single_div_done):
+	mov	T, (QP)
+	FUNC_EXIT()
+	ret
+L(first):
+	C FIXME: Could delay some of these until we enter the loop.
+	push	%r15
+	push	%r14
+	push	%r13
+	push	%r12
+	push	%rbx
+	push	%rbp
+
+	mov	D, B2
+	imul	DINV, B2
+	neg	B2
+	mov	B2, B2md
+	sub	D, B2md
+
+	C D not needed until final reduction
+	push	D
+	mov	UN_INPUT, UN	C Clobbers D
+
+	mov	DINV, %rax
+	mul	U1
+	mov	%rax, Q0
+	add	U1, %rdx
+	mov	%rdx, T
+
+	mov	B2, %rax
+	mul	U1
+	mov	-8(UP, UN, 8), U0
+	mov	(UP, UN, 8), U1
+	mov	T, (QP, UN, 8)
+	add	%rax, U0
+	adc	%rdx, U1
+	sbb	U2, U2
+	dec	UN
+	mov	U1, %rax
+	jz	L(final)
+
+	ALIGN(16)
+
+	C Loop is 28 instructions, 30 decoder slots, should run in 10 cycles.
+	C At entry, %rax holds an extra copy of U1
+L(loop):
+	C {Q2, Q1, Q0} <-- DINV * U1 + B (Q0 + U2 DINV) + B^2 U2
+	C Remains to add in B (U1 + c)
+	mov	DINV, Q1
+	mov	U2, Q2
+	and	U2, Q1
+	neg	Q2
+	mul	DINV
+	add	%rdx, Q1
+	adc	$0, Q2
+	add	Q0, Q1
+	mov	%rax, Q0
+	mov	B2, %rax
+	lea	(B2md, U0), T
+	adc	$0, Q2
+
+	C {U2, U1, U0} <-- (U0 + U2 B2 -c U) B + U1 B2 + u
+	mul	U1
+	and	B2, U2
+	add	U2, U0
+	cmovnc	U0, T
+
+	C {QP+UN, ...} <-- {QP+UN, ...} + {Q2, Q1} + U1 + c
+	adc	U1, Q1
+	mov	-8(UP, UN, 8), U0
+	adc	Q2, 8(QP, UN, 8)
+	jc	L(q_incr)
+L(q_incr_done):
+	add	%rax, U0
+	mov	T, %rax
+	adc	%rdx, %rax
+	mov	Q1, (QP, UN, 8)
+	sbb	U2, U2
+	dec	UN
+	mov	%rax, U1
+	jnz	L(loop)
+
+L(final):
+	pop	D
+
+	mov	U2, Q1
+	and	D, U2
+	sub	U2, %rax
+	neg	Q1
+
+	mov	%rax, U1
+	sub	D, %rax
+	cmovc	U1, %rax
+	sbb	$-1, Q1
+
+	lea	1(%rax), T
+	mul	DINV
+	add	U0, %rax
+	adc	T, %rdx
+	mov	%rdx, T
+	imul	D, %rdx
+	sub	%rdx, U0
+	cmp	U0, %rax
+	lea	(U0, D), %rax
+	cmovnc	U0, %rax
+	sbb	$0, T
+	cmp	D, %rax
+	jc	L(div_done)
+	sub	D, %rax
+	add	$1, T
+L(div_done):
+	add	T, Q0
+	mov	Q0, (QP)
+	adc	Q1, 8(QP)
+	jnc	L(done)
+L(final_q_incr):
+	addq	$1, 16(QP)
+	lea	8(QP), QP
+	jc	L(final_q_incr)
+
+L(done):
+	pop	%rbp
+	pop	%rbx
+	pop	%r12
+	pop	%r13
+	pop	%r14
+	pop	%r15
+	FUNC_EXIT()
+	ret
+
+L(q_incr):
+	C U1 is not live, so use it for indexing
+	lea	16(QP, UN, 8), U1
+L(q_incr_loop):
+	addq	$1, (U1)
+	jnc	L(q_incr_done)
+	lea	8(U1), U1
+	jmp	L(q_incr_loop)
+EPILOGUE()
diff --git a/mpn/x86_64/div_qr_2n_pi1.asm b/mpn/x86_64/div_qr_2n_pi1.asm
new file mode 100644
index 0000000..6b1f5b1
--- /dev/null
+++ b/mpn/x86_64/div_qr_2n_pi1.asm
@@ -0,0 +1,158 @@
+dnl  x86-64 mpn_div_qr_2n_pi1
+dnl  -- Divide an mpn number by a normalized 2-limb number,
+dnl     using a single-limb inverse.
+
+dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C		c/l
+C INPUT PARAMETERS
+define(`qp',		`%rdi')
+define(`rp',		`%rsi')
+define(`up_param',	`%rdx')
+define(`un',		`%rcx')
+define(`d1',		`%r8')
+define(`d0',		`%r9')
+define(`di_param',	`8(%rsp)')
+
+define(`di',		`%r10')
+define(`up',		`%r11')
+define(`u2',		`%rbx')
+define(`u1',		`%r12')
+define(`t1',		`%r13')
+define(`t0',		`%r14')
+define(`md1',		`%r15')
+
+C TODO
+C * Store qh in the same stack slot as di_param, instead of pushing
+C   it. (we could put it in register %rbp, but then we would need to
+C   save and restore that instead, which doesn't seem like a win).
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_div_qr_2n_pi1)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+IFDOS(`	mov	64(%rsp), %r9	')
+IFDOS(`define(`di_param', `72(%rsp)')')
+	mov	di_param, di
+	mov	up_param, up
+	push	%r15
+	push	%r14
+	push	%r13
+	push	%r12
+	push	%rbx
+
+	mov	-16(up, un, 8), u1
+	mov	-8(up, un, 8), u2
+
+	mov	u1, t0
+	mov	u2, t1
+	sub	d0, t0
+	sbb	d1, t1
+	cmovnc  t0, u1
+	cmovnc	t1, u2
+	C push qh which is !carry
+	sbb	%rax, %rax
+	inc	%rax
+	push	%rax
+	lea	-2(un), un
+	mov	d1, md1
+	neg	md1
+
+	jmp	L(next)
+
+	ALIGN(16)
+L(loop):
+	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
+	C Based on the optimized divrem_2.asm code.
+
+	mov	di, %rax
+	mul	u2
+	mov	u1, t0
+	add	%rax, t0	C q0 in t0
+	adc	u2, %rdx
+	mov	%rdx, t1	C q in t1
+	imul	md1, %rdx
+	mov	d0, %rax
+	lea	(%rdx, u1), u2
+	mul	t1
+	mov	(up, un, 8), u1
+	sub	d0, u1
+	sbb	d1, u2
+	sub	%rax, u1
+	sbb	%rdx, u2
+	xor	R32(%rax), R32(%rax)
+	xor	R32(%rdx), R32(%rdx)
+	cmp	t0, u2
+	cmovnc	d0, %rax
+	cmovnc	d1, %rdx
+	adc	$0, t1
+	nop
+	add	%rax, u1
+	adc	%rdx, u2
+	cmp	d1, u2
+	jae	L(fix)
+L(bck):
+	mov	t1, (qp, un, 8)
+L(next):
+	sub	$1, un
+	jnc	L(loop)
+L(end):
+	mov	u2, 8(rp)
+	mov	u1, (rp)
+
+	C qh on stack
+	pop	%rax
+
+	pop	%rbx
+	pop	%r12
+	pop	%r13
+	pop	%r14
+	pop	%r15
+	FUNC_EXIT()
+	ret
+
+L(fix):	C Unlikely update. u2 >= d1
+	seta	%dl
+	cmp	d0, u1
+	setae	%al
+	orb	%dl, %al		C "orb" form to placate Sun tools
+	je	L(bck)
+	inc	t1
+	sub	d0, u1
+	sbb	d1, u2
+	jmp	L(bck)
+EPILOGUE()
diff --git a/mpn/x86_64/div_qr_2u_pi1.asm b/mpn/x86_64/div_qr_2u_pi1.asm
new file mode 100644
index 0000000..2561fd4
--- /dev/null
+++ b/mpn/x86_64/div_qr_2u_pi1.asm
@@ -0,0 +1,200 @@
+dnl  x86-64 mpn_div_qr_2u_pi1
+dnl  -- Divide an mpn number by an unnormalized 2-limb number,
+dnl     using a single-limb inverse and shifting the dividend on the fly.
+
+dnl  Copyright 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C		c/l
+C INPUT PARAMETERS
+define(`qp',		`%rdi')
+define(`rp',		`%rsi')
+define(`up_param',	`%rdx')
+define(`un_param',	`%rcx') dnl %rcx needed for shift count
+define(`d1',		`%r8')
+define(`d0',		`%r9')
+define(`shift_param',	`FRAME+8(%rsp)')
+define(`di_param',	`FRAME+16(%rsp)')
+
+define(`di',		`%r10')
+define(`up',		`%r11')
+define(`un',		`%rbp')
+define(`u2',		`%rbx')
+define(`u1',		`%r12')
+define(`u0',		`%rsi') dnl Same as rp, which is saved and restored.
+define(`t1',		`%r13')
+define(`t0',		`%r14')
+define(`md1',		`%r15')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+deflit(`FRAME', 0)
+PROLOGUE(mpn_div_qr_2u_pi1)
+	mov	di_param, di
+	mov	up_param, up
+	push	%r15
+	push	%r14
+	push	%r13
+	push	%r12
+	push	%rbx
+	push	%rbp
+	push	rp
+deflit(`FRAME', 56)
+	lea	-2(un_param), un
+	mov	d1, md1
+	neg	md1
+
+	C int parameter, 32 bits only
+	movl	shift_param, R32(%rcx)
+
+	C FIXME: Different code for SHLD_SLOW
+
+	xor	R32(u2), R32(u2)
+	mov	8(up, un, 8), u1
+	shld	%cl, u1, u2
+	C Remains to read (up, un, 8) and shift u1, u0
+	C udiv_qr_3by2 (qh,u2,u1,u2,u1,n0, d1,d0,di)
+	mov	di, %rax
+	mul	u2
+	mov	(up, un, 8), u0
+	shld	%cl, u0, u1
+	mov	u1, t0
+	add	%rax, t0	C q0 in t0
+	adc	u2, %rdx
+	mov	%rdx, t1	C q in t1
+	imul	md1, %rdx
+	mov	d0, %rax
+	lea	(%rdx, u1), u2
+	mul	t1
+	mov	u0, u1
+	shl	%cl, u1
+	sub	d0, u1
+	sbb	d1, u2
+	sub	%rax, u1
+	sbb	%rdx, u2
+	xor	R32(%rax), R32(%rax)
+	xor	R32(%rdx), R32(%rdx)
+	cmp	t0, u2
+	cmovnc	d0, %rax
+	cmovnc	d1, %rdx
+	adc	$0, t1
+	nop
+	add	%rax, u1
+	adc	%rdx, u2
+	cmp	d1, u2
+	jae	L(fix_qh)
+L(bck_qh):
+	push	t1	C push qh on stack
+
+	jmp	L(next)
+
+	ALIGN(16)
+L(loop):
+	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
+	C Based on the optimized divrem_2.asm code.
+
+	mov	di, %rax
+	mul	u2
+	mov	(up, un, 8), u0
+	xor	R32(t1), R32(t1)
+	shld	%cl, u0, t1
+	or	t1, u1
+	mov	u1, t0
+	add	%rax, t0	C q0 in t0
+	adc	u2, %rdx
+	mov	%rdx, t1	C q in t1
+	imul	md1, %rdx
+	mov	d0, %rax
+	lea	(%rdx, u1), u2
+	mul	t1
+	mov	u0, u1
+	shl	%cl, u1
+	sub	d0, u1
+	sbb	d1, u2
+	sub	%rax, u1
+	sbb	%rdx, u2
+	xor	R32(%rax), R32(%rax)
+	xor	R32(%rdx), R32(%rdx)
+	cmp	t0, u2
+	cmovnc	d0, %rax
+	cmovnc	d1, %rdx
+	adc	$0, t1
+	nop
+	add	%rax, u1
+	adc	%rdx, u2
+	cmp	d1, u2
+	jae	L(fix)
+L(bck):
+	mov	t1, (qp, un, 8)
+L(next):
+	sub	$1, un
+	jnc	L(loop)
+L(end):
+	C qh on stack
+	pop	%rax
+	pop	rp
+	shrd	%cl, u2, u1
+	shr	%cl, u2
+	mov	u2, 8(rp)
+	mov	u1, (rp)
+
+	pop	%rbp
+	pop	%rbx
+	pop	%r12
+	pop	%r13
+	pop	%r14
+	pop	%r15
+	ret
+
+L(fix):	C Unlikely update. u2 >= d1
+	seta	%dl
+	cmp	d0, u1
+	setae	%al
+	orb	%dl, %al		C "orb" form to placate Sun tools
+	je	L(bck)
+	inc	t1
+	sub	d0, u1
+	sbb	d1, u2
+	jmp	L(bck)
+
+C Duplicated, just jumping back to a different address.
+L(fix_qh):	C Unlikely update. u2 >= d1
+	seta	%dl
+	cmp	d0, u1
+	setae	%al
+	orb	%dl, %al		C "orb" form to placate Sun tools
+	je	L(bck_qh)
+	inc	t1
+	sub	d0, u1
+	sbb	d1, u2
+	jmp	L(bck_qh)
+EPILOGUE()
diff --git a/mpn/x86_64/dive_1.asm b/mpn/x86_64/dive_1.asm
new file mode 100644
index 0000000..d9a30b1
--- /dev/null
+++ b/mpn/x86_64/dive_1.asm
@@ -0,0 +1,158 @@
+dnl  AMD64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2004-2006, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C AMD K8,K9	10
+C AMD K10	10
+C Intel P4	33
+C Intel core2	13.25
+C Intel corei	14
+C Intel atom	42
+C VIA nano	43
+
+C A quick adaptation of the 32-bit K7 code.
+
+
+C INPUT PARAMETERS
+C rp		rdi
+C up		rsi
+C n		rdx
+C divisor	rcx
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_divexact_1)
+	FUNC_ENTRY(4)
+	push	%rbx
+
+	mov	%rcx, %rax
+	xor	R32(%rcx), R32(%rcx)	C shift count
+	mov	%rdx, %r8
+
+	bt	$0, R32(%rax)
+	jnc	L(evn)			C skip bsfq unless divisor is even
+
+L(odd):	mov	%rax, %rbx
+	shr	R32(%rax)
+	and	$127, R32(%rax)		C d/2, 7 bits
+
+	LEA(	binvert_limb_table, %rdx)
+
+	movzbl	(%rdx,%rax), R32(%rax)	C inv 8 bits
+
+	mov	%rbx, %r11		C d without twos
+
+	lea	(%rax,%rax), R32(%rdx)	C 2*inv
+	imul	R32(%rax), R32(%rax)	C inv*inv
+	imul	R32(%rbx), R32(%rax)	C inv*inv*d
+	sub	R32(%rax), R32(%rdx)	C inv = 2*inv - inv*inv*d, 16 bits
+
+	lea	(%rdx,%rdx), R32(%rax)	C 2*inv
+	imul	R32(%rdx), R32(%rdx)	C inv*inv
+	imul	R32(%rbx), R32(%rdx)	C inv*inv*d
+	sub	R32(%rdx), R32(%rax)	C inv = 2*inv - inv*inv*d, 32 bits
+
+	lea	(%rax,%rax), %r10	C 2*inv
+	imul	%rax, %rax		C inv*inv
+	imul	%rbx, %rax		C inv*inv*d
+	sub	%rax, %r10		C inv = 2*inv - inv*inv*d, 64 bits
+
+	lea	(%rsi,%r8,8), %rsi	C up end
+	lea	-8(%rdi,%r8,8), %rdi	C rp end
+	neg	%r8			C -n
+
+	mov	(%rsi,%r8,8), %rax	C up[0]
+
+	inc	%r8
+	jz	L(one)
+
+	mov	(%rsi,%r8,8), %rdx	C up[1]
+
+	shrd	R8(%rcx), %rdx, %rax
+
+	xor	R32(%rbx), R32(%rbx)
+	jmp	L(ent)
+
+L(evn):	bsf	%rax, %rcx
+	shr	R8(%rcx), %rax
+	jmp	L(odd)
+
+	ALIGN(8)
+L(top):
+	C rax	q
+	C rbx	carry bit, 0 or 1
+	C rcx	shift
+	C rdx
+	C rsi	up end
+	C rdi	rp end
+	C r8	counter, limbs, negative
+	C r10	d^(-1) mod 2^64
+	C r11	d, shifted down
+
+	mul	%r11			C carry limb in rdx	0 10
+	mov	-8(%rsi,%r8,8), %rax	C
+	mov	(%rsi,%r8,8), %r9	C
+	shrd	R8(%rcx), %r9, %rax	C
+	nop				C
+	sub	%rbx, %rax		C apply carry bit
+	setc	%bl			C
+	sub	%rdx, %rax		C apply carry limb	5
+	adc	$0, %rbx		C			6
+L(ent):	imul	%r10, %rax		C			6
+	mov	%rax, (%rdi,%r8,8)	C
+	inc	%r8			C
+	jnz	L(top)
+
+	mul	%r11			C carry limb in rdx
+	mov	-8(%rsi), %rax		C up high limb
+	shr	R8(%rcx), %rax
+	sub	%rbx, %rax		C apply carry bit
+	sub	%rdx, %rax		C apply carry limb
+	imul	%r10, %rax
+	mov	%rax, (%rdi)
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+
+L(one):	shr	R8(%rcx), %rax
+	imul	%r10, %rax
+	mov	%rax, (%rdi)
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+
+EPILOGUE()
diff --git a/mpn/x86_64/divrem_1.asm b/mpn/x86_64/divrem_1.asm
new file mode 100644
index 0000000..a93e34f
--- /dev/null
+++ b/mpn/x86_64/divrem_1.asm
@@ -0,0 +1,314 @@
+dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 2004, 2005, 2007-2012, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C		norm	unorm	frac
+C AMD K8,K9	13	13	12
+C AMD K10	13	13	12
+C Intel P4	43	44	43
+C Intel core2	24.5	24.5	19.5
+C Intel corei	20.5	19.5	18
+C Intel atom	43	46	36
+C VIA nano	25.5	25.5	24
+
+C mp_limb_t
+C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
+C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
+
+C mp_limb_t
+C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
+C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
+C                      mp_limb_t dinv, int cnt)
+
+C INPUT PARAMETERS
+define(`qp',		`%rdi')
+define(`fn_param',	`%rsi')
+define(`up_param',	`%rdx')
+define(`un_param',	`%rcx')
+define(`d',		`%r8')
+define(`dinv',		`%r9')		C only for mpn_preinv_divrem_1
+C       shift passed on stack		C only for mpn_preinv_divrem_1
+
+define(`cnt',		`%rcx')
+define(`up',		`%rsi')
+define(`fn',		`%r12')
+define(`un',		`%rbx')
+
+
+C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
+C         cnt         qp      d  dinv
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFSTD(`define(`CNTOFF',		`40($1)')')
+IFDOS(`define(`CNTOFF',		`104($1)')')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_preinv_divrem_1)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+IFDOS(`	mov	64(%rsp), %r9	')
+	xor	R32(%rax), R32(%rax)
+	push	%r13
+	push	%r12
+	push	%rbp
+	push	%rbx
+
+	mov	fn_param, fn
+	mov	un_param, un
+	add	fn_param, un_param
+	mov	up_param, up
+
+	lea	-8(qp,un_param,8), qp
+
+	test	d, d
+	js	L(nent)
+
+	mov	CNTOFF(%rsp), R8(cnt)
+	shl	R8(cnt), d
+	jmp	L(uent)
+EPILOGUE()
+
+	ALIGN(16)
+PROLOGUE(mpn_divrem_1)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+	xor	R32(%rax), R32(%rax)
+	push	%r13
+	push	%r12
+	push	%rbp
+	push	%rbx
+
+	mov	fn_param, fn
+	mov	un_param, un
+	add	fn_param, un_param
+	mov	up_param, up
+	je	L(ret)
+
+	lea	-8(qp,un_param,8), qp
+	xor	R32(%rbp), R32(%rbp)
+
+	test	d, d
+	jns	L(unnormalized)
+
+L(normalized):
+	test	un, un
+	je	L(8)			C un == 0
+	mov	-8(up,un,8), %rbp
+	dec	un
+	mov	%rbp, %rax
+	sub	d, %rbp
+	cmovc	%rax, %rbp
+	sbb	R32(%rax), R32(%rax)
+	inc	R32(%rax)
+	mov	%rax, (qp)
+	lea	-8(qp), qp
+L(8):
+IFSTD(`	push	%rdi		')
+IFSTD(`	push	%rsi		')
+	push	%r8
+IFSTD(`	mov	d, %rdi		')
+IFDOS(`	sub	$32, %rsp	')
+IFDOS(`	mov	d, %rcx		')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_invert_limb)
+IFDOS(`	add	$32, %rsp	')
+	pop	%r8
+IFSTD(`	pop	%rsi		')
+IFSTD(`	pop	%rdi		')
+
+	mov	%rax, dinv
+	mov	%rbp, %rax
+	jmp	L(nent)
+
+	ALIGN(16)
+L(ntop):mov	(up,un,8), %r10		C	    K8-K10  P6-CNR P6-NHM  P4
+	mul	dinv			C	      0,13   0,20   0,18   0,45
+	add	%r10, %rax		C	      4      8      3     12
+	adc	%rbp, %rdx		C	      5      9     10     13
+	mov	%rax, %rbp		C	      5      9      4     13
+	mov	%rdx, %r13		C	      6     11     12     23
+	imul	d, %rdx			C	      6     11     11     23
+	sub	%rdx, %r10		C	     10     16     14     33
+	mov	d, %rax			C
+	add	%r10, %rax		C	     11     17     15     34
+	cmp	%rbp, %r10		C	     11     17     15     34
+	cmovc	%r10, %rax		C	     12     18     16     35
+	adc	$-1, %r13		C
+	cmp	d, %rax			C
+	jae	L(nfx)			C
+L(nok):	mov	%r13, (qp)		C
+	sub	$8, qp			C
+L(nent):lea	1(%rax), %rbp		C
+	dec	un			C
+	jns	L(ntop)			C
+
+	xor	R32(%rcx), R32(%rcx)
+	jmp	L(frac)
+
+L(nfx):	sub	d, %rax
+	inc	%r13
+	jmp	L(nok)
+
+L(unnormalized):
+	test	un, un
+	je	L(44)
+	mov	-8(up,un,8), %rax
+	cmp	d, %rax
+	jae	L(44)
+	mov	%rbp, (qp)
+	mov	%rax, %rbp
+	lea	-8(qp), qp
+	je	L(ret)
+	dec	un
+L(44):
+	bsr	d, %rcx
+	not	R32(%rcx)
+	shl	R8(%rcx), d
+	shl	R8(%rcx), %rbp
+
+	push	%rcx
+IFSTD(`	push	%rdi		')
+IFSTD(`	push	%rsi		')
+	push	%r8
+IFSTD(`	sub	$8, %rsp	')
+IFSTD(`	mov	d, %rdi		')
+IFDOS(`	sub	$40, %rsp	')
+IFDOS(`	mov	d, %rcx		')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_invert_limb)
+IFSTD(`	add	$8, %rsp	')
+IFDOS(`	add	$40, %rsp	')
+	pop	%r8
+IFSTD(`	pop	%rsi		')
+IFSTD(`	pop	%rdi		')
+	pop	%rcx
+
+	mov	%rax, dinv
+	mov	%rbp, %rax
+	test	un, un
+	je	L(frac)
+
+L(uent):dec	un
+	mov	(up,un,8), %rbp
+	neg	R32(%rcx)
+	shr	R8(%rcx), %rbp
+	neg	R32(%rcx)
+	or	%rbp, %rax
+	jmp	L(ent)
+
+	ALIGN(16)
+L(utop):mov	(up,un,8), %r10
+	shl	R8(%rcx), %rbp
+	neg	R32(%rcx)
+	shr	R8(%rcx), %r10
+	neg	R32(%rcx)
+	or	%r10, %rbp
+	mul	dinv
+	add	%rbp, %rax
+	adc	%r11, %rdx
+	mov	%rax, %r11
+	mov	%rdx, %r13
+	imul	d, %rdx
+	sub	%rdx, %rbp
+	mov	d, %rax
+	add	%rbp, %rax
+	cmp	%r11, %rbp
+	cmovc	%rbp, %rax
+	adc	$-1, %r13
+	cmp	d, %rax
+	jae	L(ufx)
+L(uok):	mov	%r13, (qp)
+	sub	$8, qp
+L(ent):	mov	(up,un,8), %rbp
+	dec	un
+	lea	1(%rax), %r11
+	jns	L(utop)
+
+L(uend):shl	R8(%rcx), %rbp
+	mul	dinv
+	add	%rbp, %rax
+	adc	%r11, %rdx
+	mov	%rax, %r11
+	mov	%rdx, %r13
+	imul	d, %rdx
+	sub	%rdx, %rbp
+	mov	d, %rax
+	add	%rbp, %rax
+	cmp	%r11, %rbp
+	cmovc	%rbp, %rax
+	adc	$-1, %r13
+	cmp	d, %rax
+	jae	L(efx)
+L(eok):	mov	%r13, (qp)
+	sub	$8, qp
+	jmp	L(frac)
+
+L(ufx):	sub	d, %rax
+	inc	%r13
+	jmp	L(uok)
+L(efx):	sub	d, %rax
+	inc	%r13
+	jmp	L(eok)
+
+L(frac):mov	d, %rbp
+	neg	%rbp
+	jmp	L(fent)
+
+	ALIGN(16)			C	    K8-K10  P6-CNR P6-NHM  P4
+L(ftop):mul	dinv			C	      0,12   0,17   0,17
+	add	%r11, %rdx		C	      5      8     10
+	mov	%rax, %r11		C	      4      8      3
+	mov	%rdx, %r13		C	      6      9     11
+	imul	%rbp, %rdx		C	      6      9     11
+	mov	d, %rax			C
+	add	%rdx, %rax		C	     10     14     14
+	cmp	%r11, %rdx		C	     10     14     14
+	cmovc	%rdx, %rax		C	     11     15     15
+	adc	$-1, %r13		C
+	mov	%r13, (qp)		C
+	sub	$8, qp			C
+L(fent):lea	1(%rax), %r11		C
+	dec	fn			C
+	jns	L(ftop)			C
+
+	shr	R8(%rcx), %rax
+L(ret):	pop	%rbx
+	pop	%rbp
+	pop	%r12
+	pop	%r13
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/divrem_2.asm b/mpn/x86_64/divrem_2.asm
new file mode 100644
index 0000000..9bf137f
--- /dev/null
+++ b/mpn/x86_64/divrem_2.asm
@@ -0,0 +1,192 @@
+dnl  x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl  Copyright 2007, 2008, 2010, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb	best
+C AMD K8,K9	18
+C AMD K10	18
+C AMD bull
+C AMD pile
+C AMD bobcat
+C AMD jaguar
+C Intel P4	68
+C Intel core	34
+C Intel NHM	30.25
+C Intel SBR	21.3
+C Intel IBR	21.4
+C Intel HWL	20.6
+C Intel BWL
+C Intel atom	73
+C VIA nano	33
+
+
+C INPUT PARAMETERS
+define(`qp',		`%rdi')
+define(`fn',		`%rsi')
+define(`up_param',	`%rdx')
+define(`un_param',	`%rcx')
+define(`dp',		`%r8')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_divrem_2)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+	push	%r15
+	push	%r14
+	push	%r13
+	push	%r12
+	lea	-24(%rdx,%rcx,8), %r12	C r12 = &up[un-1]
+	mov	%rsi, %r13
+	push	%rbp
+	mov	%rdi, %rbp
+	push	%rbx
+	mov	8(%r8), %r11		C d1
+	mov	16(%r12), %rbx
+	mov	(%r8), %r8		C d0
+	mov	8(%r12), %r10
+
+	xor	R32(%r15), R32(%r15)
+	cmp	%rbx, %r11
+	ja	L(2)
+	setb	%dl
+	cmp	%r10, %r8
+	setbe	%al
+	orb	%al, %dl		C "orb" form to placate Sun tools
+	je	L(2)
+	inc	R32(%r15)
+	sub	%r8, %r10
+	sbb	%r11, %rbx
+L(2):
+	lea	-3(%rcx,%r13), %r14	C un + fn - 3
+	test	%r14, %r14
+	js	L(end)
+
+	push	%r8
+	push	%r10
+	push	%r11
+IFSTD(`	mov	%r11, %rdi	')
+IFDOS(`	mov	%r11, %rcx	')
+IFDOS(`	sub	$32, %rsp	')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_invert_limb)
+IFDOS(`	add	$32, %rsp	')
+	pop	%r11
+	pop	%r10
+	pop	%r8
+
+	mov	%r11, %rdx
+	mov	%rax, %rdi
+	imul	%rax, %rdx
+	mov	%rdx, %r9
+	mul	%r8
+	xor	R32(%rcx), R32(%rcx)
+	add	%r8, %r9
+	adc	$-1, %rcx
+	add	%rdx, %r9
+	adc	$0, %rcx
+	js	2f
+1:	dec	%rdi
+	sub	%r11, %r9
+	sbb	$0, %rcx
+	jns	1b
+2:
+
+	lea	(%rbp,%r14,8), %rbp
+	mov	%r11, %rsi
+	neg	%rsi			C -d1
+
+C rax rbx rcx rdx rsi rdi  rbp r8 r9 r10 r11 r12 r13 r14 r15
+C     n2  un      -d1 dinv qp  d0 q0     d1  up  fn      msl
+
+	ALIGN(16)
+L(top):	mov	%rdi, %rax		C di		ncp
+	mul	%rbx			C		0, 17
+	mov	%r10, %rcx		C
+	add	%rax, %rcx		C		4
+	adc	%rbx, %rdx		C		5
+	mov	%rdx, %r9		C q		6
+	imul	%rsi, %rdx		C		6
+	mov	%r8, %rax		C		ncp
+	lea	(%rdx, %r10), %rbx	C n1 -= ...	10
+	xor	R32(%r10), R32(%r10)	C
+	mul	%r9			C		7
+	cmp	%r14, %r13		C
+	jg	L(19)			C
+	mov	(%r12), %r10		C
+	sub	$8, %r12		C
+L(19):	sub	%r8, %r10		C		ncp
+	sbb	%r11, %rbx		C		11
+	sub	%rax, %r10		C		11
+	sbb	%rdx, %rbx		C		12
+	xor	R32(%rax), R32(%rax)	C
+	xor	R32(%rdx), R32(%rdx)	C
+	cmp	%rcx, %rbx		C		13
+	cmovnc	%r8, %rax		C		14
+	cmovnc	%r11, %rdx		C		14
+	adc	$0, %r9			C adjust q	14
+	nop
+	add	%rax, %r10		C		15
+	adc	%rdx, %rbx		C		16
+	cmp	%r11, %rbx		C
+	jae	L(fix)			C
+L(bck):	mov	%r9, (%rbp)		C
+	sub	$8, %rbp		C
+	dec	%r14
+	jns	L(top)
+
+L(end):	mov	%r10, 8(%r12)
+	mov	%rbx, 16(%r12)
+	pop	%rbx
+	pop	%rbp
+	pop	%r12
+	pop	%r13
+	pop	%r14
+	mov	%r15, %rax
+	pop	%r15
+	FUNC_EXIT()
+	ret
+
+L(fix):	seta	%dl
+	cmp	%r8, %r10
+	setae	%al
+	orb	%dl, %al		C "orb" form to placate Sun tools
+	je	L(bck)
+	inc	%r9
+	sub	%r8, %r10
+	sbb	%r11, %rbx
+	jmp	L(bck)
+EPILOGUE()
diff --git a/mpn/x86_64/dos64.m4 b/mpn/x86_64/dos64.m4
new file mode 100644
index 0000000..0da1b36
--- /dev/null
+++ b/mpn/x86_64/dos64.m4
@@ -0,0 +1,101 @@
+divert(-1)
+dnl  Copyright 2011-2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+dnl  Define HOST_DOS64 so other m4 files can test for the Windows/DOS
+dnl  x86-64 target (Microsoft x64 calling convention).
+define(`HOST_DOS64')
+
+
+dnl  On DOS64 we always generate position-independent-code
+dnl
+
+define(`PIC')
+
+
+dnl  Usage: LEA(symbol,reg)
+dnl
+dnl  Load the address of symbol into reg, RIP-relative, the
+dnl  position-independent form for this target.
+
+define(`LEA',`
+	lea	$1(%rip), $2
+')
+
+
+dnl  Usage: CALL(funcname)
+dnl
+dnl  Simply override the definition in x86_64-defs.m4.
+
+define(`CALL',`call	GSYM_PREFIX`'$1')
+define(`TCALL',`jmp	GSYM_PREFIX`'$1')
+
+
+dnl  Usage: JUMPTABSECT
+
+define(`JUMPTABSECT', `RODATA')
+
+
+dnl  Usage: JMPENT(targlabel,tablabel)
+dnl
+dnl  Emit a 32-bit offset relative to tablabel, keeping jump tables
+dnl  position independent.
+
+define(`JMPENT', `.long	$1-$2')
+
+
+dnl  Usage: FUNC_ENTRY(nregparmas)
+dnl  Usage: FUNC_EXIT()
+
+dnl  FUNC_ENTRY and FUNC_EXIT provide an easy path for adoption of standard
+dnl  ABI assembly to the DOS64 ABI.
+
+dnl  FUNC_ENTRY saves rdi and rsi (callee-saved in the Microsoft ABI) and
+dnl  copies the first nregparmas arguments from the Windows argument
+dnl  registers rcx, rdx, r8, r9 into the System V argument registers
+dnl  rdi, rsi, rdx, rcx, so the routine body can be written against the
+dnl  standard ABI.  NOTE(review): the 32-byte shadow space is not
+dnl  allocated here; call sites appear to handle that themselves.
+
+define(`FUNC_ENTRY',
+	`push	%rdi
+	push	%rsi
+	mov	%rcx, %rdi
+ifelse(eval($1>=2),1,`dnl
+	mov	%rdx, %rsi
+ifelse(eval($1>=3),1,`dnl
+	mov	%r8, %rdx
+ifelse(eval($1>=4),1,`dnl
+	mov	%r9, %rcx
+')')')')
+
+define(`FUNC_EXIT',
+	`pop	%rsi
+	pop	%rdi')
+
+
+dnl  Target ABI macros.  For DOS64 we override the defaults.
+
+define(`IFDOS',   `$1')
+define(`IFSTD',   `')
+define(`IFELF',   `')
+
+
+dnl  Usage: PROTECT(symbol)
+dnl
+dnl  Used for private GMP symbols that should never be overridden by users.
+dnl  This can save reloc entries and improve shlib sharing as well as
+dnl  application startup times
+
+dnl  No-op on this target.
+
+define(`PROTECT',  `')
+
+
+divert`'dnl
diff --git a/mpn/x86_64/gcd_11.asm b/mpn/x86_64/gcd_11.asm
new file mode 100644
index 0000000..e78b5dd
--- /dev/null
+++ b/mpn/x86_64/gcd_11.asm
@@ -0,0 +1,114 @@
+dnl  AMD64 mpn_gcd_11 -- 1 x 1 gcd.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012, 2017 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/bit
+C AMD K8,K9	 5.5
+C AMD K10	 ?
+C AMD bd1	 ?
+C AMD bd2	 ?
+C AMD bd3	 ?
+C AMD bd4	 ?
+C AMD bt1	 7.1
+C AMD bt2	 ?
+C AMD zn1	 ?
+C AMD zn2	 ?
+C Intel P4	 ?
+C Intel CNR	 ?
+C Intel PNR	 ?
+C Intel NHM	 ?
+C Intel WSM	 ?
+C Intel SBR	 ?
+C Intel IBR	 ?
+C Intel HWL	 ?
+C Intel BWL	 ?
+C Intel SKL	 ?
+C Intel atom	 9.1
+C Intel SLM	 6.9
+C Intel GLM	 6.0
+C Intel GLM+	 5.8
+C VIA nano	 ?
+
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+
+C MAXSHIFT bounds how many trailing zeros one table lookup can remove;
+C larger counts are handled by iterating (see L(shift_alot) below).
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+DEF_OBJECT(ctz_table,64)
+	.byte	MAXSHIFT
+forloop(i,1,MASK,
+`	.byte	m4_count_trailing_zeros(i)
+')
+END_OBJECT(ctz_table)
+
+C Operand registers, as delivered by the standard ABI.
+define(`u0',    `%rdi')
+define(`v0',    `%rsi')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(64)
+C mpn_gcd_11(u, v) -- gcd of two single limbs, by table-driven binary gcd.
+C NOTE(review): presumably both operands are odd on entry, per the usual
+C mpn_gcd_11 contract; the code only strips zeros from the difference.
+PROLOGUE(mpn_gcd_11)
+	FUNC_ENTRY(2)
+	LEA(	ctz_table, %r8)
+	jmp	L(ent)
+
+	ALIGN(16)
+C At L(top): rax = u, rdx = v - u, carry set iff u < v.
+L(top):	cmovc	%rdx, u0		C u = |u - v|
+	cmovc	%rax, v0		C v = min(u,v)
+L(mid):	and	$MASK, R32(%rdx)	C low bits of the difference
+	movzbl	(%r8,%rdx), R32(%rcx)	C cnt = trailing zero count
+	jz	L(shift_alot)		C ZF from the and: low bits all zero
+	shr	R8(%rcx), u0		C strip trailing zeros from u
+L(ent):	mov	u0, %rax
+	mov	v0, %rdx
+	sub	u0, %rdx		C rdx = v - u
+	sub	v0, u0			C u -= v, carry iff u < v
+	jnz	L(top)
+
+L(end):	C rax = result
+	C rdx = 0 for the benefit of internal gcd_22 call
+	FUNC_EXIT()
+	ret
+
+C More than MAXSHIFT trailing zeros: shift out MAXSHIFT of them and
+C redo the table lookup on the remaining low bits.
+L(shift_alot):
+	shr	$MAXSHIFT, u0
+	mov	u0, %rdx
+	jmp	L(mid)
+EPILOGUE()
diff --git a/mpn/x86_64/gcd_22.asm b/mpn/x86_64/gcd_22.asm
new file mode 100644
index 0000000..08909d6
--- /dev/null
+++ b/mpn/x86_64/gcd_22.asm
@@ -0,0 +1,163 @@
+dnl  AMD64 mpn_gcd_22.  Assumes useless bsf, useless shrd, no tzcnt, no shlx.
+dnl  We actually use tzcnt here, when table cannot count bits, as tzcnt always
+dnl  works for our use, and helps a lot for certain CPUs.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/bit
+C AMD K8,K9	 8.9
+C AMD K10	 8.8
+C AMD bd1	 9.7
+C AMD bd2	 7.8
+C AMD bd3	 ?
+C AMD bd4	 7.4
+C AMD bt1	 9.2
+C AMD bt2	 9.1
+C AMD zn1	 7.5
+C AMD zn2	 7.5
+C Intel P4	 ?
+C Intel CNR	10.5
+C Intel PNR	10.5
+C Intel NHM	 9.7
+C Intel WSM	 9.7
+C Intel SBR	10.7
+C Intel IBR	 ?
+C Intel HWL	 9.5
+C Intel BWL	 8.7
+C Intel SKL	 8.6
+C Intel atom	18.9
+C Intel SLM	14.0
+C Intel GLM	 9.8
+C Intel GLM+	 8.8
+C VIA nano	 ?
+
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+
+C MAXSHIFT bounds how many trailing zeros one table lookup can remove;
+C a zero low byte instead routes to a tzcnt instruction, see
+C L(count_better) below.
+deflit(MAXSHIFT, 8)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+DEF_OBJECT(ctz_table,64)
+	.byte	MAXSHIFT
+forloop(i,1,MASK,
+`	.byte	m4_count_trailing_zeros(i)
+')
+END_OBJECT(ctz_table)
+
+C Incoming arguments (standard ABI): u = (u1,u0), v = (v1,v0_param).
+define(`u1',    `%rdi')
+define(`u0',    `%rsi')
+define(`v1',    `%rdx')
+define(`v0_param', `%rcx')
+
+C v0 moves to rax at entry, freeing rcx for the shift count.
+define(`v0',    `%rax')
+define(`cnt',   `%rcx')
+
+C Scratch: s = saved copy of u, t = v - u.  Note t0 aliases cnt (rcx);
+C t0 is consumed by the table lookup that produces cnt.
+define(`s0',    `%r8')
+define(`s1',    `%r9')
+define(`t0',    `%rcx')
+define(`t1',    `%r11')
+
+dnl ABI_SUPPORT(DOS64)	C returns mp_double_limb_t in memory
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(64)
+C mpn_gcd_22 -- gcd of the two-limb numbers (u1,u0) and (v1,v0).
+C NOTE(review): presumably both operands are odd, per the usual
+C mpn_gcd_22 contract; the code only strips zeros from differences.
+PROLOGUE(mpn_gcd_22)
+	FUNC_ENTRY(4)
+	mov	v0_param, v0
+
+	LEA(	ctz_table, %r10)
+
+	ALIGN(16)
+L(top):	mov	v0, t0
+	sub	u0, t0
+	jz	L(lowz)		C	jump when low limb result = 0
+	mov	v1, t1
+	sbb	u1, t1		C	t = v - u
+
+	mov	u0, s0
+	mov	u1, s1		C	s = u
+
+	sub	v0, u0
+	sbb	v1, u1		C	u = u - v, carry iff u < v
+
+L(bck):	cmovc	t0, u0		C u = |u - v|
+	cmovc	t1, u1		C u = |u - v|
+	cmovc	s0, v0		C v = min(u,v)
+	cmovc	s1, v1		C v = min(u,v)
+
+C Strip the trailing zeros of the difference now held in u.
+	and	$MASK, R32(t0)
+	movzbl	(%r10,t0), R32(cnt)
+	jz	L(count_better)	C ZF from the and: low byte all zero
+C Rightshift (u1,,u0) into (u1,,u0)
+L(shr):	shr	R8(cnt), u0
+	mov	u1, t1
+	shr	R8(cnt), u1
+	neg	cnt		C 64-cnt mod 64; x86 masks shifts to 6 bits
+	shl	R8(cnt), t1
+	or	t1, u0
+
+	test	v1, v1
+	jnz	L(top)
+	test	u1, u1
+	jnz	L(top)
+
+C Both high limbs zero: finish as a single-limb gcd.  rdi gets v0;
+C u0 already lives in rsi, hence the commented-out move.
+L(gcd_11):
+	mov	v0, %rdi
+C	mov	u0, %rsi
+	TCALL(	mpn_gcd_11)
+
+C Low byte of the difference was all zeros: count with tzcnt instead.
+L(count_better):
+	rep;bsf	u0, cnt		C tzcnt!
+	jmp	L(shr)
+
+L(lowz):C We come here when v0 - u0 = 0
+	C 1. If v1 - u1 = 0, then gcd is u = v.
+	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+	mov	v1, t0
+	sub	u1, t0
+	je	L(end)
+
+	xor	t1, t1
+	mov	u0, s0
+	mov	u1, s1
+	mov	u1, u0
+	xor	u1, u1
+	sub	v1, u0
+	jmp	L(bck)
+
+L(end):	C mov	v0, %rax
+	C mov	v1, %rdx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/gmp-mparam.h b/mpn/x86_64/gmp-mparam.h
new file mode 100644
index 0000000..db94fb7
--- /dev/null
+++ b/mpn/x86_64/gmp-mparam.h
@@ -0,0 +1,217 @@
+/* AMD K8-K10 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+
+/* Algorithm-selection thresholds, in limbs.  These values are produced
+   by the GMP tuneup program for AMD K8-K10; regenerate rather than
+   hand-edit when retuning.  */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        14
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        28
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           15
+
+/* Operand sizes at which each Toom-Cook multiplication variant
+   starts to win.  */
+#define MUL_TOOM22_THRESHOLD                27
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               234
+#define MUL_TOOM6H_THRESHOLD               418
+#define MUL_TOOM8H_THRESHOLD               466
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     160
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     145
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     175
+
+/* Squaring thresholds, analogous to the multiplication ones above.  */
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 36
+#define SQR_TOOM3_THRESHOLD                117
+#define SQR_TOOM4_THRESHOLD                327
+#define SQR_TOOM6_THRESHOLD                446
+#define SQR_TOOM8_THRESHOLD                547
+
+#define MULMID_TOOM42_THRESHOLD             36
+
+#define MULMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               17
+
+/* Tuning table for the side-channel-silent powm code.  */
+#define POWM_SEC_TABLE  2,67,322,991
+#define MUL_FFT_MODF_THRESHOLD             570  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    570, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     25, 8}, {     13, 7}, {     29, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
+    {     23, 7}, {     47, 8}, {     25, 7}, {     51, 8}, \
+    {     29, 9}, {     15, 8}, {     37, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
+    {     55,10}, {     15, 9}, {     43,10}, {     23, 9}, \
+    {     55,10}, {     31, 9}, {     63, 5}, {   1023, 4}, \
+    {   2431, 5}, {   1279, 6}, {    671, 7}, {    367, 8}, \
+    {    189, 9}, {     95, 8}, {    195, 9}, {    111,11}, \
+    {     31, 9}, {    131,10}, {     71, 9}, {    155,10}, \
+    {     79, 9}, {    159,10}, {     87,11}, {     47,10}, \
+    {    111,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    167,11}, {     95,10}, {    191,11}, {    111,12}, \
+    {     63,11}, {    143,10}, {    287,11}, {    159,10}, \
+    {    319,11}, {    175,12}, {     95,11}, {    207,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    543,11}, \
+    {    287,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    335,10}, {    671,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
+    {    223,13}, {    127,12}, {    255,11}, {    543,12}, \
+    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
+    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,12}, {    447,14}, \
+    {    127,13}, {    255,12}, {    543,11}, {   1087,12}, \
+    {    607,11}, {   1215,13}, {    319,12}, {    671,11}, \
+    {   1343,12}, {    735,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    799,11}, {   1599,12}, {    831,13}, \
+    {    447,12}, {    895,11}, {   1791,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
+    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
+    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
+    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
+    {   2175,13}, {   1215,14}, {    639,13}, {   1471,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1855,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,14}, {   1279,13}, {   2687,14}, {   1407,15}, \
+    {    767,14}, {   1535,13}, {   3071,14}, {   1791,16}, \
+    {    511,15}, {   1023,14}, {   2431,15}, {   1279,14}, \
+    {   2815,15}, {   1535,14}, {   3199,15}, {   1791,14}, \
+    {   3583,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 185
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             460  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    460, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     27, 7}, {     14, 6}, \
+    {     29, 7}, {     15, 6}, {     31, 7}, {     29, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
+    {     25, 7}, {     51, 8}, {     29, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
+    {     51, 9}, {     27, 8}, {     55,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     43,10}, {     23, 9}, \
+    {     55,11}, {     15,10}, {     31, 9}, {     71,10}, \
+    {     39, 9}, {     83,10}, {     47, 6}, {    767, 4}, \
+    {   3263, 5}, {   1727, 4}, {   3455, 5}, {   1791, 6}, \
+    {    927, 7}, {    479, 6}, {    959, 7}, {    511, 8}, \
+    {    271, 9}, {    147,10}, {     87,11}, {     47,10}, \
+    {     95,12}, {     31,11}, {     63,10}, {    135,11}, \
+    {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
+    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
+    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,12}, {     95,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    399,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
+    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
+    {   1279,11}, {    671,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    671,11}, {   1343,12}, \
+    {    703,11}, {   1407,12}, {    735,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
+    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    575,12}, {   1215,13}, \
+    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
+    {    383,13}, {    767,12}, {   1599,13}, {    831,12}, \
+    {   1663,13}, {    895,12}, {   1791,13}, {    959,15}, \
+    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
+    {   1215,14}, {    639,13}, {   1471,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1855,15}, {    511,14}, \
+    {   1023,13}, {   2175,14}, {   1151,13}, {   2303,14}, \
+    {   1279,13}, {   2559,14}, {   1407,15}, {    767,14}, \
+    {   1535,13}, {   3071,14}, {   1791,16}, {    511,15}, \
+    {   1023,14}, {   2303,15}, {   1279,14}, {   2687,15}, \
+    {   1535,14}, {   3199,15}, {   1791,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 203
+#define SQR_FFT_THRESHOLD                 5248
+
+/* Division, gcd, radix-conversion and factorial thresholds, in limbs
+   (JACOBI_BASE_METHOD instead selects an algorithm variant).  */
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  35
+#define MULLO_MUL_N_THRESHOLD            15604
+
+#define DC_DIV_QR_THRESHOLD                 56
+#define DC_DIVAPPR_Q_THRESHOLD             220
+#define DC_BDIV_QR_THRESHOLD                52
+#define DC_BDIV_Q_THRESHOLD                152
+
+#define INV_MULMOD_BNM1_THRESHOLD           54
+#define INV_NEWTON_THRESHOLD               226
+#define INV_APPR_THRESHOLD                 214
+
+#define BINV_NEWTON_THRESHOLD              327
+#define REDC_1_TO_REDC_2_THRESHOLD           4
+#define REDC_2_TO_REDC_N_THRESHOLD          79
+
+#define MU_DIV_QR_THRESHOLD               1895
+#define MU_DIVAPPR_Q_THRESHOLD            1895
+#define MUPI_DIV_QR_THRESHOLD              106
+#define MU_BDIV_QR_THRESHOLD              1589
+#define MU_BDIV_Q_THRESHOLD               1718
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     125
+#define HGCD_APPR_THRESHOLD                173
+#define HGCD_REDUCE_THRESHOLD             3524
+#define GCD_DC_THRESHOLD                   555
+#define GCDEXT_DC_THRESHOLD                478
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
+#define SET_STR_DC_THRESHOLD               248
+#define SET_STR_PRECOMPUTE_THRESHOLD      1648
+
+#define FAC_DSC_THRESHOLD                 1075
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/x86_64/invert_limb.asm b/mpn/x86_64/invert_limb.asm
new file mode 100644
index 0000000..136e67b
--- /dev/null
+++ b/mpn/x86_64/invert_limb.asm
@@ -0,0 +1,112 @@
+dnl  AMD64 mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
+
+dnl  Copyright 2004, 2007-2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb (approx)	div
+C AMD K8,K9	 48			 71
+C AMD K10	 48			 77
+C Intel P4	135			161
+C Intel core2	 69			116
+C Intel corei	 55			 89
+C Intel atom	129			191
+C VIA nano	 79			157
+
+C rax rcx rdx rdi rsi r8
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+PROTECT(`mpn_invert_limb_table')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C mpn_invert_limb(d): reciprocal approximation of a normalized limb d,
+C built from a table lookup v0 refined through v1, v2, v3 plus a final
+C branch-free adjustment.  NOTE(review): the standard contract is
+C floor((B^2-1)/d) - B with B = 2^64 and bit 63 of d set; confirm
+C against invert_limb in gmp-impl.h.
+PROLOGUE(mpn_invert_limb)		C			Kn	C2	Ci
+	FUNC_ENTRY(1)
+	mov	%rdi, %rax		C			 0	 0	 0
+	shr	$55, %rax		C			 1	 1	 1
+C rax = top 9 bits of d, in [256,511] for normalized d; the table base
+C is biased by -512 bytes (256 two-byte entries) so index 256 hits the
+C first entry.  NOTE(review): Darwin seemingly cannot fold the addend
+C into the symbol reference, hence the separate add there.
+ifdef(`DARWIN',`
+	lea	mpn_invert_limb_table(%rip), %r8
+	add	$-512, %r8
+',`
+	lea	-512+mpn_invert_limb_table(%rip), %r8
+')
+	movzwl	(%r8,%rax,2), R32(%rcx)	C	%rcx = v0
+
+	C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
+	mov	%rdi, %rsi		C			 0	 0	 0
+	mov	R32(%rcx), R32(%rax)	C			 4	 5	 5
+	imul	R32(%rcx), R32(%rcx)	C			 4	 5	 5
+	shr	$24, %rsi		C			 1	 1	 1
+	inc	%rsi			C	%rsi = d40
+	imul	%rsi, %rcx		C			 8	10	 8
+	shr	$40, %rcx		C			12	15	11
+	sal	$11, R32(%rax)		C			 5	 6	 6
+	dec	R32(%rax)
+	sub	R32(%rcx), R32(%rax)	C	%rax = v1
+
+	C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
+	mov	$0x1000000000000000, %rcx
+	imul	%rax, %rsi		C			14	17	13
+	sub	%rsi, %rcx
+	imul	%rax, %rcx
+	sal	$13, %rax
+	shr	$47, %rcx
+	add	%rax, %rcx		C	%rcx = v2
+
+	C v3 = (v2 << 31) + (v2 * (2^96 - v2 * d63 + ((v2 >> 1) & mask)) >> 65)
+	mov	%rdi, %rsi		C			 0	 0	 0
+	shr	%rsi			C d/2
+	sbb	%rax, %rax		C -d0 = -(d mod 2)
+	sub	%rax, %rsi		C d63 = ceil(d/2)
+	imul	%rcx, %rsi		C v2 * d63
+	and	%rcx, %rax		C v2 * d0
+	shr	%rax			C (v2>>1) * d0
+	sub	%rsi, %rax		C (v2>>1) * d0 - v2 * d63
+	mul	%rcx
+	sal	$31, %rcx
+	shr	%rdx
+	add	%rdx, %rcx		C	%rcx = v3
+
+C Final adjustment, computed without branches:
+	mov	%rdi, %rax
+	mul	%rcx			C rdx:rax = v3 * d
+	add	%rdi, %rax		C rax = low(v3*d) + d, sets carry
+	mov	%rcx, %rax		C rax = v3 (mov leaves flags intact)
+	adc	%rdi, %rdx		C rdx = high(v3*d) + d + carry
+	sub	%rdx, %rax		C return v3 - rdx
+
+	FUNC_EXIT()
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86_64/invert_limb_table.asm b/mpn/x86_64/invert_limb_table.asm
new file mode 100644
index 0000000..9f637ef
--- /dev/null
+++ b/mpn/x86_64/invert_limb_table.asm
@@ -0,0 +1,50 @@
+dnl  Table used for mpn_invert_limb
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
+
+dnl  Copyright 2004, 2007-2009, 2011-2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+PROTECT(`mpn_invert_limb_table')
+
+ASM_START()
+C Table entry X contains floor (0x7fd00 / (0x100 + X))
+
+C 256 entries of 16 bits each (.value), giving the initial reciprocal
+C approximation v0 used by mpn_invert_limb; that routine indexes it by
+C the top 9 bits of the normalized divisor, with the base biased so
+C index 0x100 maps to the first entry.
+	RODATA
+	ALIGN(2)
+	GLOBL mpn_invert_limb_table
+mpn_invert_limb_table:
+forloop(i,256,512-1,dnl
+`	.value	eval(0x7fd00/i)
+')dnl
+ASM_END()
diff --git a/mpn/x86_64/logops_n.asm b/mpn/x86_64/logops_n.asm
new file mode 100644
index 0000000..7b29a96
--- /dev/null
+++ b/mpn/x86_64/logops_n.asm
@@ -0,0 +1,260 @@
+dnl  AMD64 logops.
+
+dnl  Copyright 2004-2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C		c/l	c/l	c/l	good
+C	       var-1   var-2   var-3  for cpu?
+C AMD K8,K9	 1.5	 1.5	 1.5	 y
+C AMD K10	 1.5	 1.5	 1.5	 y
+C AMD bd1
+C AMD bd2
+C AMD bd3
+C AMD bd4
+C AMD bt1	 2.67	~2.79	~2.67
+C AMD bt2	 2.0	 2.28	 2.28	 y
+C AMD zen	 1.5	 1.5	 1.5	 =
+C Intel P4	 2.8	 3.35	 3.6
+C Intel PNR	 2.0	 2.0	 2.0	 =
+C Intel NHM	 2.0	 2.0	 2.0	 =
+C Intel SBR	 1.5	 1.75	 1.75	 n
+C Intel IBR	 1.48	 1.71	 1.72	 n
+C Intel HWL	 1.5	 1.5	 1.5	 n
+C Intel BWL	 1.5	 1.5	 1.5	 n
+C Intel SKL	 1.5	 1.5	 1.5	 n
+C Intel atom	 3.82	 3.82	 3.82	 n
+C Intel SLM	 3.0	 3.0	 3.0	 =
+C VIA nano	 3.25
+
+dnl  One source file implements all eight mpn logical operations; the
+dnl  build selects one by defining OPERATION_<name>.  Three code shapes:
+dnl    VARIANT_1: r = u LOGOP v        (and, ior, xor)
+dnl    VARIANT_2: r = u LOGOP (not v)  (andn, iorn, xnor)
+dnl    VARIANT_3: not (u LOGOP v)      (nand, nior)
+ifdef(`OPERATION_and_n',`
+  define(`func',`mpn_and_n')
+  define(`VARIANT_1')
+  define(`LOGOP',`and')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',`mpn_andn_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`and')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',`mpn_nand_n')
+  define(`VARIANT_3')
+  define(`LOGOP',`and')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',`mpn_ior_n')
+  define(`VARIANT_1')
+  define(`LOGOP',`or')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',`mpn_iorn_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`or')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',`mpn_nior_n')
+  define(`VARIANT_3')
+  define(`LOGOP',`or')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',`mpn_xor_n')
+  define(`VARIANT_1')
+  define(`LOGOP',`xor')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',`mpn_xnor_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`xor')')
+
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n',`%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+
+ifdef(`VARIANT_1',`
+	TEXT
+	ALIGN(32)
+C r[] = u[] LOGOP v[].  Pointers are advanced past the end and n is
+C negated, so one index register counts up toward zero and the loop
+C needs no separate pointer updates.  The entry code dispatches on
+C n mod 4 to peel 1-3 limbs, keeping the main loop 4-way unrolled.
+PROLOGUE(func)
+	FUNC_ENTRY(4)
+	mov	(vp), %r8
+	mov	R32(%rcx), R32(%rax)
+	lea	(vp,n,8), vp
+	lea	(up,n,8), up
+	lea	(rp,n,8), rp
+	neg	n
+	and	$3, R32(%rax)
+	je	L(b00)
+	cmp	$2, R32(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	LOGOP	(up,n,8), %r8
+	mov	%r8, (rp,n,8)
+	dec	n
+	jmp	L(e11)
+L(b10):	add	$-2, n
+	jmp	L(e10)
+L(b01):	LOGOP	(up,n,8), %r8
+	mov	%r8, (rp,n,8)
+	inc	n
+	jz	L(ret)
+
+C Main loop: four limbs per iteration, jnc exits when n wraps past zero.
+L(top):	mov	(vp,n,8), %r8
+L(b00):	mov	8(vp,n,8), %r9
+	LOGOP	(up,n,8), %r8
+	LOGOP	8(up,n,8), %r9
+	nop				C K8/K9/K10 concession
+	mov	%r8, (rp,n,8)
+	mov	%r9, 8(rp,n,8)
+L(e11):	mov	16(vp,n,8), %r8
+L(e10):	mov	24(vp,n,8), %r9
+	LOGOP	16(up,n,8), %r8
+	LOGOP	24(up,n,8), %r9
+	mov	%r8, 16(rp,n,8)
+	mov	%r9, 24(rp,n,8)
+	add	$4, n
+	jnc	L(top)
+
+L(ret):	FUNC_EXIT()
+	ret
+EPILOGUE()
+')
+
+ifdef(`VARIANT_2',`
+	TEXT
+	ALIGN(32)
+C r[] = u[] LOGOP (not v[]): each v limb is complemented as it is
+C loaded.  Same unrolling and n mod 4 dispatch as VARIANT_1.
+PROLOGUE(func)
+	FUNC_ENTRY(4)
+	mov	(vp), %r8
+	not	%r8
+	mov	R32(%rcx), R32(%rax)
+	lea	(vp,n,8), vp
+	lea	(up,n,8), up
+	lea	(rp,n,8), rp
+	neg	n
+	and	$3, R32(%rax)
+	je	L(b00)
+	cmp	$2, R32(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	LOGOP	(up,n,8), %r8
+	mov	%r8, (rp,n,8)
+	dec	n
+	jmp	L(e11)
+L(b10):	add	$-2, n
+	jmp	L(e10)
+C One-byte nops: padding for code alignment only.
+	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
+L(b01):	LOGOP	(up,n,8), %r8
+	mov	%r8, (rp,n,8)
+	inc	n
+	jz	L(ret)
+
+L(top):	mov	(vp,n,8), %r8
+	not	%r8
+L(b00):	mov	8(vp,n,8), %r9
+	not	%r9
+	LOGOP	(up,n,8), %r8
+	LOGOP	8(up,n,8), %r9
+	mov	%r8, (rp,n,8)
+	mov	%r9, 8(rp,n,8)
+L(e11):	mov	16(vp,n,8), %r8
+	not	%r8
+L(e10):	mov	24(vp,n,8), %r9
+	not	%r9
+	LOGOP	16(up,n,8), %r8
+	LOGOP	24(up,n,8), %r9
+	mov	%r8, 16(rp,n,8)
+	mov	%r9, 24(rp,n,8)
+	add	$4, n
+	jnc	L(top)
+
+L(ret):	FUNC_EXIT()
+	ret
+EPILOGUE()
+')
+
+ifdef(`VARIANT_3',`
+	TEXT
+	ALIGN(32)
+C not (u[] LOGOP v[]): each result limb is complemented just before its
+C store.  Same unrolling and n mod 4 dispatch as VARIANT_1.
+PROLOGUE(func)
+	FUNC_ENTRY(4)
+	mov	(vp), %r8
+	mov	R32(%rcx), R32(%rax)
+	lea	(vp,n,8), vp
+	lea	(up,n,8), up
+	lea	(rp,n,8), rp
+	neg	n
+	and	$3, R32(%rax)
+	je	L(b00)
+	cmp	$2, R32(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	LOGOP	(up,n,8), %r8
+	not	%r8
+	mov	%r8, (rp,n,8)
+	dec	n
+	jmp	L(e11)
+L(b10):	add	$-2, n
+	jmp	L(e10)
+C One-byte nops: padding for code alignment only.
+	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
+L(b01):	LOGOP	(up,n,8), %r8
+	not	%r8
+	mov	%r8, (rp,n,8)
+	inc	n
+	jz	L(ret)
+
+L(top):	mov	(vp,n,8), %r8
+L(b00):	mov	8(vp,n,8), %r9
+	LOGOP	(up,n,8), %r8
+	not	%r8
+	LOGOP	8(up,n,8), %r9
+	not	%r9
+	mov	%r8, (rp,n,8)
+	mov	%r9, 8(rp,n,8)
+L(e11):	mov	16(vp,n,8), %r8
+L(e10):	mov	24(vp,n,8), %r9
+	LOGOP	16(up,n,8), %r8
+	not	%r8
+	LOGOP	24(up,n,8), %r9
+	not	%r9
+	mov	%r8, 16(rp,n,8)
+	mov	%r9, 24(rp,n,8)
+	add	$4, n
+	jnc	L(top)
+
+L(ret):	FUNC_EXIT()
+	ret
+EPILOGUE()
+')
diff --git a/mpn/x86_64/lshift.asm b/mpn/x86_64/lshift.asm
new file mode 100644
index 0000000..aa9e196
--- /dev/null
+++ b/mpn/x86_64/lshift.asm
@@ -0,0 +1,172 @@
+dnl  AMD64 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 2003, 2005, 2007, 2009, 2011, 2012, 2018 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb   cycles/limb cnt=1
+C AMD K8,K9	 2.375		 1.375
+C AMD K10	 2.375		 1.375
+C Intel P4	 8		10.5
+C Intel core2	 2.11		 4.28
+C Intel corei	 ?		 ?
+C Intel atom	 5.75		 3.5
+C VIA nano	 3.5		 2.25
+
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`n',	`%rdx')
+define(`cnt',	`%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_lshift)
+C Shift the n-limb number at up left by cnt bits, storing the result at
+C rp and returning (in %rax) the bits shifted out of the top limb.
+C Limbs are processed from the most significant end downward.  The code
+C keeps only one shift count register: it negates cl to flip between
+C the left-shift count cnt and the right-shift count 64-cnt.
+	FUNC_ENTRY(4)
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-8(up,n,8), %rax
+	shr	R8(%rcx), %rax		C function return value
+
+	neg	R32(%rcx)		C put lsh count in cl
+	lea	1(n), R32(%r8)
+	and	$3, R32(%r8)		C classify n mod 4 for loop feed-in
+	je	L(rlx)			C jump for n = 3, 7, 11, ...
+
+	dec	R32(%r8)
+	jne	L(1)
+C	n = 4, 8, 12, ...
+	mov	-8(up,n,8), %r10
+	shl	R8(%rcx), %r10
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-16(up,n,8), %r8
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	mov	%r10, -8(rp,n,8)
+	dec	n
+	jmp	L(rll)
+
+L(1):	dec	R32(%r8)
+	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
+C	n = 2, 6, 10, 14, ...
+	mov	-8(up,n,8), %r10
+	shl	R8(%rcx), %r10
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-16(up,n,8), %r8
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	mov	%r10, -8(rp,n,8)
+	dec	n
+	neg	R32(%rcx)		C put lsh count in cl
+L(1x):
+	cmp	$1, n
+	je	L(ast)			C single remaining limb: no rsh partner
+	mov	-8(up,n,8), %r10
+	shl	R8(%rcx), %r10
+	mov	-16(up,n,8), %r11
+	shl	R8(%rcx), %r11
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-16(up,n,8), %r8
+	mov	-24(up,n,8), %r9
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	shr	R8(%rcx), %r9
+	or	%r9, %r11
+	mov	%r10, -8(rp,n,8)
+	mov	%r11, -16(rp,n,8)
+	sub	$2, n
+
+L(rll):	neg	R32(%rcx)		C put lsh count in cl
+L(rlx):	mov	-8(up,n,8), %r10
+	shl	R8(%rcx), %r10
+	mov	-16(up,n,8), %r11
+	shl	R8(%rcx), %r11
+
+	sub	$4, n			C				      4
+	jb	L(end)			C				      2
+	ALIGN(16)
+C Main loop: produces 4 result limbs per iteration.  Each output limb is
+C the OR of a left-shifted limb and its right-shifted lower neighbor;
+C the cl negations alternate between the two shift directions.
+L(top):
+	C finish stuff from lsh block
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	16(up,n,8), %r8
+	mov	8(up,n,8), %r9
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	shr	R8(%rcx), %r9
+	or	%r9, %r11
+	mov	%r10, 24(rp,n,8)
+	mov	%r11, 16(rp,n,8)
+	C start two new rsh
+	mov	0(up,n,8), %r8
+	mov	-8(up,n,8), %r9
+	shr	R8(%rcx), %r8
+	shr	R8(%rcx), %r9
+
+	C finish stuff from rsh block
+	neg	R32(%rcx)		C put lsh count in cl
+	mov	8(up,n,8), %r10
+	mov	0(up,n,8), %r11
+	shl	R8(%rcx), %r10
+	or	%r10, %r8
+	shl	R8(%rcx), %r11
+	or	%r11, %r9
+	mov	%r8, 8(rp,n,8)
+	mov	%r9, 0(rp,n,8)
+	C start two new lsh
+	mov	-8(up,n,8), %r10
+	mov	-16(up,n,8), %r11
+	shl	R8(%rcx), %r10
+	shl	R8(%rcx), %r11
+
+	sub	$4, n
+	jae	L(top)			C				      2
+L(end):
+C Wind-down: complete the two pending result limbs, then the lowest limb
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	8(up), %r8
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	mov	(up), %r9
+	shr	R8(%rcx), %r9
+	or	%r9, %r11
+	mov	%r10, 16(rp)
+	mov	%r11, 8(rp)
+
+	neg	R32(%rcx)		C put lsh count in cl
+L(ast):	mov	(up), %r10
+	shl	R8(%rcx), %r10		C lowest limb has zeros shifted in
+	mov	%r10, (rp)
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/lshiftc.asm b/mpn/x86_64/lshiftc.asm
new file mode 100644
index 0000000..4f3c7c3
--- /dev/null
+++ b/mpn/x86_64/lshiftc.asm
@@ -0,0 +1,182 @@
+dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.
+
+dnl  Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C AMD K8,K9	 2.75
+C AMD K10	 2.75
+C Intel P4	 ?
+C Intel core2	 ?
+C Intel corei	 ?
+C Intel atom	 ?
+C VIA nano	 3.75
+
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`n',	`%rdx')
+define(`cnt',	`%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_lshiftc)
+C Like mpn_lshift, but each stored result limb is complemented (not).
+C Shift the n-limb number at up left by cnt bits, store the complemented
+C result at rp, and return (in %rax, NOT complemented) the bits shifted
+C out of the top limb.  cl is negated back and forth to alternate
+C between the left-shift count cnt and the right-shift count 64-cnt.
+	FUNC_ENTRY(4)
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-8(up,n,8), %rax
+	shr	R8(%rcx), %rax		C function return value
+
+	neg	R32(%rcx)		C put lsh count in cl
+	lea	1(n), R32(%r8)
+	and	$3, R32(%r8)		C classify n mod 4 for loop feed-in
+	je	L(rlx)			C jump for n = 3, 7, 11, ...
+
+	dec	R32(%r8)
+	jne	L(1)
+C	n = 4, 8, 12, ...
+	mov	-8(up,n,8), %r10
+	shl	R8(%rcx), %r10
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-16(up,n,8), %r8
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	not	%r10
+	mov	%r10, -8(rp,n,8)
+	dec	n
+	jmp	L(rll)
+
+L(1):	dec	R32(%r8)
+	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
+C	n = 2, 6, 10, 14, ...
+	mov	-8(up,n,8), %r10
+	shl	R8(%rcx), %r10
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-16(up,n,8), %r8
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	not	%r10
+	mov	%r10, -8(rp,n,8)
+	dec	n
+	neg	R32(%rcx)		C put lsh count in cl
+L(1x):
+	cmp	$1, n
+	je	L(ast)			C single remaining limb: no rsh partner
+	mov	-8(up,n,8), %r10
+	shl	R8(%rcx), %r10
+	mov	-16(up,n,8), %r11
+	shl	R8(%rcx), %r11
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-16(up,n,8), %r8
+	mov	-24(up,n,8), %r9
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	shr	R8(%rcx), %r9
+	or	%r9, %r11
+	not	%r10
+	not	%r11
+	mov	%r10, -8(rp,n,8)
+	mov	%r11, -16(rp,n,8)
+	sub	$2, n
+
+L(rll):	neg	R32(%rcx)		C put lsh count in cl
+L(rlx):	mov	-8(up,n,8), %r10
+	shl	R8(%rcx), %r10
+	mov	-16(up,n,8), %r11
+	shl	R8(%rcx), %r11
+
+	sub	$4, n			C				      4
+	jb	L(end)			C				      2
+	ALIGN(16)
+C Main loop: 4 result limbs per iteration; each output limb is the OR of
+C a left-shifted limb and its right-shifted neighbor, then complemented.
+L(top):
+	C finish stuff from lsh block
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	16(up,n,8), %r8
+	mov	8(up,n,8), %r9
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	shr	R8(%rcx), %r9
+	or	%r9, %r11
+	not	%r10
+	not	%r11
+	mov	%r10, 24(rp,n,8)
+	mov	%r11, 16(rp,n,8)
+	C start two new rsh
+	mov	0(up,n,8), %r8
+	mov	-8(up,n,8), %r9
+	shr	R8(%rcx), %r8
+	shr	R8(%rcx), %r9
+
+	C finish stuff from rsh block
+	neg	R32(%rcx)		C put lsh count in cl
+	mov	8(up,n,8), %r10
+	mov	0(up,n,8), %r11
+	shl	R8(%rcx), %r10
+	or	%r10, %r8
+	shl	R8(%rcx), %r11
+	or	%r11, %r9
+	not	%r8
+	not	%r9
+	mov	%r8, 8(rp,n,8)
+	mov	%r9, 0(rp,n,8)
+	C start two new lsh
+	mov	-8(up,n,8), %r10
+	mov	-16(up,n,8), %r11
+	shl	R8(%rcx), %r10
+	shl	R8(%rcx), %r11
+
+	sub	$4, n
+	jae	L(top)			C				      2
+L(end):
+C Wind-down: complete the two pending result limbs, then the lowest limb
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	8(up), %r8
+	shr	R8(%rcx), %r8
+	or	%r8, %r10
+	mov	(up), %r9
+	shr	R8(%rcx), %r9
+	or	%r9, %r11
+	not	%r10
+	not	%r11
+	mov	%r10, 16(rp)
+	mov	%r11, 8(rp)
+
+	neg	R32(%rcx)		C put lsh count in cl
+L(ast):	mov	(up), %r10
+	shl	R8(%rcx), %r10		C lowest limb has zeros shifted in
+	not	%r10
+	mov	%r10, (rp)
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/lshsub_n.asm b/mpn/x86_64/lshsub_n.asm
new file mode 100644
index 0000000..50bbc66
--- /dev/null
+++ b/mpn/x86_64/lshsub_n.asm
@@ -0,0 +1,172 @@
+dnl  AMD64 mpn_lshsub_n.  R = 2^k(U - V).
+
+dnl  Copyright 2006, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C AMD K8,K9	 3.15	(mpn_sub_n + mpn_lshift costs about 4 c/l)
+C AMD K10	 3.15	(mpn_sub_n + mpn_lshift costs about 4 c/l)
+C Intel P4	16.5
+C Intel core2	 4.35
+C Intel corei	 ?
+C Intel atom	 ?
+C VIA nano	 ?
+
+C This was written quickly and not optimized at all, but it runs very well on
+C K8.  But perhaps one could get under 3 c/l.  Ideas:
+C   1) Use indexing to save the 3 LEA
+C   2) Write reasonable feed-in code
+C   3) Be more clever about register usage
+C   4) Unroll more, handling CL negation, carry save/restore cost much now
+C   5) Reschedule
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`n',	`%rcx')
+define(`cnt',	`%r8')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_lshsub_n)
+C R = 2^cnt * (U - V), n limbs, fused subtract-and-left-shift.
+C The subtract borrow is kept across iterations in %rbx (saved with
+C sbb rbx,rbx; restored with add rbx,rbx) since the shift instructions
+C clobber CF.  %r15 carries the inter-limb shifted-out bits.
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8d	')
+
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+	push	%rbx
+
+	mov	n, %rax
+	xor	R32(%rbx), R32(%rbx)	C clear carry save register
+	mov	R32(%r8), R32(%rcx)	C shift count
+	xor	R32(%r15), R32(%r15)	C limb carry
+
+	mov	R32(%rax), R32(%r11)
+	and	$3, R32(%r11)		C n mod 4 limbs done one at a time
+	je	L(4)
+	sub	$1, R32(%r11)
+
+C Feed-in loop: one limb per iteration until a multiple of 4 remains
+L(oopette):
+	add	R32(%rbx), R32(%rbx)	C restore carry flag
+	mov	0(up), %r8
+	lea	8(up), up
+	sbb	0(vp), %r8
+	mov	%r8, %r12
+	sbb	R32(%rbx), R32(%rbx)	C save carry flag
+	shl	R8(%rcx), %r8
+	or	%r15, %r8
+	mov	%r12, %r15
+	lea	8(vp), vp
+	neg	R8(%rcx)		C flip to rsh count 64-cnt
+	shr	R8(%rcx), %r15		C bits for next limb
+	neg	R8(%rcx)		C back to lsh count
+	mov	%r8, 0(rp)
+	lea	8(rp), rp
+	sub	$1, R32(%r11)
+	jnc	L(oopette)
+
+L(4):
+	sub	$4, %rax
+	jc	L(end)
+
+	ALIGN(16)
+C Main loop: subtract and shift 4 limbs per iteration
+L(oop):
+	add	R32(%rbx), R32(%rbx)	C restore carry flag
+
+	mov	0(up), %r8
+	mov	8(up), %r9
+	mov	16(up), %r10
+	mov	24(up), %r11
+
+	lea	32(up), up
+
+	sbb	0(vp), %r8
+	mov	%r8, %r12
+	sbb	8(vp), %r9
+	mov	%r9, %r13
+	sbb	16(vp), %r10
+	mov	%r10, %r14
+	sbb	24(vp), %r11
+
+	sbb	R32(%rbx), R32(%rbx)	C save carry flag
+
+	shl	R8(%rcx), %r8
+	shl	R8(%rcx), %r9
+	shl	R8(%rcx), %r10
+	or	%r15, %r8
+	mov	%r11, %r15
+	shl	R8(%rcx), %r11
+
+	lea	32(vp), vp
+
+	neg	R8(%rcx)		C flip to rsh count 64-cnt
+
+	shr	R8(%rcx), %r12
+	shr	R8(%rcx), %r13
+	shr	R8(%rcx), %r14
+	shr	R8(%rcx), %r15		C used next loop
+
+	or	%r12, %r9
+	or	%r13, %r10
+	or	%r14, %r11
+
+	neg	R8(%rcx)		C back to lsh count
+
+	mov	%r8, 0(rp)
+	mov	%r9, 8(rp)
+	mov	%r10, 16(rp)
+	mov	%r11, 24(rp)
+
+	lea	32(rp), rp
+
+	sub	$4, %rax
+	jnc	L(oop)
+L(end):
+C Combine the saved borrow and the remaining shifted-out bits into the
+C return value (the bits shifted out at the top, adjusted for borrow)
+	neg	R32(%rbx)
+	shl	R8(%rcx), %rbx
+	adc	%r15, %rbx
+	mov	%rbx, %rax
+	pop	%rbx
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/missing-call.m4 b/mpn/x86_64/missing-call.m4
new file mode 100644
index 0000000..c024f0e
--- /dev/null
+++ b/mpn/x86_64/missing-call.m4
@@ -0,0 +1,53 @@
+dnl  AMD64 MULX/ADX simulation support, function call version.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+define(`adox',`
+C simulate the adox instruction via the out-of-line helper __gmp_adox:
+C operands are passed on the stack and the result comes back in place of $2
+	push	$1
+	push	$2
+	call	__gmp_adox
+	pop	$2
+')
+
+define(`adcx',`
+C simulate the adcx instruction via the out-of-line helper __gmp_adcx
+	push	$1
+	push	$2
+	call	__gmp_adcx
+	pop	$2
+')
+
+define(`mulx',`
+C simulate the mulx instruction via __gmp_mulx: push the explicit source,
+C pop the low and high product words into $2 and $3
+	push	$1
+	call	__gmp_mulx
+	pop	$2
+	pop	$3
+')
diff --git a/mpn/x86_64/missing-inline.m4 b/mpn/x86_64/missing-inline.m4
new file mode 100644
index 0000000..bd1df13
--- /dev/null
+++ b/mpn/x86_64/missing-inline.m4
@@ -0,0 +1,100 @@
+dnl  AMD64 MULX/ADX simulation support, inline version.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+define(`adox',`
+C inline simulation of adox (add with carry in and out via OF).
+C EFLAGS bit 11 is OF and bit 0 is CF: the flags image on the stack is
+C massaged so that a plain adc uses OF as its carry, then the resulting
+C CF is written back into the OF position.
+	push	$2
+	push	%rcx
+	push	%rbx
+	push	%rax
+	mov	$1, %rcx
+	pushfq
+	pushfq
+C copy 0(%rsp):11 to 0(%rsp):0
+	mov	(%rsp), %rbx
+	shr	%rbx
+	bt	$`'10, %rbx
+	adc	%rbx, %rbx
+	mov	%rbx, (%rsp)
+C put manipulated flags into eflags, execute a plain adc
+	popfq
+	adc	%rcx, 32(%rsp)
+C copy CF to 0(%rsp):11
+	mov	(%rsp), %rbx
+	sbb	R32(%rax), R32(%rax)
+	and	$`'0x800, R32(%rax)
+	and	$`'0xfffffffffffff7ff, %rbx
+	or	%rax, %rbx
+	mov	%rbx, (%rsp)
+C put manipulated flags into eflags
+	popfq
+	pop	%rax
+	pop	%rbx
+	pop	%rcx
+	pop	$2
+')
+
+define(`adcx',`
+C inline simulation of adcx (add with carry in and out via CF only):
+C perform a plain adc on the stacked destination, then splice the new
+C CF into the saved flags image before restoring it
+	push	$2
+	push	%rcx
+	push	%rbx
+	push	%rax
+	mov	$1, %rcx
+	pushfq
+	adc	%rcx, 32(%rsp)
+	mov	(%rsp), %rbx
+	sbb	R32(%rax), R32(%rax)
+	and	$`'0xfffffffffffffffe, %rbx
+	sub	%rax, %rbx
+	mov	%rbx, (%rsp)
+	popfq
+	pop	%rax
+	pop	%rbx
+	pop	%rcx
+	pop	$2
+')
+
+define(`mulx',`
+C inline simulation of mulx: widening multiply without touching flags.
+C Uses mul with the flags saved and restored around it; the two product
+C words are left on the stack and popped into $2 and $3.
+	lea	-16(%rsp), %rsp
+	push	%rax
+	push	%rdx
+	pushfq			C preserve all flags
+	mov	$1, %rax
+	mul	%rdx
+	mov	%rax, 24(%rsp)
+	mov	%rdx, 32(%rsp)
+	popfq			C restore eflags
+	pop	%rdx
+	pop	%rax
+	pop	$2
+	pop	$3
+')
diff --git a/mpn/x86_64/missing.asm b/mpn/x86_64/missing.asm
new file mode 100644
index 0000000..48c2b8a
--- /dev/null
+++ b/mpn/x86_64/missing.asm
@@ -0,0 +1,130 @@
+dnl  AMD64 MULX/ADX simulation support.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+include(`config.m4')
+
+ASM_START()
+
+C Fake the MULX instruction
+C
+C Accept the single explicit parameter on the stack, return the two result
+C words on the stack.  This calling convention means that we need to move the
+C return address up.
+C
+PROLOGUE(__gmp_mulx)
+C Simulate mulx: widening multiply that leaves EFLAGS untouched.
+C Stack protocol: one input word above the return address on entry;
+C low and high product words on the stack on exit, so the return
+C address is shifted up one slot to make room.
+	lea	-8(%rsp), %rsp
+	push	%rax
+	push	%rdx
+	pushfq				C preserve all flags
+	mov	32(%rsp), %rax		C move retaddr...
+	mov	%rax, 24(%rsp)		C ...up the stack
+	mov	40(%rsp), %rax		C input parameter
+	mul	%rdx
+	mov	%rax, 32(%rsp)
+	mov	%rdx, 40(%rsp)
+	popfq				C restore eflags
+	pop	%rdx
+	pop	%rax
+	ret
+EPILOGUE()
+PROTECT(__gmp_mulx)
+
+
+C Fake the ADOX instruction
+C
+C Accept the two parameters on the stack, return the result word on the stack.
+C This calling convention means that we need to move the return address down.
+C
+PROLOGUE(__gmp_adox)
+C Simulate adox: add with carry in and out through OF (EFLAGS bit 11),
+C leaving CF and the other flags intact.  The saved flags image is
+C rewritten so a plain adc consumes OF as its carry, and the resulting
+C CF is spliced back into the OF position afterwards.
+	push	%rcx
+	push	%rbx
+	push	%rax
+	mov	32(%rsp), %rcx		C src2
+	mov	24(%rsp), %rax		C move retaddr...
+	mov	%rax, 32(%rsp)		C ...down the stack
+	pushfq
+C copy 0(%rsp):11 to 0(%rsp):0
+	mov	(%rsp), %rbx
+	shr	%rbx
+	bt	$10, %rbx
+	adc	%rbx, %rbx
+	push	%rbx
+C put manipulated flags into eflags, execute a plain adc
+	popfq
+	adc	%rcx, 48(%rsp)
+C copy CF to 0(%rsp):11
+	pop	%rbx
+	sbb	R32(%rax), R32(%rax)
+	and	$0x800, R32(%rax)
+	and	$0xfffffffffffff7ff, %rbx
+	or	%rax, %rbx
+	push	%rbx
+C put manipulated flags into eflags
+	popfq
+	pop	%rax
+	pop	%rbx
+	pop	%rcx
+	lea	8(%rsp), %rsp
+	ret
+EPILOGUE()
+PROTECT(__gmp_adox)
+
+
+C Fake the ADCX instruction
+C
+C Accept the two parameters on the stack, return the result word on the stack.
+C This calling convention means that we need to move the return address down.
+C
+PROLOGUE(__gmp_adcx)
+C Simulate adcx: add with carry in and out through CF only, leaving the
+C other flags intact.  A plain adc does the work; the new CF is then
+C spliced into bit 0 of the saved flags image before it is restored.
+	push	%rcx
+	push	%rbx
+	push	%rax
+	mov	32(%rsp), %rcx		C src2
+	mov	24(%rsp), %rax		C move retaddr...
+	mov	%rax, 32(%rsp)		C ...down the stack
+	pushfq
+	adc	%rcx, 48(%rsp)
+	pop	%rbx
+	sbb	R32(%rax), R32(%rax)
+	and	$`'0xfffffffffffffffe, %rbx
+	sub	%rax, %rbx
+	push	%rbx
+	popfq
+	pop	%rax
+	pop	%rbx
+	pop	%rcx
+	lea	8(%rsp), %rsp
+	ret
+EPILOGUE()
+PROTECT(__gmp_adcx)
diff --git a/mpn/x86_64/mod_1_1.asm b/mpn/x86_64/mod_1_1.asm
new file mode 100644
index 0000000..50a6692
--- /dev/null
+++ b/mpn/x86_64/mod_1_1.asm
@@ -0,0 +1,238 @@
+dnl  AMD64 mpn_mod_1_1p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund and Niels Möller.
+
+dnl  Copyright 2009-2012, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 6
+C AMD K10	 6
+C Intel P4	26
+C Intel core2	12.5
+C Intel NHM	11.3
+C Intel SBR	 8.4	(slowdown, old code took 8.0)
+C Intel atom	26
+C VIA nano	13
+
+define(`B2mb',   `%r10')
+define(`B2modb', `%r11')
+define(`ap',     `%rdi')
+define(`n',      `%rsi')
+define(`pre',    `%r8')
+define(`b',      `%rbx')
+
+define(`r0',     `%rbp') C r1 kept in %rax
+define(`r2',	 `%rcx')  C kept negated. Also used as shift count
+define(`t0',     `%r9')
+
+C mp_limb_t
+C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
+C                       %rdi         %rsi         %rdx                %rcx
+C The pre array contains bi, cnt, B1modb, B2modb
+C Note: This implementation needs B1modb only when cnt > 0
+
+C The iteration is almost as follows,
+C
+C   r_2 B^3 + r_1 B^2 + r_0 B + u = r_1 B2modb + (r_0 + r_2 B2mod) B + u
+C
+C where r2 is a single bit represented as a mask. But to make sure that the
+C result fits in two limbs and a bit, carry from the addition
+C
+C   r_0 + r_2 B2mod
+C
+C is handled specially. On carry, we subtract b to cancel the carry,
+C and we use instead the value
+C
+C   r_0 + B2mb (mod B)
+C
+C This addition can be issued early since it doesn't depend on r2, and it is
+C the source of the cmov in the loop.
+C
+C We have the invariant that r_2 B^2 + r_1 B + r_0 < B^2 + B b
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_mod_1_1p)
+C Compute ap[0..n-1] mod b using the precomputed table at pre
+C (bi, cnt, B1modb, B2modb -- see header comment above).  The main loop
+C folds two limbs at a time via B2modb; the final two-limb value is then
+C reduced with the precomputed inverse bi in L(udiv).
+	FUNC_ENTRY(4)
+	push	%rbp
+	push	%rbx
+	mov	%rdx, b
+	mov	%rcx, pre
+
+	mov	-8(ap, n, 8), %rax
+	cmp	$3, n
+	jnc	L(first)
+	mov	-16(ap, n, 8), r0	C n <= 2: no folding needed
+	jmp	L(reduce_two)
+
+L(first):
+	C First iteration, no r2
+	mov	24(pre), B2modb
+	mul	B2modb
+	mov	-24(ap, n, 8), r0
+	add	%rax, r0
+	mov	-16(ap, n, 8), %rax
+	adc	%rdx, %rax
+	sbb	r2, r2			C r2 = carry as a full-limb mask
+	sub	$4, n
+	jc	L(reduce_three)
+
+	mov	B2modb, B2mb
+	sub	b, B2mb
+
+	ALIGN(16)
+C Main loop: fold one limb per iteration; the cmov picks the
+C carry-corrected sum r_0 + B2mb as described in the header comment
+L(top):	and	B2modb, r2
+	lea	(B2mb, r0), t0
+	mul	B2modb
+	add	r0, r2
+	mov	(ap, n, 8), r0
+	cmovc	t0, r2
+	add	%rax, r0
+	mov	r2, %rax
+	adc	%rdx, %rax
+	sbb	r2, r2
+	sub	$1, n
+	jnc	L(top)
+
+L(reduce_three):
+	C Eliminate r2
+	and	b, r2
+	sub	r2, %rax
+
+L(reduce_two):
+	mov	8(pre), R32(%rcx)	C cnt
+	test	R32(%rcx), R32(%rcx)
+	jz	L(normalized)
+
+	C Unnormalized, use B1modb to reduce to size < B (b+1)
+	mulq	16(pre)
+	xor	t0, t0
+	add	%rax, r0
+	adc	%rdx, t0
+	mov	t0, %rax
+
+	C Left-shift to normalize
+ifdef(`SHLD_SLOW',`
+	shl	R8(%rcx), %rax
+	mov	r0, t0
+	neg	R32(%rcx)
+	shr	R8(%rcx), t0
+	or	t0, %rax
+	neg	R32(%rcx)
+',`
+	shld	R8(%rcx), r0, %rax
+')
+	shl	R8(%rcx), r0
+	jmp	L(udiv)
+
+L(normalized):
+	mov	%rax, t0
+	sub	b, t0
+	cmovnc	t0, %rax
+
+L(udiv):
+	C Reduce the two-limb value r1:r0 mod b using the precomputed
+	C inverse bi stored at (pre), then undo the normalizing shift
+	lea	1(%rax), t0
+	mulq	(pre)
+	add	r0, %rax
+	adc	t0, %rdx
+	imul	b, %rdx
+	sub	%rdx, r0
+	cmp	r0, %rax
+	lea	(b, r0), %rax
+	cmovnc	r0, %rax
+	cmp	b, %rax
+	jnc	L(fix)
+L(ok):	shr	R8(%rcx), %rax
+
+	pop	%rbx
+	pop	%rbp
+	FUNC_EXIT()
+	ret
+L(fix):	sub	b, %rax
+	jmp	L(ok)
+EPILOGUE()
+
+	ALIGN(16)
+PROLOGUE(mpn_mod_1_1p_cps)
+C Precompute the table used by mpn_mod_1_1p for divisor b (in %rsi):
+C stores bi (inverse of the normalized b), cnt (leading zero count of
+C b), B1modb and B2modb into the 4-entry array at %rdi.
+	FUNC_ENTRY(2)
+	push	%rbp
+	bsr	%rsi, %rcx		C position of highest set bit of b
+	push	%rbx
+	mov	%rdi, %rbx
+	push	%r12
+	xor	$63, R32(%rcx)		C cnt = count of leading zeros
+	mov	%rsi, %r12
+	mov	R32(%rcx), R32(%rbp)	C preserve cnt over the call
+	sal	R8(%rcx), %r12		C b << cnt (normalized divisor)
+IFSTD(`	mov	%r12, %rdi	')	C pass parameter
+IFDOS(`	mov	%r12, %rcx	')	C pass parameter
+IFDOS(`	sub	$32, %rsp	')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_invert_limb)
+IFDOS(`	add	$32, %rsp	')
+	neg	%r12
+	mov	%r12, %r8
+	mov	%rax, (%rbx)		C store bi
+	mov	%rbp, 8(%rbx)		C store cnt
+	imul	%rax, %r12
+	mov	%r12, 24(%rbx)		C store B2modb
+	mov	R32(%rbp), R32(%rcx)
+	test	R32(%rcx), R32(%rcx)
+	jz	L(z)			C B1modb not needed when cnt = 0
+
+	mov	$1, R32(%rdx)
+ifdef(`SHLD_SLOW',`
+	C Destroys %rax, unlike shld. Otherwise, we could do B1modb
+	C before B2modb, and get rid of the move %r12, %r8 above.
+
+	shl	R8(%rcx), %rdx
+	neg	R32(%rcx)
+	shr	R8(%rcx), %rax
+	or	%rax, %rdx
+	neg	R32(%rcx)
+',`
+	shld	R8(%rcx), %rax, %rdx
+')
+	imul	%rdx, %r8
+	shr	R8(%rcx), %r8
+	mov	%r8, 16(%rbx)		C store B1modb
+L(z):
+	pop	%r12
+	pop	%rbx
+	pop	%rbp
+	FUNC_EXIT()
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86_64/mod_1_2.asm b/mpn/x86_64/mod_1_2.asm
new file mode 100644
index 0000000..1879d73
--- /dev/null
+++ b/mpn/x86_64/mod_1_2.asm
@@ -0,0 +1,241 @@
+dnl  AMD64 mpn_mod_1s_2p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2009-2012, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 4
+C AMD K10	 4
+C Intel P4	19
+C Intel core2	 8
+C Intel NHM	 6.5
+C Intel SBR	 4.5
+C Intel atom	28
+C VIA nano	 8
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_mod_1s_2p)
+C Compute ap[0..n-1] mod b, folding two limbs per step using the
+C precomputed table built by mpn_mod_1s_2p_cps below: entries are
+C bi, cnt, B1modb, B2modb, B3modb at offsets 0, 8, 16, 24, 32.
+C Arguments: ap = %rdi, n = %rsi, b = %rdx, table = %rcx.
+	FUNC_ENTRY(4)
+	push	%r14
+	test	$1, R8(%rsi)		C dispatch on n odd/even
+	mov	%rdx, %r14
+	push	%r13
+	mov	%rcx, %r13
+	push	%r12
+	push	%rbp
+	push	%rbx
+	mov	16(%rcx), %r10		C B1modb
+	mov	24(%rcx), %rbx		C B2modb
+	mov	32(%rcx), %rbp		C B3modb
+	je	L(b0)
+	dec	%rsi
+	je	L(one)
+C Odd n >= 3: fold the top three limbs into r9:r8 before the loop
+	mov	-8(%rdi,%rsi,8), %rax
+	mul	%r10
+	mov	%rax, %r9
+	mov	%rdx, %r8
+	mov	(%rdi,%rsi,8), %rax
+	add	-16(%rdi,%rsi,8), %r9
+	adc	$0, %r8
+	mul	%rbx
+	add	%rax, %r9
+	adc	%rdx, %r8
+	jmp	L(11)
+
+L(b0):	mov	-8(%rdi,%rsi,8), %r8
+	mov	-16(%rdi,%rsi,8), %r9
+
+L(11):	sub	$4, %rsi
+	jb	L(ed2)
+	lea	40(%rdi,%rsi,8), %rdi
+	mov	-40(%rdi), %r11
+	mov	-32(%rdi), %rax
+	jmp	L(m0)
+
+	ALIGN(16)
+C Main loop: consume 4 limbs (two 2-limb folds) per iteration
+L(top):	mov	-24(%rdi), %r9
+	add	%rax, %r11
+	mov	-16(%rdi), %rax
+	adc	%rdx, %r12
+	mul	%r10
+	add	%rax, %r9
+	mov	%r11, %rax
+	mov	%rdx, %r8
+	adc	$0, %r8
+	mul	%rbx
+	add	%rax, %r9
+	mov	%r12, %rax
+	adc	%rdx, %r8
+	mul	%rbp
+	sub	$2, %rsi
+	jb	L(ed1)
+	mov	-40(%rdi), %r11
+	add	%rax, %r9
+	mov	-32(%rdi), %rax
+	adc	%rdx, %r8
+L(m0):	mul	%r10
+	add	%rax, %r11
+	mov	%r9, %rax
+	mov	%rdx, %r12
+	adc	$0, %r12
+	mul	%rbx
+	add	%rax, %r11
+	lea	-32(%rdi), %rdi		C ap -= 4
+	mov	%r8, %rax
+	adc	%rdx, %r12
+	mul	%rbp
+	sub	$2, %rsi
+	jae	L(top)
+
+L(ed0):	mov	%r11, %r9
+	mov	%r12, %r8
+L(ed1):	add	%rax, %r9
+	adc	%rdx, %r8
+C Final reduction: fold r8:r9 to one normalized limb pair, divide by b
+C using the precomputed inverse bi, then undo the normalizing shift
+L(ed2):	mov	8(%r13), R32(%rdi)		C cnt
+	mov	%r8, %rax
+	mov	%r9, %r8
+	mul	%r10
+	add	%rax, %r8
+	adc	$0, %rdx
+L(1):	xor	R32(%rcx), R32(%rcx)
+	mov	%r8, %r9
+	sub	R32(%rdi), R32(%rcx)
+	shr	R8(%rcx), %r9
+	mov	R32(%rdi), R32(%rcx)
+	sal	R8(%rcx), %rdx
+	or	%rdx, %r9
+	sal	R8(%rcx), %r8
+	mov	%r9, %rax
+	mulq	(%r13)
+	mov	%rax, %rsi
+	inc	%r9
+	add	%r8, %rsi
+	adc	%r9, %rdx
+	imul	%r14, %rdx
+	sub	%rdx, %r8
+	lea	(%r8,%r14), %rax
+	cmp	%r8, %rsi
+	cmovc	%rax, %r8
+	mov	%r8, %rax
+	sub	%r14, %rax
+	cmovc	%r8, %rax
+	mov	R32(%rdi), R32(%rcx)
+	shr	R8(%rcx), %rax
+	pop	%rbx
+	pop	%rbp
+	pop	%r12
+	pop	%r13
+	pop	%r14
+	FUNC_EXIT()
+	ret
+L(one):
+	mov	(%rdi), %r8		C n = 1: single limb, just reduce
+	mov	8(%rcx), R32(%rdi)
+	xor	%rdx, %rdx
+	jmp	L(1)
+EPILOGUE()
+
+	ALIGN(16)
+PROLOGUE(mpn_mod_1s_2p_cps)
+C Precompute the table used by mpn_mod_1s_2p for divisor b (in %rsi):
+C stores bi (inverse of the normalized b), cnt (leading zero count of
+C b), B1modb, B2modb and B3modb into the array at %rdi.  Each B(k)modb
+C is derived from the previous one via a multiply by bi and a
+C conditional correction.
+	FUNC_ENTRY(2)
+	push	%rbp
+	bsr	%rsi, %rcx		C position of highest set bit of b
+	push	%rbx
+	mov	%rdi, %rbx
+	push	%r12
+	xor	$63, R32(%rcx)		C cnt = count of leading zeros
+	mov	%rsi, %r12
+	mov	R32(%rcx), R32(%rbp)	C preserve cnt over call
+	sal	R8(%rcx), %r12		C b << cnt
+IFSTD(`	mov	%r12, %rdi	')	C pass parameter
+IFDOS(`	mov	%r12, %rcx	')	C pass parameter
+IFDOS(`	sub	$32, %rsp	')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_invert_limb)
+IFDOS(`	add	$32, %rsp	')
+	mov	%r12, %r8
+	mov	%rax, %r11
+	mov	%rax, (%rbx)		C store bi
+	mov	%rbp, 8(%rbx)		C store cnt
+	neg	%r8
+	mov	R32(%rbp), R32(%rcx)
+	mov	$1, R32(%rsi)
+ifdef(`SHLD_SLOW',`
+	shl	R8(%rcx), %rsi
+	neg	R32(%rcx)
+	mov	%rax, %rbp
+	shr	R8(%rcx), %rax
+	or	%rax, %rsi
+	mov	%rbp, %rax
+	neg	R32(%rcx)
+',`
+	shld	R8(%rcx), %rax, %rsi	C FIXME: Slow on Atom and Nano
+')
+	imul	%r8, %rsi
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	R8(%rcx), %rsi
+	mov	%rsi, 16(%rbx)		C store B1modb
+
+	not	%rdx
+	imul	%r12, %rdx
+	lea	(%rdx,%r12), %rsi
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %rsi
+	mov	%r11, %rax
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	R8(%rcx), %rsi
+	mov	%rsi, 24(%rbx)		C store B2modb
+
+	not	%rdx
+	imul	%r12, %rdx
+	add	%rdx, %r12
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %r12
+
+	shr	R8(%rcx), %r12
+	mov	%r12, 32(%rbx)		C store B3modb
+
+	pop	%r12
+	pop	%rbx
+	pop	%rbp
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm
new file mode 100644
index 0000000..168d80a
--- /dev/null
+++ b/mpn/x86_64/mod_1_4.asm
@@ -0,0 +1,272 @@
+dnl  AMD64 mpn_mod_1s_4p
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009-2012, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 3
+C AMD K10	 3
+C Intel P4	15.5
+C Intel core2	 5
+C Intel corei	 4
+C Intel atom	23
+C VIA nano	 4.75
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[])
+C Return ap[] mod b, using the constants precomputed by
+C mpn_mod_1s_4p_cps: bi = cps[0], cnt = cps[1], B1modb..B5modb at
+C cps[2..6].  The main loop folds four source limbs per iteration into
+C the running two-limb sum %r9 (low) : %r8 (high).
+PROLOGUE(mpn_mod_1s_4p)
+	FUNC_ENTRY(4)
+	push	%r15
+	push	%r14
+	push	%r13
+	push	%r12
+	push	%rbp
+	push	%rbx
+
+	mov	%rdx, %r15
+	mov	%rcx, %r14
+	mov	16(%rcx), %r11		C B1modb
+	mov	24(%rcx), %rbx		C B2modb
+	mov	32(%rcx), %rbp		C B3modb
+	mov	40(%rcx), %r13		C B4modb
+	mov	48(%rcx), %r12		C B5modb
+	xor	R32(%r8), R32(%r8)
+	mov	R32(%rsi), R32(%rdx)
+	and	$3, R32(%rdx)
+	je	L(b0)
+	cmp	$2, R32(%rdx)
+	jc	L(b1)
+	je	L(b2)
+
+C Feed-in: dispatch on n mod 4, pre-folding the top 1..4 limbs so the
+C unrolled loop can run on a multiple of four limbs.
+L(b3):	lea	-24(%rdi,%rsi,8), %rdi
+	mov	8(%rdi), %rax
+	mul	%r11
+	mov	(%rdi), %r9
+	add	%rax, %r9
+	adc	%rdx, %r8
+	mov	16(%rdi), %rax
+	mul	%rbx
+	jmp	L(m0)
+
+	ALIGN(8)
+L(b0):	lea	-32(%rdi,%rsi,8), %rdi
+	mov	8(%rdi), %rax
+	mul	%r11
+	mov	(%rdi), %r9
+	add	%rax, %r9
+	adc	%rdx, %r8
+	mov	16(%rdi), %rax
+	mul	%rbx
+	add	%rax, %r9
+	adc	%rdx, %r8
+	mov	24(%rdi), %rax
+	mul	%rbp
+	jmp	L(m0)
+
+	ALIGN(8)
+L(b1):	lea	-8(%rdi,%rsi,8), %rdi
+	mov	(%rdi), %r9
+	jmp	L(m1)
+
+	ALIGN(8)
+L(b2):	lea	-16(%rdi,%rsi,8), %rdi
+	mov	8(%rdi), %r8
+	mov	(%rdi), %r9
+	jmp	L(m1)
+
+	ALIGN(16)
+L(top):	mov	-24(%rdi), %rax
+	mov	-32(%rdi), %r10
+	mul	%r11			C up[1] * B1modb
+	add	%rax, %r10
+	mov	-16(%rdi), %rax
+	mov	$0, R32(%rcx)
+	adc	%rdx, %rcx
+	mul	%rbx			C up[2] * B2modb
+	add	%rax, %r10
+	mov	-8(%rdi), %rax
+	adc	%rdx, %rcx
+	sub	$32, %rdi
+	mul	%rbp			C up[3] * B3modb
+	add	%rax, %r10
+	mov	%r13, %rax
+	adc	%rdx, %rcx
+	mul	%r9			C rl * B4modb
+	add	%rax, %r10
+	mov	%r12, %rax
+	adc	%rdx, %rcx
+	mul	%r8			C rh * B5modb
+	mov	%r10, %r9
+	mov	%rcx, %r8
+L(m0):	add	%rax, %r9
+	adc	%rdx, %r8
+L(m1):	sub	$4, %rsi
+	ja	L(top)
+
+C Wind-down: fold the final two-limb value and divide by b using the
+C precomputed inverse bi = cps[0]; cnt is reloaded from cps[1] (8(%r14)).
+L(end):	mov	8(%r14), R32(%rsi)
+	mov	%r8, %rax
+	mul	%r11
+	mov	%rax, %r8
+	add	%r9, %r8
+	adc	$0, %rdx
+	xor	R32(%rcx), R32(%rcx)
+	sub	R32(%rsi), R32(%rcx)
+	mov	%r8, %rdi
+	shr	R8(%rcx), %rdi
+	mov	R32(%rsi), R32(%rcx)
+	sal	R8(%rcx), %rdx
+	or	%rdx, %rdi
+	mov	%rdi, %rax
+	mulq	(%r14)
+	mov	%r15, %rbx
+	mov	%rax, %r9
+	sal	R8(%rcx), %r8
+	inc	%rdi
+	add	%r8, %r9
+	adc	%rdi, %rdx
+	imul	%rbx, %rdx
+	sub	%rdx, %r8
+	lea	(%r8,%rbx), %rax
+	cmp	%r8, %r9
+	cmovc	%rax, %r8
+	mov	%r8, %rax
+	sub	%rbx, %rax
+	cmovc	%r8, %rax
+	shr	R8(%rcx), %rax
+	pop	%rbx
+	pop	%rbp
+	pop	%r12
+	pop	%r13
+	pop	%r14
+	pop	%r15
+	FUNC_EXIT()
+	ret
+EPILOGUE()
+
+	ALIGN(16)
+C mpn_mod_1s_4p_cps (mp_limb_t cps[], mp_limb_t b)
+C Precompute the constant block used by mpn_mod_1s_4p: cnt = count of
+C leading zeros of b, bi = mpn_invert_limb(b << cnt), stored at cps[0]
+C and cps[1], followed by B1modb..B5modb at cps[2..6] (see the "store"
+C comments below).
+PROLOGUE(mpn_mod_1s_4p_cps)
+	FUNC_ENTRY(2)
+	push	%rbp
+	bsr	%rsi, %rcx
+	push	%rbx
+	mov	%rdi, %rbx
+	push	%r12
+	xor	$63, R32(%rcx)
+	mov	%rsi, %r12
+	mov	R32(%rcx), R32(%rbp)	C preserve cnt over call
+	sal	R8(%rcx), %r12		C b << cnt
+IFSTD(`	mov	%r12, %rdi	')	C pass parameter
+IFDOS(`	mov	%r12, %rcx	')	C pass parameter
+IFDOS(`	sub	$32, %rsp	')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_invert_limb)
+IFDOS(`	add	$32, %rsp	')
+	mov	%r12, %r8
+	mov	%rax, %r11
+	mov	%rax, (%rbx)		C store bi
+	mov	%rbp, 8(%rbx)		C store cnt
+	neg	%r8
+	mov	R32(%rbp), R32(%rcx)
+	mov	$1, R32(%rsi)
+C Double-limb left shift of the pair (1, bi) by cnt; the SHLD_SLOW path
+C builds the same value without using the shld instruction.
+ifdef(`SHLD_SLOW',`
+	shl	R8(%rcx), %rsi
+	neg	R32(%rcx)
+	mov	%rax, %rbp
+	shr	R8(%rcx), %rax
+	or	%rax, %rsi
+	mov	%rbp, %rax
+	neg	R32(%rcx)
+',`
+	shld	R8(%rcx), %rax, %rsi	C FIXME: Slow on Atom and Nano
+')
+	imul	%r8, %rsi
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	R8(%rcx), %rsi
+	mov	%rsi, 16(%rbx)		C store B1modb
+
+C NOTE(review): each not/imul/cmov group below appears to reduce the next
+C power of B modulo b using the inverse bi -- confirm against the generic
+C mod_1 C code.
+	not	%rdx
+	imul	%r12, %rdx
+	lea	(%rdx,%r12), %rsi
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %rsi
+	mov	%r11, %rax
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	R8(%rcx), %rsi
+	mov	%rsi, 24(%rbx)		C store B2modb
+
+	not	%rdx
+	imul	%r12, %rdx
+	lea	(%rdx,%r12), %rsi
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %rsi
+	mov	%r11, %rax
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	R8(%rcx), %rsi
+	mov	%rsi, 32(%rbx)		C store B3modb
+
+	not	%rdx
+	imul	%r12, %rdx
+	lea	(%rdx,%r12), %rsi
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %rsi
+	mov	%r11, %rax
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	R8(%rcx), %rsi
+	mov	%rsi, 40(%rbx)		C store B4modb
+
+	not	%rdx
+	imul	%r12, %rdx
+	add	%rdx, %r12
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %r12
+
+	shr	R8(%rcx), %r12
+	mov	%r12, 48(%rbx)		C store B5modb
+
+	pop	%r12
+	pop	%rbx
+	pop	%rbp
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/mod_34lsub1.asm b/mpn/x86_64/mod_34lsub1.asm
new file mode 100644
index 0000000..3dd7721
--- /dev/null
+++ b/mpn/x86_64/mod_34lsub1.asm
@@ -0,0 +1,215 @@
+dnl  AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
+
+dnl  Copyright 2000-2002, 2004, 2005, 2007, 2009-2012 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	    cycles/limb
+C AMD K8,K9	 0.67	   0.583 is possible with zero-reg instead of $0, 4-way
+C AMD K10	 0.67	   this seems hard to beat
+C AMD bd1	 1
+C AMD bd2	 1
+C AMD bd3	 ?
+C AMD bd4	 ?
+C AMD zen	 0.62
+C AMD bobcat	 1.07
+C AMD jaguar	 1
+C Intel P4	 7.35	   terrible, use old code
+C Intel core2	 1.25	   1+epsilon with huge unrolling
+C Intel NHM	 1.15	   this seems hard to beat
+C Intel SBR	 0.93
+C Intel IBR	 0.93
+C Intel HWL	 0.82
+C Intel BWL	 0.64
+C Intel SKY	 0.60
+C Intel atom	 2.5
+C Intel SLM      1.59
+C VIA nano	 1.25	   this seems hard to beat
+
+C INPUT PARAMETERS
+define(`ap',	%rdi)
+define(`n',	%rsi)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C  * Review feed-in and wind-down code.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+C Return up[] mod 2^48-1 (see file header).  Limbs are accumulated into
+C three 64-bit sums %rax/%rcx/%rdx plus a carry counter %r9, then folded
+C into 48-bit fields at the end.  n <= 2 is special-cased up front.
+PROLOGUE(mpn_mod_34lsub1)
+	FUNC_ENTRY(2)
+
+	mov	$0x0000FFFFFFFFFFFF, %r11
+
+	mov	(ap), %rax
+
+	cmp	$2, %rsi
+	ja	L(gt2)
+
+	jb	L(one)
+
+	mov	8(ap), %rsi
+	mov	%rax, %rdx
+	shr	$48, %rax		C src[0] low
+
+	and	%r11, %rdx		C src[0] high
+	add	%rdx, %rax
+	mov	R32(%rsi), R32(%rdx)
+
+	shr	$32, %rsi		C src[1] high
+	add	%rsi, %rax
+
+	shl	$16, %rdx		C src[1] low
+	add	%rdx, %rax
+L(one):	FUNC_EXIT()
+	ret
+
+
+C Don't change this, the wind-down code is not able to handle greater values
+define(UNROLL,3)
+
+L(gt2):	mov	8(ap), %rcx
+	mov	16(ap), %rdx
+	xor	%r9, %r9
+	add	$24, ap
+	sub	$eval(UNROLL*3+3), %rsi
+	jc	L(end)
+	ALIGN(16)
+L(top):
+	add	(ap), %rax
+	adc	8(ap), %rcx
+	adc	16(ap), %rdx
+	adc	$0, %r9
+forloop(i,1,UNROLL-1,`dnl
+	add	eval(i*24)(ap), %rax
+	adc	eval(i*24+8)(ap), %rcx
+	adc	eval(i*24+16)(ap), %rdx
+	adc	$0, %r9
+')dnl
+	add	$eval(UNROLL*24), ap
+	sub	$eval(UNROLL*3), %rsi
+	jnc	L(top)
+
+C Dispatch on the residual limb count (now negative in %rsi) into the
+C unrolled wind-down code below via the jump table.
+L(end):
+	lea	L(tab)(%rip), %r8
+ifdef(`PIC',
+`	movslq	36(%r8,%rsi,4), %r10
+	add	%r10, %r8
+	jmp	*%r8
+',`
+	jmp	*72(%r8,%rsi,8)
+')
+	JUMPTABSECT
+	ALIGN(8)
+L(tab):	JMPENT(	L(0), L(tab))
+	JMPENT(	L(1), L(tab))
+	JMPENT(	L(2), L(tab))
+	JMPENT(	L(3), L(tab))
+	JMPENT(	L(4), L(tab))
+	JMPENT(	L(5), L(tab))
+	JMPENT(	L(6), L(tab))
+	JMPENT(	L(7), L(tab))
+	JMPENT(	L(8), L(tab))
+	TEXT
+
+L(6):	add	(ap), %rax
+	adc	8(ap), %rcx
+	adc	16(ap), %rdx
+	adc	$0, %r9
+	add	$24, ap
+L(3):	add	(ap), %rax
+	adc	8(ap), %rcx
+	adc	16(ap), %rdx
+	jmp	L(cj1)
+
+L(7):	add	(ap), %rax
+	adc	8(ap), %rcx
+	adc	16(ap), %rdx
+	adc	$0, %r9
+	add	$24, ap
+L(4):	add	(ap), %rax
+	adc	8(ap), %rcx
+	adc	16(ap), %rdx
+	adc	$0, %r9
+	add	$24, ap
+L(1):	add	(ap), %rax
+	adc	$0, %rcx
+	jmp	L(cj2)
+
+L(8):	add	(ap), %rax
+	adc	8(ap), %rcx
+	adc	16(ap), %rdx
+	adc	$0, %r9
+	add	$24, ap
+L(5):	add	(ap), %rax
+	adc	8(ap), %rcx
+	adc	16(ap), %rdx
+	adc	$0, %r9
+	add	$24, ap
+L(2):	add	(ap), %rax
+	adc	8(ap), %rcx
+
+L(cj2):	adc	$0, %rdx
+L(cj1):	adc	$0, %r9
+L(0):	add	%r9, %rax
+	adc	$0, %rcx
+	adc	$0, %rdx
+	adc	$0, %rax
+
+C Fold the three accumulators into one value mod 2^48-1: split each into
+C its contribution below and above bit 48 and sum the pieces.
+	mov	%rax, %rdi		C 0mod3
+	shr	$48, %rax		C 0mod3 high
+
+	and	%r11, %rdi		C 0mod3 low
+	mov	R32(%rcx), R32(%r10)	C 1mod3
+
+	shr	$32, %rcx		C 1mod3 high
+
+	add	%rdi, %rax		C apply 0mod3 low
+	movzwl	%dx, R32(%rdi)		C 2mod3
+	shl	$16, %r10		C 1mod3 low
+
+	add	%rcx, %rax		C apply 1mod3 high
+	shr	$16, %rdx		C 2mod3 high
+
+	add	%r10, %rax		C apply 1mod3 low
+	shl	$32, %rdi		C 2mod3 low
+
+	add	%rdx, %rax		C apply 2mod3 high
+	add	%rdi, %rax		C apply 2mod3 low
+
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/mode1o.asm b/mpn/x86_64/mode1o.asm
new file mode 100644
index 0000000..a1775e3
--- /dev/null
+++ b/mpn/x86_64/mode1o.asm
@@ -0,0 +1,171 @@
+dnl  AMD64 mpn_modexact_1_odd -- Hensel norm remainder.
+
+dnl  Copyright 2000-2006, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C AMD K8,K9	10
+C AMD K10	10
+C Intel P4	33
+C Intel core2	13
+C Intel corei	14.5
+C Intel atom	35
+C VIA nano	 ?
+
+
+C The dependent chain in the main loop is
+C
+C                            cycles
+C	sub	%rdx, %rax	1
+C	imul	%r9, %rax	4
+C	mul	%r8		5
+C			      ----
+C       total		       10
+C
+C The mov load from src seems to need to be scheduled back before the jz to
+C achieve this speed, out-of-order execution apparently can't completely hide
+C the latency otherwise.
+C
+C The l=src[i]-cbit step is rotated back too, since that allows us to avoid it
+C for the first iteration (where there's no cbit).
+C
+C The code alignment used (32-byte) for the loop also seems necessary.  Without
+C that the non-PIC case has adc crossing the 0x60 offset, apparently making it
+C run at 11 cycles instead of 10.
+
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+C mpn_modexact_1_odd  (mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+C mpn_modexact_1c_odd (src, size, divisor, mp_limb_t carry)
+C Hensel-norm remainder by an odd divisor (see file header).  The _1_odd
+C entry zeroes the carry and continues in the _1c_odd code at L(ent)
+C (an explicit jmp is only needed for DOS64).
+PROLOGUE(mpn_modexact_1_odd)
+	FUNC_ENTRY(3)
+	mov	$0, R32(%rcx)
+IFDOS(`	jmp	L(ent)		')
+
+PROLOGUE(mpn_modexact_1c_odd)
+	FUNC_ENTRY(4)
+L(ent):
+	C rdi	src
+	C rsi	size
+	C rdx	divisor
+	C rcx	carry
+
+	mov	%rdx, %r8		C d
+	shr	R32(%rdx)		C d/2
+
+	LEA(	binvert_limb_table, %r9)
+
+	and	$127, R32(%rdx)
+	mov	%rcx, %r10		C initial carry
+
+	movzbl	(%r9,%rdx), R32(%rdx)	C inv 8 bits
+
+	mov	(%rdi), %rax		C src[0]
+	lea	(%rdi,%rsi,8), %r11	C src end
+	mov	%r8, %rdi		C d, made available to imull
+
+C Newton-iterate the 8-bit table inverse of d up to 16, 32, then 64 bits
+C (inv = 2*inv - inv*inv*d at each step).
+	lea	(%rdx,%rdx), R32(%rcx)	C 2*inv
+	imul	R32(%rdx), R32(%rdx)	C inv*inv
+
+	neg	%rsi			C -size
+
+	imul	R32(%rdi), R32(%rdx)	C inv*inv*d
+
+	sub	R32(%rdx), R32(%rcx)	C inv = 2*inv - inv*inv*d, 16 bits
+
+	lea	(%rcx,%rcx), R32(%rdx)	C 2*inv
+	imul	R32(%rcx), R32(%rcx)	C inv*inv
+
+	imul	R32(%rdi), R32(%rcx)	C inv*inv*d
+
+	sub	R32(%rcx), R32(%rdx)	C inv = 2*inv - inv*inv*d, 32 bits
+	xor	R32(%rcx), R32(%rcx)	C initial cbit
+
+	lea	(%rdx,%rdx), %r9	C 2*inv
+	imul	%rdx, %rdx		C inv*inv
+
+	imul	%r8, %rdx		C inv*inv*d
+
+	sub	%rdx, %r9		C inv = 2*inv - inv*inv*d, 64 bits
+	mov	%r10, %rdx		C initial climb
+
+	ASSERT(e,`	C d*inv == 1 mod 2^64
+	mov	%r8, %r10
+	imul	%r9, %r10
+	cmp	$1, %r10')
+
+	inc	%rsi
+	jz	L(one)
+
+
+	ALIGN(16)
+L(top):
+	C rax	l = src[i]-cbit
+	C rcx	new cbit, 0 or 1
+	C rdx	climb, high of last product
+	C rsi	counter, limbs, negative
+	C rdi
+	C r8	divisor
+	C r9	inverse
+	C r11	src end ptr
+
+	sub	%rdx, %rax		C l = src[i]-cbit - climb
+
+	adc	$0, %rcx		C more cbit
+	imul	%r9, %rax		C q = l * inverse
+
+	mul	%r8			C climb = high (q * d)
+
+	mov	(%r11,%rsi,8), %rax	C src[i+1]
+	sub	%rcx, %rax		C next l = src[i+1] - cbit
+	setc	R8(%rcx)		C new cbit
+
+	inc	%rsi
+	jnz	L(top)
+
+
+L(one):
+	sub	%rdx, %rax		C l = src[i]-cbit - climb
+
+	adc	$0, %rcx		C more cbit
+	imul	%r9, %rax		C q = l * inverse
+
+	mul	%r8			C climb = high (q * d)
+
+	lea	(%rcx,%rdx), %rax	C climb+cbit
+	FUNC_EXIT()
+	ret
+
+EPILOGUE(mpn_modexact_1c_odd)
+EPILOGUE(mpn_modexact_1_odd)
diff --git a/mpn/x86_64/mul_1.asm b/mpn/x86_64/mul_1.asm
new file mode 100644
index 0000000..02f6531
--- /dev/null
+++ b/mpn/x86_64/mul_1.asm
@@ -0,0 +1,192 @@
+dnl  AMD64 mpn_mul_1.
+
+dnl  Copyright 2003-2005, 2007, 2008, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9      2.54
+C AMD K10        2.54
+C AMD bull       4.98
+C AMD pile       4.80
+C AMD steam
+C AMD excavator
+C AMD bobcat     5.37
+C AMD jaguar     6.16
+C Intel P4      12.6
+C Intel core2    4.05
+C Intel NHM      4.0
+C Intel SBR      2.91
+C Intel IBR      2.73
+C Intel HWL      2.44
+C Intel BWL      2.39
+C Intel SKL      2.44
+C Intel atom    19.8
+C Intel SLM      9.0
+C VIA nano       4.25
+
+C The loop of this code is the result of running a code generation and
+C optimization tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * The loop is great, but the prologue and epilogue code was quickly written.
+C    Tune it!
+
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`vl',      `%rcx')   C r9
+
+define(`n',       `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(`	define(`up', ``%rsi'')	') dnl
+IFDOS(`	define(`rp', ``%rcx'')	') dnl
+IFDOS(`	define(`vl', ``%r9'')	') dnl
+IFDOS(`	define(`r9', ``rdi'')	') dnl
+IFDOS(`	define(`n',  ``%r8'')	') dnl
+IFDOS(`	define(`r8', ``r11'')	') dnl
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C mpn_mul_1  (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+C mpn_mul_1c (rp, up, n, vl, mp_limb_t carry)
+C rp[] = up[] * vl (+ carry); the high limb of the product is returned.
+C mpn_mul_1c loads its 5th argument into %r10 and joins at L(common);
+C mpn_mul_1 zeroes %r10 instead.
+PROLOGUE(mpn_mul_1c)
+IFDOS(``push	%rsi		'')
+IFDOS(``push	%rdi		'')
+IFDOS(``mov	%rdx, %rsi	'')
+	push	%rbx
+IFSTD(`	mov	%r8, %r10')
+IFDOS(`	mov	64(%rsp), %r10')	C 40 + 3*8  (3 push insns)
+	jmp	L(common)
+EPILOGUE()
+
+PROLOGUE(mpn_mul_1)
+IFDOS(``push	%rsi		'')
+IFDOS(``push	%rdi		'')
+IFDOS(``mov	%rdx, %rsi	'')
+
+	push	%rbx
+	xor	%r10, %r10
+L(common):
+	mov	(up), %rax		C read first u limb early
+IFSTD(`	mov	n_param, %rbx   ')	C move away n from rdx, mul uses it
+IFDOS(`	mov	n, %rbx         ')
+	mul	vl
+IFSTD(`	mov	%rbx, n         ')
+
+	add	%r10, %rax
+	adc	$0, %rdx
+
+	and	$3, R32(%rbx)
+	jz	L(b0)
+	cmp	$2, R32(%rbx)
+	jz	L(b2)
+	jg	L(b3)
+
+C Feed-in paths, selected by n mod 4; each sets up pointers, negates n
+C and enters the 4-way unrolled loop at the matching L(Lx) point.
+L(b1):	dec	n
+	jne	L(gt1)
+	mov	%rax, (rp)
+	jmp	L(ret)
+L(gt1):	lea	8(up,n,8), up
+	lea	-8(rp,n,8), rp
+	neg	n
+	xor	%r10, %r10
+	xor	R32(%rbx), R32(%rbx)
+	mov	%rax, %r9
+	mov	(up,n,8), %rax
+	mov	%rdx, %r8
+	jmp	L(L1)
+
+L(b0):	lea	(up,n,8), up
+	lea	-16(rp,n,8), rp
+	neg	n
+	xor	%r10, %r10
+	mov	%rax, %r8
+	mov	%rdx, %rbx
+	jmp	 L(L0)
+
+L(b3):	lea	-8(up,n,8), up
+	lea	-24(rp,n,8), rp
+	neg	n
+	mov	%rax, %rbx
+	mov	%rdx, %r10
+	jmp	L(L3)
+
+L(b2):	lea	-16(up,n,8), up
+	lea	-32(rp,n,8), rp
+	neg	n
+	xor	%r8, %r8
+	xor	R32(%rbx), R32(%rbx)
+	mov	%rax, %r10
+	mov	24(up,n,8), %rax
+	mov	%rdx, %r9
+	jmp	L(L2)
+
+	ALIGN(16)
+L(top):	mov	%r10, (rp,n,8)
+	add	%rax, %r9
+	mov	(up,n,8), %rax
+	adc	%rdx, %r8
+	mov	$0, R32(%r10)
+L(L1):	mul	vl
+	mov	%r9, 8(rp,n,8)
+	add	%rax, %r8
+	adc	%rdx, %rbx
+L(L0):	mov	8(up,n,8), %rax
+	mul	vl
+	mov	%r8, 16(rp,n,8)
+	add	%rax, %rbx
+	adc	%rdx, %r10
+L(L3):	mov	16(up,n,8), %rax
+	mul	vl
+	mov	%rbx, 24(rp,n,8)
+	mov	$0, R32(%r8)		C zero
+	mov	%r8, %rbx		C zero
+	add	%rax, %r10
+	mov	24(up,n,8), %rax
+	mov	%r8, %r9		C zero
+	adc	%rdx, %r9
+L(L2):	mul	vl
+	add	$4, n
+	js	 L(top)
+
+C Wind-down: store the last two result limbs and return the high limb.
+	mov	%r10, (rp,n,8)
+	add	%rax, %r9
+	adc	%r8, %rdx
+	mov	%r9, 8(rp,n,8)
+	add	%r8, %rdx
+L(ret):	mov	%rdx, %rax
+
+	pop	%rbx
+IFDOS(``pop	%rdi		'')
+IFDOS(``pop	%rsi		'')
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/mul_2.asm b/mpn/x86_64/mul_2.asm
new file mode 100644
index 0000000..1a9bc2c
--- /dev/null
+++ b/mpn/x86_64/mul_2.asm
@@ -0,0 +1,204 @@
+dnl  AMD64 mpn_mul_2 -- Multiply an n-limb vector with a 2-limb vector and
+dnl  store the result in a third limb vector.
+
+dnl  Copyright 2008, 2011, 2012, 2016 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9      4.53
+C AMD K10        4.53
+C AMD bull       9.76   10.37
+C AMD pile       9.22
+C AMD steam
+C AMD excavator
+C AMD bobcat    11.3
+C AMD jaguar    11.9
+C Intel P4      25.0
+C Intel core2    8.05
+C Intel NHM      7.72
+C Intel SBR      6.33
+C Intel IBR      6.15
+C Intel HWL      6.00
+C Intel BWL      4.44
+C Intel SKL      4.54
+C Intel atom    39.0
+C Intel SLM     24.0
+C VIA nano
+
+C This code is the result of running a code generation and optimization tool
+C suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * Work on feed-in and wind-down code.
+C  * Convert "mov $0" to "xor".
+C  * Adjust initial lea to save some bytes.
+C  * Perhaps adjust n from n_param&3 value?
+C  * Replace with 2.25 c/l sequence.
+
+C INPUT PARAMETERS
+define(`rp',	 `%rdi')
+define(`up',	 `%rsi')
+define(`n_param',`%rdx')
+define(`vp',	 `%rcx')
+
+define(`v0', `%r8')
+define(`v1', `%r9')
+define(`w0', `%rbx')
+define(`w1', `%rcx')
+define(`w2', `%rbp')
+define(`w3', `%r10')
+define(`n',  `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C mpn_mul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
+C rp[] = up[] * {v0 = vp[0], v1 = vp[1]} (see file header); the most
+C significant product limb is returned in %rax.  Feed-in dispatches on
+C n mod 4; the unrolled loop carries the partial sums in w0..w3.
+PROLOGUE(mpn_mul_2)
+	FUNC_ENTRY(4)
+	push	%rbx
+	push	%rbp
+
+	mov	(vp), v0
+	mov	8(vp), v1
+
+	mov	(up), %rax
+
+	mov	n_param, n
+	neg	n
+	lea	-8(up,n_param,8), up
+	lea	-8(rp,n_param,8), rp
+
+	and	$3, R32(n_param)
+	jz	L(m2p0)
+	cmp	$2, R32(n_param)
+	jc	L(m2p1)
+	jz	L(m2p2)
+L(m2p3):
+	mul	v0
+	xor	R32(w3), R32(w3)
+	mov	%rax, w1
+	mov	%rdx, w2
+	mov	8(up,n,8), %rax
+	add	$-1, n
+	mul	v1
+	add	%rax, w2
+	jmp	L(m23)
+L(m2p0):
+	mul	v0
+	xor	R32(w2), R32(w2)
+	mov	%rax, w0
+	mov	%rdx, w1
+	jmp	L(m20)
+L(m2p1):
+	mul	v0
+	xor	R32(w3), R32(w3)
+	xor	R32(w0), R32(w0)
+	xor	R32(w1), R32(w1)
+	add	$1, n
+	jmp	L(m2top)
+L(m2p2):
+	mul	v0
+	xor	R32(w0), R32(w0)
+	xor	R32(w1), R32(w1)
+	mov	%rax, w2
+	mov	%rdx, w3
+	mov	8(up,n,8), %rax
+	add	$-2, n
+	jmp	L(m22)
+
+
+	ALIGN(32)
+L(m2top):
+	add	%rax, w3
+	adc	%rdx, w0
+	mov	0(up,n,8), %rax
+	adc	$0, R32(w1)
+	mov	$0, R32(w2)
+	mul	v1
+	add	%rax, w0
+	mov	w3, 0(rp,n,8)
+	adc	%rdx, w1
+	mov	8(up,n,8), %rax
+	mul	v0
+	add	%rax, w0
+	adc	%rdx, w1
+	adc	$0, R32(w2)
+L(m20):	mov	8(up,n,8), %rax
+	mul	v1
+	add	%rax, w1
+	adc	%rdx, w2
+	mov	16(up,n,8), %rax
+	mov	$0, R32(w3)
+	mul	v0
+	add	%rax, w1
+	mov	16(up,n,8), %rax
+	adc	%rdx, w2
+	adc	$0, R32(w3)
+	mul	v1
+	add	%rax, w2
+	mov	w0, 8(rp,n,8)
+L(m23):	adc	%rdx, w3
+	mov	24(up,n,8), %rax
+	mul	v0
+	mov	$0, R32(w0)
+	add	%rax, w2
+	adc	%rdx, w3
+	mov	w1, 16(rp,n,8)
+	mov	24(up,n,8), %rax
+	mov	$0, R32(w1)
+	adc	$0, R32(w0)
+L(m22):	mul	v1
+	add	%rax, w3
+	mov	w2, 24(rp,n,8)
+	adc	%rdx, w0
+	mov	32(up,n,8), %rax
+	mul	v0
+	add	$4, n
+	js	L(m2top)
+
+
+C Wind-down: fold the last partial products, store the two final result
+C limbs and return the high limb in %rax.
+	add	%rax, w3
+	adc	%rdx, w0
+	adc	$0, R32(w1)
+	mov	(up), %rax
+	mul	v1
+	mov	w3, (rp)
+	add	%rax, w0
+	adc	%rdx, w1
+	mov	w0, 8(rp)
+	mov	w1, %rax
+
+	pop	%rbp
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/popham.asm b/mpn/x86_64/popham.asm
new file mode 100644
index 0000000..57ead59
--- /dev/null
+++ b/mpn/x86_64/popham.asm
@@ -0,0 +1,163 @@
+dnl  AMD64 mpn_popcount, mpn_hamdist -- population count and hamming distance.
+
+dnl  Copyright 2004, 2005, 2007, 2010-2012, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+include(`config.m4')
+
+
+C		     popcount	      hamdist
+C		    cycles/limb	    cycles/limb
+C AMD K8,K9		 6		 7
+C AMD K10		 6		 7
+C Intel P4		12		14.3
+C Intel core2		 7		 8
+C Intel corei		 ?		 7.3
+C Intel atom		16.5		17.5
+C VIA nano		 8.75		10.4
+
+C TODO
+C  * Tune.  It should be possible to reach 5 c/l for popcount and 6 c/l for
+C    hamdist for K8/K9.
+
+
+ifdef(`OPERATION_popcount',`
+  define(`func',`mpn_popcount')
+  define(`up',		`%rdi')
+  define(`n',		`%rsi')
+  define(`h55555555',	`%r10')
+  define(`h33333333',	`%r11')
+  define(`h0f0f0f0f',	`%rcx')
+  define(`h01010101',	`%rdx')
+  define(`POP',		`$1')
+  define(`HAM',		`dnl')
+')
+ifdef(`OPERATION_hamdist',`
+  define(`func',`mpn_hamdist')
+  define(`up',		`%rdi')
+  define(`vp',		`%rsi')
+  define(`n',		`%rdx')
+  define(`h55555555',	`%r10')
+  define(`h33333333',	`%r11')
+  define(`h0f0f0f0f',	`%rcx')
+  define(`h01010101',	`%r12')
+  define(`POP',		`dnl')
+  define(`HAM',		`$1')
+')
+
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+C func is mpn_popcount or mpn_hamdist, selected by the OPERATION_*
+C defines above (hamdist XORs in the vp limbs first).  Classic SWAR bit
+C count: halve into 2-bit sums, fold to 4-bit and 8-bit fields, then one
+C imul by 0x0101..01 and shr $56 sums the byte counts.  Two limbs are
+C processed per iteration; an odd n peels one limb and enters at L(mid).
+PROLOGUE(func)
+ POP(`	FUNC_ENTRY(2)		')
+ HAM(`	FUNC_ENTRY(3)		')
+	push	%rbx
+	mov	$0x5555555555555555, h55555555
+	push	%rbp
+	mov	$0x3333333333333333, h33333333
+ HAM(`	push	%r12		')
+	lea	(up,n,8), up
+	mov	$0x0f0f0f0f0f0f0f0f, h0f0f0f0f
+ HAM(`	lea	(vp,n,8), vp	')
+	neg	n
+	mov	$0x0101010101010101, h01010101
+	xor	R32(%rax), R32(%rax)
+	test	$1, R8(n)
+	jz	L(top)
+
+C Odd limb count: reduce one limb down to 4-bit fields, then join the
+C loop at L(mid).
+	mov	(up,n,8), %r8
+ HAM(`	xor	(vp,n,8), %r8	')
+
+	mov	%r8, %r9
+	shr	%r8
+	and	h55555555, %r8
+	sub	%r8, %r9
+
+	mov	%r9, %r8
+	shr	$2, %r9
+	and	h33333333, %r8
+	and	h33333333, %r9
+	add	%r8, %r9		C 16 4-bit fields (0..4)
+
+	dec	n
+	jmp	L(mid)
+
+	ALIGN(16)
+L(top):	mov	(up,n,8), %r8
+	mov	8(up,n,8), %rbx
+ HAM(`	xor	(vp,n,8), %r8	')
+ HAM(`	xor	8(vp,n,8), %rbx	')
+
+	mov	%r8, %r9
+	mov	%rbx, %rbp
+	shr	%r8
+	shr	%rbx
+	and	h55555555, %r8
+	and	h55555555, %rbx
+	sub	%r8, %r9
+	sub	%rbx, %rbp
+
+	mov	%r9, %r8
+	mov	%rbp, %rbx
+	shr	$2, %r9
+	shr	$2, %rbp
+	and	h33333333, %r8
+	and	h33333333, %r9
+	and	h33333333, %rbx
+	and	h33333333, %rbp
+	add	%r8, %r9		C 16 4-bit fields (0..4)
+	add	%rbx, %rbp		C 16 4-bit fields (0..4)
+
+	add	%rbp, %r9		C 16 4-bit fields (0..8)
+L(mid):	mov	%r9, %r8
+	shr	$4, %r9
+	and	h0f0f0f0f, %r8
+	and	h0f0f0f0f, %r9
+	add	%r8, %r9		C 8 8-bit fields (0..16)
+
+	imul	h01010101, %r9		C sum the 8 fields in high 8 bits
+	shr	$56, %r9
+
+	add	%r9, %rax		C add to total
+	add	$2, n
+	jnc	L(top)
+
+L(end):
+ HAM(`	pop	%r12		')
+	pop	%rbp
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/rsh1aors_n.asm b/mpn/x86_64/rsh1aors_n.asm
new file mode 100644
index 0000000..6a7d04d
--- /dev/null
+++ b/mpn/x86_64/rsh1aors_n.asm
@@ -0,0 +1,189 @@
+dnl  AMD64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
+dnl  AMD64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
+
+dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 2.14	(mpn_add_n + mpn_rshift need 4.125)
+C AMD K10	 2.14	(mpn_add_n + mpn_rshift need 4.125)
+C Intel P4	12.75
+C Intel core2	 3.75
+C Intel NMH	 4.4
+C Intel SBR	 ?
+C Intel atom	 ?
+C VIA nano	 3.25
+
+C TODO
+C  * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n',`  %rcx')
+
+ifdef(`OPERATION_rsh1add_n', `
+	define(ADDSUB,	      add)
+	define(ADCSBB,	      adc)
+	define(func_n,	      mpn_rsh1add_n)
+	define(func_nc,	      mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+	define(ADDSUB,	      sub)
+	define(ADCSBB,	      sbb)
+	define(func_n,	      mpn_rsh1sub_n)
+	define(func_nc,	      mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C func_nc: rp[] = (up[] OP vp[] + carry-in %r8) >> 1, where OP/carry
+C come from the ADDSUB/ADCSBB defines (add/adc or sub/sbb).  Joins the
+C func_n code at L(ent).  The bit shifted out at the bottom is returned
+C (built by the rcr/adc pair at L(ent)).
+PROLOGUE(func_nc)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+	push	%rbx
+
+	xor	R32(%rax), R32(%rax)
+	neg	%r8			C set C flag from parameter
+	mov	(up), %rbx
+	ADCSBB	(vp), %rbx
+	jmp	L(ent)
+EPILOGUE()
+
+	ALIGN(16)
+C func_n: same operation as func_nc but with no carry-in.
+PROLOGUE(func_n)
+	FUNC_ENTRY(4)
+	push	%rbx
+
+	xor	R32(%rax), R32(%rax)
+	mov	(up), %rbx
+	ADDSUB	(vp), %rbx
+L(ent):
+	rcr	%rbx			C rotate, save acy
+	adc	R32(%rax), R32(%rax)	C return value
+
+	mov	R32(n), R32(%r11)
+	and	$3, R32(%r11)
+
+	cmp	$1, R32(%r11)
+	je	L(do)			C jump if n = 1 5 9 ...
+
+C Feed-in: handle n mod 4 extra limbs so the main loop runs on a
+C multiple of four; %rbx carries the saved add/sub carry in its top bit.
+L(n1):	cmp	$2, R32(%r11)
+	jne	L(n2)			C jump unless n = 2 6 10 ...
+	add	%rbx, %rbx		C rotate carry limb, restore acy
+	mov	8(up), %r10
+	ADCSBB	8(vp), %r10
+	lea	8(up), up
+	lea	8(vp), vp
+	lea	8(rp), rp
+	rcr	%r10
+	rcr	%rbx
+	mov	%rbx, -8(rp)
+	jmp	L(cj1)
+
+L(n2):	cmp	$3, R32(%r11)
+	jne	L(n3)			C jump unless n = 3 7 11 ...
+	add	%rbx, %rbx		C rotate carry limb, restore acy
+	mov	8(up), %r9
+	mov	16(up), %r10
+	ADCSBB	8(vp), %r9
+	ADCSBB	16(vp), %r10
+	lea	16(up), up
+	lea	16(vp), vp
+	lea	16(rp), rp
+	rcr	%r10
+	rcr	%r9
+	rcr	%rbx
+	mov	%rbx, -16(rp)
+	jmp	L(cj2)
+
+L(n3):	dec	n			C come here for n = 4 8 12 ...
+	add	%rbx, %rbx		C rotate carry limb, restore acy
+	mov	8(up), %r8
+	mov	16(up), %r9
+	ADCSBB	8(vp), %r8
+	ADCSBB	16(vp), %r9
+	mov	24(up), %r10
+	ADCSBB	24(vp), %r10
+	lea	24(up), up
+	lea	24(vp), vp
+	lea	24(rp), rp
+	rcr	%r10
+	rcr	%r9
+	rcr	%r8
+	rcr	%rbx
+	mov	%rbx, -24(rp)
+	mov	%r8, -16(rp)
+L(cj2):	mov	%r9, -8(rp)
+L(cj1):	mov	%r10, %rbx
+
+L(do):
+	shr	$2, n			C				4
+	je	L(end)			C				2
+	ALIGN(16)
+L(top):	add	%rbx, %rbx		C rotate carry limb, restore acy
+
+	mov	8(up), %r8
+	mov	16(up), %r9
+	ADCSBB	8(vp), %r8
+	ADCSBB	16(vp), %r9
+	mov	24(up), %r10
+	mov	32(up), %r11
+	ADCSBB	24(vp), %r10
+	ADCSBB	32(vp), %r11
+
+	lea	32(up), up
+	lea	32(vp), vp
+
+	rcr	%r11			C rotate, save acy
+	rcr	%r10
+	rcr	%r9
+	rcr	%r8
+
+	rcr	%rbx
+	mov	%rbx, (rp)
+	mov	%r8, 8(rp)
+	mov	%r9, 16(rp)
+	mov	%r10, 24(rp)
+	mov	%r11, %rbx
+
+	lea	32(rp), rp
+	dec	n
+	jne	L(top)
+
+L(end):	mov	%rbx, (rp)
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/rshift.asm b/mpn/x86_64/rshift.asm
new file mode 100644
index 0000000..eb04e29
--- /dev/null
+++ b/mpn/x86_64/rshift.asm
@@ -0,0 +1,176 @@
+dnl  AMD64 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C AMD K8,K9	 2.375
+C AMD K10	 2.375
+C Intel P4	 8
+C Intel core2	 2.11
+C Intel corei	 ?
+C Intel atom	 5.75
+C VIA nano	 3.5
+
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`n',	`%rdx')
+define(`cnt',	`%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+C  mpn_rshift(rp, up, n, cnt): shift {up,n} right by cnt bits, store the
+C  n-limb result at rp, and return the bits shifted out (placed in the high
+C  end of the return limb -- see "function return value" below).  The count
+C  register %rcx is repeatedly negated to toggle between the right-shift
+C  count and its 64-bit complement for the complementary left shifts.
+PROLOGUE(mpn_rshift)
+	FUNC_ENTRY(4)
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	(up), %rax
+	shl	R8(%rcx), %rax		C function return value
+	neg	R32(%rcx)		C put lsh count in cl
+
+	lea	1(n), R32(%r8)
+
+	lea	-8(up,n,8), up
+	lea	-8(rp,n,8), rp
+	neg	n
+
+	and	$3, R32(%r8)
+	je	L(rlx)			C jump for n = 3, 7, 11, ...
+
+	dec	R32(%r8)
+	jne	L(1)
+C	n = 4, 8, 12, ...
+	mov	8(up,n,8), %r10
+	shr	R8(%rcx), %r10
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	16(up,n,8), %r8
+	shl	R8(%rcx), %r8
+	or	%r8, %r10
+	mov	%r10, 8(rp,n,8)
+	inc	n
+	jmp	L(rll)
+
+L(1):	dec	R32(%r8)
+	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
+C	n = 2, 6, 10, 14, ...
+	mov	8(up,n,8), %r10
+	shr	R8(%rcx), %r10
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	16(up,n,8), %r8
+	shl	R8(%rcx), %r8
+	or	%r8, %r10
+	mov	%r10, 8(rp,n,8)
+	inc	n
+	neg	R32(%rcx)		C put lsh count in cl
+L(1x):
+	cmp	$-1, n
+	je	L(ast)
+	mov	8(up,n,8), %r10
+	shr	R8(%rcx), %r10
+	mov	16(up,n,8), %r11
+	shr	R8(%rcx), %r11
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	16(up,n,8), %r8
+	mov	24(up,n,8), %r9
+	shl	R8(%rcx), %r8
+	or	%r8, %r10
+	shl	R8(%rcx), %r9
+	or	%r9, %r11
+	mov	%r10, 8(rp,n,8)
+	mov	%r11, 16(rp,n,8)
+	add	$2, n
+
+L(rll):	neg	R32(%rcx)		C put lsh count in cl
+L(rlx):	mov	8(up,n,8), %r10
+	shr	R8(%rcx), %r10
+	mov	16(up,n,8), %r11
+	shr	R8(%rcx), %r11
+
+	add	$4, n			C				      4
+	jb	L(end)			C				      2
+	ALIGN(16)
+L(top):
+	C finish stuff from lsh block
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-16(up,n,8), %r8
+	mov	-8(up,n,8), %r9
+	shl	R8(%rcx), %r8
+	or	%r8, %r10
+	shl	R8(%rcx), %r9
+	or	%r9, %r11
+	mov	%r10, -24(rp,n,8)
+	mov	%r11, -16(rp,n,8)
+	C start two new rsh
+	mov	(up,n,8), %r8
+	mov	8(up,n,8), %r9
+	shl	R8(%rcx), %r8
+	shl	R8(%rcx), %r9
+
+	C finish stuff from rsh block
+	neg	R32(%rcx)		C put lsh count in cl
+	mov	-8(up,n,8), %r10
+	mov	0(up,n,8), %r11
+	shr	R8(%rcx), %r10
+	or	%r10, %r8
+	shr	R8(%rcx), %r11
+	or	%r11, %r9
+	mov	%r8, -8(rp,n,8)
+	mov	%r9, 0(rp,n,8)
+	C start two new lsh
+	mov	8(up,n,8), %r10
+	mov	16(up,n,8), %r11
+	shr	R8(%rcx), %r10
+	shr	R8(%rcx), %r11
+
+	add	$4, n
+	jae	L(top)			C				      2
+L(end):
+	neg	R32(%rcx)		C put rsh count in cl
+	mov	-8(up), %r8
+	shl	R8(%rcx), %r8
+	or	%r8, %r10
+	mov	(up), %r9
+	shl	R8(%rcx), %r9
+	or	%r9, %r11
+	mov	%r10, -16(rp)
+	mov	%r11, -8(rp)
+
+	neg	R32(%rcx)		C put lsh count in cl
+L(ast):	mov	(up), %r10
+	shr	R8(%rcx), %r10
+	mov	%r10, (rp)
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/sec_tabselect.asm b/mpn/x86_64/sec_tabselect.asm
new file mode 100644
index 0000000..d145630
--- /dev/null
+++ b/mpn/x86_64/sec_tabselect.asm
@@ -0,0 +1,176 @@
+dnl  AMD64 mpn_sec_tabselect.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2011-2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb          good for cpu
+C AMD K8,K9	 1.5			Y
+C AMD K10	 1.4
+C AMD bd1	 2.64
+C AMD bobcat	 2.15			Y
+C Intel P4	 4
+C Intel core2	 1.38
+C Intel NHM	 1.75
+C Intel SBR	 1.25
+C Intel atom	 2.5			Y
+C VIA nano	 1.75			Y
+
+C NOTES
+C  * This has not been tuned for any specific processor.  Its speed should not
+C    be too bad, though.
+C  * Using SSE2/AVX2 could result in many-fold speedup.
+C  * WORKS FOR n mod 4 = 0 ONLY!  (NOTE(review): the L(b1x)/L(b01) tail
+C    code below does handle n mod 4 = 1, 2, 3 -- this restriction looks
+C    stale; verify before relying on it.)
+
+C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp',     `%rdi')
+define(`tp',     `%rsi')
+define(`n',      `%rdx')
+define(`nents',  `%rcx')
+define(`which',  `%r8')
+
+define(`i',      `%rbp')
+define(`j',      `%r9')
+
+C rax  rbx  rcx  rdx  rdi  rsi  rbp   r8   r9  r10  r11  r12  r13  r14  r15
+C          nents  n   rp   tab   i   which j    *    *    *    *    *    *
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C  Side-channel-silent table select: copy entry `which' of the `nents'-entry
+C  table at tp (each entry n limbs) to rp.  Every entry is read and combined
+C  under a mask, so neither the branch pattern nor the memory access pattern
+C  depends on `which'.  The mask trick: %rbx counts down from `which'; the
+C  sub $1 / sbb %rax,%rax pair yields an all-ones mask exactly on the
+C  iteration where %rbx reaches 0 (i.e. entry `which') and zero otherwise.
+PROLOGUE(mpn_sec_tabselect)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8d	')
+
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	mov	n, j
+	add	$-4, j
+	js	L(outer_end)
+
+C  Outer loop: handle the limbs four at a time (columns of the table).
+L(outer_top):
+	mov	nents, i
+	push	tp
+	xor	R32(%r12), R32(%r12)
+	xor	R32(%r13), R32(%r13)
+	xor	R32(%r14), R32(%r14)
+	xor	R32(%r15), R32(%r15)
+	mov	which, %rbx
+
+	ALIGN(16)
+L(top):	sub	$1, %rbx
+	sbb	%rax, %rax		C mask: -1 iff this is entry `which'
+	mov	0(tp), %r10
+	mov	8(tp), %r11
+	and	%rax, %r10
+	and	%rax, %r11
+	or	%r10, %r12
+	or	%r11, %r13
+	mov	16(tp), %r10
+	mov	24(tp), %r11
+	and	%rax, %r10
+	and	%rax, %r11
+	or	%r10, %r14
+	or	%r11, %r15
+	lea	(tp,n,8), tp		C advance to same column of next entry
+	add	$-1, i
+	jne	L(top)
+
+	mov	%r12, 0(rp)
+	mov	%r13, 8(rp)
+	mov	%r14, 16(rp)
+	mov	%r15, 24(rp)
+	pop	tp
+	lea	32(tp), tp
+	lea	32(rp), rp
+	add	$-4, j
+	jns	L(outer_top)
+L(outer_end):
+
+C  Tail: same masked scan for 2 remaining limbs, then for 1 remaining limb.
+	test	$2, R8(n)
+	jz	L(b0x)
+L(b1x):	mov	nents, i
+	push	tp
+	xor	R32(%r12), R32(%r12)
+	xor	R32(%r13), R32(%r13)
+	mov	which, %rbx
+	ALIGN(16)
+L(tp2):	sub	$1, %rbx
+	sbb	%rax, %rax
+	mov	0(tp), %r10
+	mov	8(tp), %r11
+	and	%rax, %r10
+	and	%rax, %r11
+	or	%r10, %r12
+	or	%r11, %r13
+	lea	(tp,n,8), tp
+	add	$-1, i
+	jne	L(tp2)
+	mov	%r12, 0(rp)
+	mov	%r13, 8(rp)
+	pop	tp
+	lea	16(tp), tp
+	lea	16(rp), rp
+
+L(b0x):	test	$1, R8(n)
+	jz	L(b00)
+L(b01):	mov	nents, i
+	xor	R32(%r12), R32(%r12)
+	mov	which, %rbx
+	ALIGN(16)
+L(tp1):	sub	$1, %rbx
+	sbb	%rax, %rax
+	mov	0(tp), %r10
+	and	%rax, %r10
+	or	%r10, %r12
+	lea	(tp,n,8), tp
+	add	$-1, i
+	jne	L(tp1)
+	mov	%r12, 0(rp)
+
+L(b00):	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/sqr_diag_addlsh1.asm b/mpn/x86_64/sqr_diag_addlsh1.asm
new file mode 100644
index 0000000..3eb32c6
--- /dev/null
+++ b/mpn/x86_64/sqr_diag_addlsh1.asm
@@ -0,0 +1,116 @@
+dnl  AMD64 mpn_sqr_diag_addlsh1
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2011-2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+C	     cycles/limb
+C AMD K8,K9	 2.5
+C AMD K10	 2.5
+C AMD bull	 3.6
+C AMD pile	 3.6
+C AMD steam	 ?
+C AMD bobcat	 4
+C AMD jaguar	 ?
+C Intel P4	 11.5
+C Intel core	 4
+C Intel NHM	 3.6
+C Intel SBR	 3.15
+C Intel IBR	 3.0
+C Intel HWL	 2.6
+C Intel BWL	 ?
+C Intel atom	14
+C VIA nano	 3.5
+
+C When playing with pointers, set this to $2 to fall back to conservative
+C indexing in wind-down code.
+define(`I',`$1')
+
+define(`rp',     `%rdi')
+define(`tp',     `%rsi')
+define(`up_arg', `%rdx')
+define(`n',      `%rcx')
+
+define(`up',     `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+C  NOTE(review): from the code below this adds the diagonal square terms
+C  up[i]^2 (the mul %rax after loading a limb of up) into 2*{tp,..} (the
+C  adc %r8,%r8 / adc %r9,%r9 doubling chain), two limbs per iteration --
+C  i.e. the final combining step of sqr_basecase.  Confirm against callers.
+C  %rbx carries the doubling carry across the mul, saved with setc.
+PROLOGUE(mpn_sqr_diag_addlsh1)
+	FUNC_ENTRY(4)
+	push	%rbx
+
+	dec	n
+	shl	n
+
+	mov	(up_arg), %rax
+
+	lea	(rp,n,8), rp
+	lea	(tp,n,8), tp
+	lea	(up_arg,n,4), up
+	neg	n
+
+	mul	%rax
+	mov	%rax, (rp,n,8)
+
+	xor	R32(%rbx), R32(%rbx)
+	jmp	L(mid)
+
+	ALIGN(16)
+L(top):	add	%r10, %r8
+	adc	%rax, %r9
+	mov	%r8, -8(rp,n,8)
+	mov	%r9, (rp,n,8)
+L(mid):	mov	8(up,n,4), %rax
+	mov	(tp,n,8), %r8
+	mov	8(tp,n,8), %r9
+	adc	%r8, %r8
+	adc	%r9, %r9
+	lea	(%rdx,%rbx), %r10
+	setc	R8(%rbx)		C save doubling carry across the mul
+	mul	%rax
+	add	$2, n
+	js	L(top)
+
+L(end):	add	%r10, %r8
+	adc	%rax, %r9
+	mov	%r8, I(-8(rp),-8(rp,n,8))
+	mov	%r9, I((rp),(rp,n,8))
+	adc	%rbx, %rdx
+	mov	%rdx, I(8(rp),8(rp,n,8))
+
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/sublsh1_n.asm b/mpn/x86_64/sublsh1_n.asm
new file mode 100644
index 0000000..86fd16d
--- /dev/null
+++ b/mpn/x86_64/sublsh1_n.asm
@@ -0,0 +1,160 @@
+dnl  AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl  Copyright 2003, 2005-2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`config.m4')
+
+
+C	     cycles/limb
+C AMD K8,K9	 2.2
+C AMD K10	 2.2
+C Intel P4	12.75
+C Intel core2	 3.45
+C Intel corei	 ?
+C Intel atom	 ?
+C VIA nano	 3.25
+
+C Sometimes speed degenerates, supposedly related to that some operand
+C alignments cause cache conflicts.
+
+C The speed is limited by decoding/issue bandwidth.  There are 26 instructions
+C in the loop, which corresponds to 26/3/4 = 2.167 c/l.
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C  mpn_sublsh1_n(rp, up, vp, n): rp[] = up[] - (vp[] << 1), returning the
+C  borrow.  Two carry chains are interleaved: the left-shift carry ("scy")
+C  is parked in %eax via sbb %eax,%eax and restored with add %eax,%eax; the
+C  subtraction borrow ("acy") is parked the same way in %ebp.  Lead-in code
+C  peels n mod 4 limbs so the main loop runs 4 limbs per iteration.
+PROLOGUE(mpn_sublsh1_n)
+	FUNC_ENTRY(4)
+	push	%rbx
+	push	%rbp
+
+	mov	(vp), %r8
+	mov	R32(n), R32(%rax)
+	lea	(rp,n,8), rp
+	lea	(up,n,8), up
+	lea	(vp,n,8), vp
+	neg	n
+	xor	R32(%rbp), R32(%rbp)
+	and	$3, R32(%rax)
+	je	L(b00)
+	cmp	$2, R32(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	add	%r8, %r8
+	mov	8(vp,n,8), %r9
+	adc	%r9, %r9
+	mov	16(vp,n,8), %r10
+	adc	%r10, %r10
+	sbb	R32(%rax), R32(%rax)	C save scy
+	mov	(up,n,8), %rbp
+	mov	8(up,n,8), %rbx
+	sub	%r8, %rbp
+	sbb	%r9, %rbx
+	mov	%rbp, (rp,n,8)
+	mov	%rbx, 8(rp,n,8)
+	mov	16(up,n,8), %rbp
+	sbb	%r10, %rbp
+	mov	%rbp, 16(rp,n,8)
+	sbb	R32(%rbp), R32(%rbp)	C save acy
+	add	$3, n
+	jmp	L(ent)
+
+L(b10):	add	%r8, %r8
+	mov	8(vp,n,8), %r9
+	adc	%r9, %r9
+	sbb	R32(%rax), R32(%rax)	C save scy
+	mov	(up,n,8), %rbp
+	mov	8(up,n,8), %rbx
+	sub	%r8, %rbp
+	sbb	%r9, %rbx
+	mov	%rbp, (rp,n,8)
+	mov	%rbx, 8(rp,n,8)
+	sbb	R32(%rbp), R32(%rbp)	C save acy
+	add	$2, n
+	jmp	L(ent)
+
+L(b01):	add	%r8, %r8
+	sbb	R32(%rax), R32(%rax)	C save scy
+	mov	(up,n,8), %rbp
+	sub	%r8, %rbp
+	mov	%rbp, (rp,n,8)
+	sbb	R32(%rbp), R32(%rbp)	C save acy
+	inc	n
+L(ent):	jns	L(end)
+
+	ALIGN(16)
+L(top):	add	R32(%rax), R32(%rax)	C restore scy
+
+	mov	(vp,n,8), %r8
+L(b00):	adc	%r8, %r8
+	mov	8(vp,n,8), %r9
+	adc	%r9, %r9
+	mov	16(vp,n,8), %r10
+	adc	%r10, %r10
+	mov	24(vp,n,8), %r11
+	adc	%r11, %r11
+
+	sbb	R32(%rax), R32(%rax)	C save scy
+	add	R32(%rbp), R32(%rbp)	C restore acy
+
+	mov	(up,n,8), %rbp
+	mov	8(up,n,8), %rbx
+	sbb	%r8, %rbp
+	sbb	%r9, %rbx
+	mov	%rbp, (rp,n,8)
+	mov	%rbx, 8(rp,n,8)
+	mov	16(up,n,8), %rbp
+	mov	24(up,n,8), %rbx
+	sbb	%r10, %rbp
+	sbb	%r11, %rbx
+	mov	%rbp, 16(rp,n,8)
+	mov	%rbx, 24(rp,n,8)
+
+	sbb	R32(%rbp), R32(%rbp)	C save acy
+	add	$4, n
+	js	L(top)
+
+L(end):	add	R32(%rbp), R32(%rax)	C combine the two saved carries
+	neg	R32(%rax)		C return borrow as 0/1/2 total
+
+	pop	%rbp
+	pop	%rbx
+	FUNC_EXIT()
+	ret
+EPILOGUE()
diff --git a/mpn/x86_64/x86_64-defs.m4 b/mpn/x86_64/x86_64-defs.m4
new file mode 100644
index 0000000..4e08f2a
--- /dev/null
+++ b/mpn/x86_64/x86_64-defs.m4
@@ -0,0 +1,493 @@
+divert(-1)
+
+dnl  m4 macros for amd64 assembler.
+
+dnl  Copyright 1999-2005, 2008, 2009, 2011-2013, 2017 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+
+dnl  Usage: CPUVEC_FUNCS_LIST
+dnl
+dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
+dnl  order they appear in that structure.
+
+dnl  NOTE(review): per the usage comment above, this list mirrors the
+dnl  cpuvec_t struct in gmp-impl.h -- keep the entries and their order in
+dnl  sync with that structure when either changes.
+define(CPUVEC_FUNCS_LIST,
+``add_n',
+`addlsh1_n',
+`addlsh2_n',
+`addmul_1',
+`addmul_2',
+`bdiv_dbm1c',
+`cnd_add_n',
+`cnd_sub_n',
+`com',
+`copyd',
+`copyi',
+`divexact_1',
+`divrem_1',
+`gcd_11',
+`lshift',
+`lshiftc',
+`mod_1',
+`mod_1_1p',
+`mod_1_1p_cps',
+`mod_1s_2p',
+`mod_1s_2p_cps',
+`mod_1s_4p',
+`mod_1s_4p_cps',
+`mod_34lsub1',
+`modexact_1c_odd',
+`mul_1',
+`mul_basecase',
+`mullo_basecase',
+`preinv_divrem_1',
+`preinv_mod_1',
+`redc_1',
+`redc_2',
+`rshift',
+`sqr_basecase',
+`sub_n',
+`sublsh1_n',
+`submul_1'')
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
+dnl  since different alignments are wanted in various circumstances.  So for
+dnl  instance,
+dnl
+dnl                  TEXT
+dnl                  ALIGN(16)
+dnl          PROLOGUE(mpn_add_n)
+dnl                  ...
+dnl          EPILOGUE()
+
+dnl  Emit the symbol directives (GLOBL/TYPE/COFF type info) and the label
+dnl  for a function; alignment is left to the caller (see comment above).
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+`	GLOBL	$1
+	TYPE($1,`function')
+	COFF_TYPE($1)
+$1:
+')
+
+
+dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
+dnl
+dnl  Emit COFF style ".def ... .endef" type information for a function, when
+dnl  supported.  The argument should include any GSYM_PREFIX.
+dnl
+dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
+
+define(COFF_TYPE,
+m4_assert_numargs(1)
+m4_assert_defined(`HAVE_COFF_TYPE')
+`ifelse(HAVE_COFF_TYPE,yes,
+	`.def	$1
+	.scl	2
+	.type	32
+	.endef')')
+
+
+dnl  Usage: ASSERT([cond][,instructions])
+dnl
+dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl  flags condition to then be satisfied.  For example,
+dnl
+dnl         ASSERT(ne, `cmpq %rax, %rbx')
+dnl
+dnl  The instructions can be omitted to just assert a flags condition with
+dnl  no extra calculation.  For example,
+dnl
+dnl         ASSERT(nc)
+dnl
+dnl  When `instructions' is not empty, a pushfq/popfq is added for
+dnl  convenience to preserve the flags, but the instructions themselves must
+dnl  preserve any registers that matter.
+dnl
+dnl  The condition can be omitted to just output the given instructions when
+dnl  assertion checking is wanted.  In this case the pushf/popf is omitted.
+dnl  For example,
+dnl
+dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
+
+dnl  On failure the ud2 instruction raises an invalid-opcode trap.
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+m4_assert_defined(`WANT_ASSERT')
+`ifelse(WANT_ASSERT,1,
+`ifelse(`$1',,
+`	$2',
+`ifelse(`$2',,,
+`	pushfq')
+	$2
+	`j$1'	L(ASSERT_ok`'ASSERT_counter)
+	ud2	C assertion failed
+L(ASSERT_ok`'ASSERT_counter):
+ifelse(`$2',,,`	popfq')
+define(`ASSERT_counter',incr(ASSERT_counter))')')')
+
+dnl  ASSERT_counter generates a unique local label per expanded assertion.
+define(ASSERT_counter,1)
+
+dnl LEA - load effective address
+dnl
+dnl FIXME: We should never create a GOT entry and therefore use the simpler 2nd
+dnl variant always. We need to understand what happens for not-yet-hidden
+dnl symbols first.
+dnl
+define(`LEA',`dnl
+ifdef(`PIC',
+	`mov	$1@GOTPCREL(%rip), $2'
+,
+	`lea	$1(%rip), $2')
+')
+
+
+dnl  Usage: DEF_OBJECT(label,align[,section])
+dnl
+dnl  Emit the section (RODATA by default), alignment and label introducing
+dnl  a data object.  Pair with END_OBJECT(label).
+define(`DEF_OBJECT',
+m4_assert_numargs_range(2,3)
+`	ifelse($#,3,`$3',`RODATA')
+	ALIGN($2)
+$1:
+')
+
+dnl  Usage: END_OBJECT(label) -- emit the SIZE directive for the object.
+define(`END_OBJECT',
+m4_assert_numargs(1)
+`	SIZE(`$1',.-`$1')')
+
+
+dnl  Usage: R32(reg) / R8(reg)
+dnl
+dnl  Map a 64-bit register name to its 32-bit (R32) or low 8-bit (R8) alias,
+dnl  e.g. R32(%rax) => %eax, R8(%rcx) => %cl.
+define(`R32',
+	`ifelse($1,`%rax',`%eax',
+		$1,`%rbx',`%ebx',
+		$1,`%rcx',`%ecx',
+		$1,`%rdx',`%edx',
+		$1,`%rsi',`%esi',
+		$1,`%rdi',`%edi',
+		$1,`%rbp',`%ebp',
+		$1,`%r8',`%r8d',
+		$1,`%r9',`%r9d',
+		$1,`%r10',`%r10d',
+		$1,`%r11',`%r11d',
+		$1,`%r12',`%r12d',
+		$1,`%r13',`%r13d',
+		$1,`%r14',`%r14d',
+		$1,`%r15',`%r15d')')
+define(`R8',
+	`ifelse($1,`%rax',`%al',
+		$1,`%rbx',`%bl',
+		$1,`%rcx',`%cl',
+		$1,`%rdx',`%dl',
+		$1,`%rsi',`%sil',
+		$1,`%rdi',`%dil',
+		$1,`%rbp',`%bpl',
+		$1,`%r8',`%r8b',
+		$1,`%r9',`%r9b',
+		$1,`%r10',`%r10b',
+		$1,`%r11',`%r11b',
+		$1,`%r12',`%r12b',
+		$1,`%r13',`%r13b',
+		$1,`%r14',`%r14b',
+		$1,`%r15',`%r15b')')
+
+
+dnl  Usage: CALL(funcname)
+dnl
+
+dnl  CALL/TCALL: call or tail-jump to a GMP function, going through the PLT
+dnl  when assembling PIC.
+define(`CALL',`dnl
+ifdef(`PIC',
+	`call	GSYM_PREFIX`'$1@PLT'
+,
+	`call	GSYM_PREFIX`'$1'
+)')
+
+define(`TCALL',`dnl
+ifdef(`PIC',
+	`jmp	GSYM_PREFIX`'$1@PLT'
+,
+	`jmp	GSYM_PREFIX`'$1'
+)')
+
+
+define(`JUMPTABSECT', `.section	.data.rel.ro.local,"a",@progbits')
+
+
+dnl  Usage: JMPENT(targlabel,tablabel)
+dnl  Emit one jump-table entry: PC-relative 32-bit offset under PIC,
+dnl  absolute 64-bit address otherwise.
+define(`JMPENT',`dnl
+ifdef(`PIC',
+	`.long	$1-$2'dnl
+,
+	`.quad	$1'dnl
+)')
+
+
+dnl  These macros are defined just for DOS64, where they provide calling
+dnl  sequence glue code.  For the ELF/standard ABI they expand to nothing.
+
+define(`FUNC_ENTRY',`')
+define(`FUNC_EXIT',`')
+
+
+dnl  Target ABI macros.
+
+define(`IFDOS',   `')
+define(`IFSTD',   `$1')
+define(`IFELF',   `$1')
+
+
+dnl  Usage: PROTECT(symbol)
+dnl
+dnl  Used for private GMP symbols that should never be overridden by users.
+dnl  This can save reloc entries and improve shlib sharing as well as
+dnl  application startup times
+
+define(`PROTECT',  `.hidden $1')
+
+
+dnl  Usage: x86_lookup(target, key,value, key,value, ...)
+dnl
+dnl  Look for `target' among the `key' parameters.
+dnl
+dnl  x86_lookup expands to the corresponding `value', or generates an error
+dnl  if `target' isn't found.
+
+dnl  Recursively walks the key,value pairs; errors out when fewer than two
+dnl  arguments remain (target not found).
+define(x86_lookup,
+m4_assert_numargs_range(1,999)
+`ifelse(eval($#<3),1,
+`m4_error(`unrecognised part of x86 instruction: $1
+')',
+`ifelse(`$1',`$2', `$3',
+`x86_lookup(`$1',shift(shift(shift($@))))')')')
+
+
+dnl  Usage: x86_opcode_regxmm(reg)
+dnl
+dnl  Validate the given xmm register, and return its number, 0 to 15.
+
+define(x86_opcode_regxmm,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_regxmm_list)')
+
+define(x86_opcode_regxmm_list,
+``%xmm0',0,
+`%xmm1',1,
+`%xmm2',2,
+`%xmm3',3,
+`%xmm4',4,
+`%xmm5',5,
+`%xmm6',6,
+`%xmm7',7,
+`%xmm8',8,
+`%xmm9',9,
+`%xmm10',10,
+`%xmm11',11,
+`%xmm12',12,
+`%xmm13',13,
+`%xmm14',14,
+`%xmm15',15')
+
+dnl  Usage: palignr($imm,%srcreg,%dstreg)
+dnl
+dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
+dnl  still distributed versions of gas don't know SSSE3 instructions.
+
+dnl  A REX prefix byte is emitted only when either register is xmm8-xmm15.
+define(`palignr',
+m4_assert_numargs(3)
+`.byte	0x66,dnl
+ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
+       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
+0x0f,0x3a,0x0f,dnl
+eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
+substr($1,1)')
+
+
+dnl  Usage
+dnl
+dnl    regnum(op)   raw operand index (so slightly misnamed)
+dnl    regnumh(op)  high bit of register operand number
+dnl    ix(op)       0 for reg operand, 1 for plain pointer operand.
+dnl
+
+define(`regnum',`x86_lookup(`$1',oplist)')
+define(`regnumh',`eval(regnum($1)/8 & 1)')
+define(`ix',`eval(regnum($1)/16)')
+dnl  Indices 0-15 are direct registers, 16-31 the corresponding indirect
+dnl  (pointer) forms; ix() above keys off that split.
+define(`oplist',
+``%rax',   0, `%rcx',   1, `%rdx',   2,  `%rbx',   3,
+ `%rsp',   4, `%rbp',   5, `%rsi',   6,  `%rdi',   7,
+ `%r8',    8, `%r9',    9, `%r10',  10,  `%r11',  11,
+ `%r12',  12, `%r13',  13, `%r14',  14,  `%r15',  15,
+ `(%rax)',16, `(%rcx)',17, `(%rdx)',18,  `(%rbx)',19,
+ `(%rsp)',20, `(%rbp)',21, `(%rsi)',22,  `(%rdi)',23,
+ `(%r8)', 24, `(%r9)', 25, `(%r10)',26,  `(%r11)',27,
+ `(%r12)',28, `(%r13)',29, `(%r14)',30,  `(%r15)',31')
+
+dnl  Usage (by mulx, shlx, shrx)
+dnl
+dnl     reg1,reg2,reg3,opc1,opc2
+dnl
+dnl  or
+dnl
+dnl     (reg1),reg2,reg3,opc1,opc2
+dnl
+dnl  where reg1 is any register but rsp,rbp,r12,r13, or
+dnl
+dnl  or
+dnl
+dnl     off,(reg1),reg2,reg3,opc1,opc2
+dnl
+dnl  where reg1 is any register but rsp,r12.
+dnl
+dnl  The exceptions are due to special coding needed for some registers; rsp
+dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
+dnl  offset-less form.
+dnl
+dnl  Other addressing forms are not handled.  Invalid forms are not properly
+dnl  detected.  Offsets that don't fit one byte are not handled correctly.
+
+dnl  Hand-encodes the instruction as raw bytes (0xc4 escape) so that old
+dnl  assemblers lacking these mnemonics can still build the file.
+define(`c4_helper',`dnl
+.byte	0xc4`'dnl
+ifelse(`$#',5,`dnl
+,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
+,eval(0x$4-8*regnum($2))`'dnl
+,0x$5`'dnl
+,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
+',`$#',6,`dnl
+,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
+,eval(0x$5-8*regnum($3))`'dnl
+,0x$6`'dnl
+,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
+,eval(($1 + 256) % 256)`'dnl
+')')
+
+
+dnl  Usage
+dnl
+dnl     mulx(reg1,reg2,reg3)
+dnl
+dnl  or
+dnl
+dnl     mulx((reg1),reg2,reg3)
+dnl
+dnl  where reg1 is any register but rsp,rbp,r12,r13, or
+dnl
+dnl     mulx(off,(reg1),reg2,reg3)
+dnl
+dnl  where reg1 is any register but rsp,r12.
+
+define(`mulx',`dnl
+ifelse(`$#',3,`dnl
+c4_helper($1,$2,$3,fb,f6)',`dnl         format 1,2
+c4_helper($1,$2,$3,$4,fb,f6)'dnl	format 3
+)')
+
+
+dnl  Usage
+dnl
+dnl     shlx(reg1,reg2,reg3)
+dnl     shrx(reg1,reg2,reg3)
+dnl
+dnl  or
+dnl
+dnl     shlx(reg1,(reg2),reg3)
+dnl     shrx(reg1,(reg2),reg3)
+dnl
+dnl  where reg2 is any register but rsp,rbp,r12,r13, or
+dnl
+dnl     shlx(reg1,off,(reg2),reg3)
+dnl     shrx(reg1,off,(reg2),reg3)
+dnl
+dnl  where reg2 is any register but rsp,r12.
+
+dnl  sarx takes the same argument forms as shlx/shrx; the three differ only
+dnl  in the opcode byte passed to c4_helper.
+define(`shlx',`dnl
+ifelse(`$#',3,`dnl
+c4_helper($2,$1,$3,f9,f7)',`dnl         format 1,2
+c4_helper($1,$3,$2,$4,f9,f7)'dnl        format 3
+)')
+
+define(`shrx',`dnl
+ifelse(`$#',3,`dnl
+c4_helper($2,$1,$3,fb,f7)',`dnl         format 1,2
+c4_helper($1,$3,$2,$4,fb,f7)'dnl        format 3
+)')
+
+define(`sarx',`dnl
+ifelse(`$#',3,`dnl
+c4_helper($2,$1,$3,fa,f7)',`dnl         format 1,2
+c4_helper($1,$3,$2,$4,fa,f7)'dnl        format 3
+)')
+
+
+dnl  Usage
+dnl
+dnl     adcx(reg1,reg2)
+dnl     adox(reg1,reg2)
+dnl
+dnl  or
+dnl
+dnl     adcx((reg1),reg2)
+dnl     adox((reg1),reg2)
+dnl
+dnl  where reg1 is any register but rsp,rbp,r12,r13, or
+dnl
+dnl     adcx(off,(reg1),reg2)
+dnl     adox(off,(reg1),reg2)
+dnl
+dnl  where reg1 is any register but rsp,r12.
+dnl
+dnl  The exceptions are due to special coding needed for some registers; rsp
+dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
+dnl  offset-less form.
+dnl
+dnl  Other addressing forms are not handled.  Invalid forms are not properly
+dnl  detected.  Offsets that don't fit one byte are not handled correctly.
+
+dnl  Shared byte sequence (REX prefix + 0f 38 f6 opcode) for adcx/adox;
+dnl  the distinguishing 0x66/0xf3 prefix is emitted by the wrappers below.
+define(`adx_helper',`dnl
+,eval(0x48+regnumh($1)+4*regnumh($2))`'dnl
+,0x0f`'dnl
+,0x38`'dnl
+,0xf6`'dnl
+')
+
+define(`adx',`dnl
+ifelse(`$#',2,`dnl
+adx_helper($1,$2)dnl
+,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($2))-0xc0*ix($1))`'dnl
+',`$#',3,`dnl
+adx_helper($2,$3)dnl
+,eval(0x40+(7 & regnum($2))+8*(7 & regnum($3)))`'dnl
+,eval(($1 + 256) % 256)`'dnl
+')')
+
+define(`adcx',`dnl
+.byte	0x66`'dnl
+adx($@)')
+
+define(`adox',`dnl
+.byte	0xf3`'dnl
+adx($@)')
+
+divert`'dnl
diff --git a/mpq/abs.c b/mpq/abs.c
new file mode 100644
index 0000000..1f0bf13
--- /dev/null
+++ b/mpq/abs.c
@@ -0,0 +1,55 @@
+/* mpq_abs -- absolute value of a rational.
+
+Copyright 2000, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpq_abs 1
+
+#include "gmp-impl.h"
+
+
+/* Set DST to |SRC|.  A canonical mpq has a positive denominator, so only
+   the numerator's sign word needs adjusting.  When DST aliases SRC we can
+   skip all copying and just overwrite the numerator size field; otherwise
+   both numerator and denominator limbs are copied.  */
+void
+mpq_abs (mpq_ptr dst, mpq_srcptr src)
+{
+  mp_size_t  num_abs_size = ABSIZ(NUM(src));
+
+  if (dst != src)
+    {
+      /* DEN(src) is assumed positive (canonical form), so SIZ is its
+         limb count.  */
+      mp_size_t  den_size = SIZ(DEN(src));
+      mp_ptr dp;
+
+      dp = MPZ_NEWALLOC (NUM(dst), num_abs_size);
+      MPN_COPY (dp, PTR(NUM(src)), num_abs_size);
+
+      dp = MPZ_NEWALLOC (DEN(dst), den_size);
+      SIZ(DEN(dst)) = den_size;
+      MPN_COPY (dp, PTR(DEN(src)), den_size);
+    }
+
+  /* ABSIZ is non-negative, so this both copies and absolutes the sign.  */
+  SIZ(NUM(dst)) = num_abs_size;
+}
diff --git a/mpq/aors.c b/mpq/aors.c
new file mode 100644
index 0000000..e86af94
--- /dev/null
+++ b/mpq/aors.c
@@ -0,0 +1,112 @@
+/* mpq_add, mpq_sub -- add or subtract rational numbers.
+
+Copyright 1991, 1994-1997, 2000, 2001, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+static void __gmpq_aors (REGPARM_3_1 (mpq_ptr, mpq_srcptr, mpq_srcptr, void (*) (mpz_ptr, mpz_srcptr, mpz_srcptr))) REGPARM_ATTR (1);
+#define mpq_aors(w,x,y,fun)  __gmpq_aors (REGPARM_3_1 (w, x, y, fun))
+
+/* Shared worker for mpq_add/mpq_sub: rop = op1 +- op2 with FUN supplying
+   mpz_add or mpz_sub.  Algorithm (visible below): with g = gcd(d1,d2),
+   compute t = n1*(d2/g) +- n2*(d1/g); then with g2 = gcd(t,g) the result
+   is t/g2 over (d1/g)*(d2/g2), which stays canonical for canonical
+   inputs.  */
+REGPARM_ATTR (1) static void
+mpq_aors (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2,
+          void (*fun) (mpz_ptr, mpz_srcptr, mpz_srcptr))
+{
+  mpz_t gcd;
+  mpz_t tmp1, tmp2;
+  mp_size_t op1_num_size = ABSIZ(NUM(op1));
+  mp_size_t op1_den_size =   SIZ(DEN(op1));
+  mp_size_t op2_num_size = ABSIZ(NUM(op2));
+  mp_size_t op2_den_size =   SIZ(DEN(op2));
+  TMP_DECL;
+
+  TMP_MARK;
+  MPZ_TMP_INIT (gcd, MIN (op1_den_size, op2_den_size));
+  MPZ_TMP_INIT (tmp1, op1_num_size + op2_den_size);
+  MPZ_TMP_INIT (tmp2, op2_num_size + op1_den_size);
+
+  /* ROP might be identical to either operand, so don't store the
+     result there until we are finished with the input operands.  We
+     dare to overwrite the numerator of ROP when we are finished
+     with the numerators of OP1 and OP2.  */
+
+  mpz_gcd (gcd, DEN(op1), DEN(op2));
+  if (! MPZ_EQUAL_1_P (gcd))
+    {
+      mpz_t t;
+
+      /* t holds n1*(d2/g) and then the combined numerator.  */
+      MPZ_TMP_INIT (t, MAX (op1_num_size + op2_den_size,
+	     op2_num_size + op1_den_size) + 2 - SIZ(gcd));
+
+      mpz_divexact_gcd (t, DEN(op2), gcd);      /* t    = d2/g */
+      mpz_divexact_gcd (tmp2, DEN(op1), gcd);   /* tmp2 = d1/g */
+
+      mpz_mul (tmp1, NUM(op1), t);              /* tmp1 = n1*(d2/g) */
+      mpz_mul (t, NUM(op2), tmp2);              /* t    = n2*(d1/g) */
+
+      (*fun) (t, tmp1, t);                      /* t = tmp1 +- t */
+
+      /* g2 = gcd(t,g); only factors of g can be shared with t.  */
+      mpz_gcd (gcd, t, gcd);
+      if (MPZ_EQUAL_1_P (gcd))
+        {
+          mpz_set (NUM(rop), t);
+          mpz_mul (DEN(rop), DEN(op2), tmp2);   /* den = d2*(d1/g) */
+        }
+      else
+        {
+          mpz_divexact_gcd (NUM(rop), t, gcd);  /* num = t/g2 */
+          mpz_divexact_gcd (tmp1, DEN(op2), gcd);
+          mpz_mul (DEN(rop), tmp1, tmp2);       /* den = (d2/g2)*(d1/g) */
+        }
+    }
+  else
+    {
+      /* The common divisor is 1.  This is the case (for random input) with
+	 probability 6/(pi**2), which is about 60.8%.  */
+      mpz_mul (tmp1, NUM(op1), DEN(op2));
+      mpz_mul (tmp2, NUM(op2), DEN(op1));
+      (*fun) (NUM(rop), tmp1, tmp2);
+      mpz_mul (DEN(rop), DEN(op1), DEN(op2));
+    }
+  TMP_FREE;
+}
+
+
+/* rop = op1 + op2, result canonical for canonical inputs.  */
+void
+mpq_add (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2)
+{
+  mpq_aors (rop, op1, op2, mpz_add);
+}
+
+/* rop = op1 - op2, result canonical for canonical inputs.  */
+void
+mpq_sub (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2)
+{
+  mpq_aors (rop, op1, op2, mpz_sub);
+}
diff --git a/mpq/canonicalize.c b/mpq/canonicalize.c
new file mode 100644
index 0000000..7ac2331
--- /dev/null
+++ b/mpq/canonicalize.c
@@ -0,0 +1,61 @@
+/* mpq_canonicalize(op) -- Remove common factors of the denominator and
+   numerator in OP.
+
+Copyright 1991, 1994-1996, 2000, 2001, 2005, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpq_canonicalize (mpq_ptr op)
+{
+  mpz_t gcd;
+  TMP_DECL;
+
+  if (SIZ(DEN(op)) < 0)  /* keep the denominator positive: flip both signs */
+    {
+      SIZ(NUM(op)) = -SIZ(NUM(op));
+      SIZ(DEN(op)) = -SIZ(DEN(op));
+    }
+  else if (UNLIKELY (SIZ(DEN(op)) == 0))
+    DIVIDE_BY_ZERO;
+
+  TMP_MARK;
+
+  /* ??? Unclear whether the 1+ limb of headroom is really needed.  */
+  MPZ_TMP_INIT (gcd, 1 + MAX (ABSIZ(NUM(op)),
+			      SIZ(DEN(op))));
+
+  mpz_gcd (gcd, NUM(op), DEN(op));
+  if (! MPZ_EQUAL_1_P (gcd))  /* divide out any common factor */
+    {
+      mpz_divexact_gcd (NUM(op), NUM(op), gcd);
+      mpz_divexact_gcd (DEN(op), DEN(op), gcd);
+    }
+  TMP_FREE;
+}
diff --git a/mpq/clear.c b/mpq/clear.c
new file mode 100644
index 0000000..60beb51
--- /dev/null
+++ b/mpq/clear.c
@@ -0,0 +1,41 @@
+/* mpq_clear -- free the space occupied by an mpq_t.
+
+Copyright 1991, 1994, 1995, 2000, 2001, 2015, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpq_clear (mpq_ptr x)
+{
+  if (ALLOC (NUM(x)))  /* ALLOC == 0 means the static dummy limb set by mpq_init; nothing to free */
+    __GMP_FREE_FUNC_LIMBS (PTR(NUM(x)), ALLOC(NUM(x)));
+  if (ALLOC (DEN(x)))
+    __GMP_FREE_FUNC_LIMBS (PTR(DEN(x)), ALLOC(DEN(x)));
+}
diff --git a/mpq/clears.c b/mpq/clears.c
new file mode 100644
index 0000000..68c6ad3
--- /dev/null
+++ b/mpq/clears.c
@@ -0,0 +1,52 @@
+/* mpq_clears() -- Clear multiple mpq_t variables.
+
+Copyright 2009, 2014, 2015, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include "gmp-impl.h"
+
+void
+mpq_clears (mpq_ptr x, ...)
+{
+  va_list  ap;
+
+  va_start (ap, x);
+
+  do
+    {
+      if (ALLOC (NUM(x)))  /* ALLOC == 0 is mpq_init's never-allocated dummy limb */
+	__GMP_FREE_FUNC_LIMBS (PTR(NUM(x)), ALLOC(NUM(x)));
+      if (ALLOC (DEN(x)))
+	__GMP_FREE_FUNC_LIMBS (PTR(DEN(x)), ALLOC(DEN(x)));
+      x = va_arg (ap, mpq_ptr);  /* argument list is NULL-terminated */
+    }
+  while (x != NULL);
+
+  va_end (ap);
+}
diff --git a/mpq/cmp.c b/mpq/cmp.c
new file mode 100644
index 0000000..495e77e
--- /dev/null
+++ b/mpq/cmp.c
@@ -0,0 +1,169 @@
+/* mpq_cmp(u,v) -- Compare U, V.  Return positive, zero, or negative
+   depending on whether U > V, U == V, or U < V.
+
+Copyright 1991, 1994, 1996, 2001, 2002, 2005, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static int
+mpq_cmp_numden (mpq_srcptr op1, mpz_srcptr num_op2, mpz_srcptr den_op2)
+{
+  mp_size_t num1_size = SIZ(NUM(op1));
+  mp_size_t den1_size = SIZ(DEN(op1));
+  mp_size_t num2_size = SIZ(num_op2);
+  mp_size_t den2_size = SIZ(den_op2);
+  int op2_is_int;
+  mp_limb_t d1h, d2h;
+  mp_size_t tmp1_size, tmp2_size;
+  mp_ptr tmp1_ptr, tmp2_ptr;
+  mp_size_t num1_sign;
+  int cc;
+  TMP_DECL;
+
+  /* need canonical signs to get right result */
+  ASSERT (den1_size > 0);
+  ASSERT (den2_size > 0);
+
+  if (num1_size == 0)
+    return -num2_size;  /* op1 == 0: result is -sign(op2) */
+  if (num2_size == 0)
+    return num1_size;  /* op2 == 0: result is sign(op1) */
+  if ((num1_size ^ num2_size) < 0) /* I.e. are the signs different? */
+    return num1_size;
+
+  num1_sign = num1_size;
+  num1_size = ABS (num1_size);
+
+  /* THINK: Does storing d1h and d2h make sense? */
+  d1h = PTR(DEN(op1))[den1_size - 1];
+  d2h = PTR(den_op2)[den2_size - 1];
+  op2_is_int = (den2_size | d2h) == 1;  /* true iff den2 is a single limb holding 1 */
+  if ((unsigned) op2_is_int == (den1_size | d1h)) /* Both ops are integers */
+    /* return mpz_cmp (NUM (op1), num_op2); */
+    {
+      int cmp;
+
+      /* Cannot use num1_sign - num2_size, may overflow an "int" */
+      if (num1_sign != num2_size)
+	return (num1_sign > num2_size) ? 1 : -1;
+
+      cmp = mpn_cmp (PTR(NUM(op1)), PTR(num_op2), num1_size);
+      return (num1_sign > 0 ? cmp : -cmp);
+    }
+
+  num2_size = ABS (num2_size);
+
+  tmp1_size = num1_size + den2_size;
+  tmp2_size = num2_size + den1_size;
+
+  /* 1. Check to see if we can tell which operand is larger by just looking at
+     the number of limbs.  */
+
+  /* NUM1 x DEN2 is either TMP1_SIZE limbs or TMP1_SIZE-1 limbs.
+     Same for NUM2 x DEN1 with respect to TMP2_SIZE.  */
+  if (tmp1_size > tmp2_size + 1)
+    /* NUM1 x DEN2 is surely larger in magnitude than NUM2 x DEN1.  */
+    return num1_sign;
+  if (tmp2_size + op2_is_int > tmp1_size + 1)
+    /* NUM1 x DEN2 is surely smaller in magnitude than NUM2 x DEN1.  */
+    return -num1_sign;
+
+  /* 2. Same, but compare the number of significant bits.  */
+  {
+    int cnt1, cnt2;
+    mp_bitcnt_t bits1, bits2;
+
+    count_leading_zeros (cnt1, PTR(NUM(op1))[num1_size - 1]);
+    count_leading_zeros (cnt2, d2h);
+    bits1 = (mp_bitcnt_t) tmp1_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;
+
+    count_leading_zeros (cnt1, PTR(num_op2)[num2_size - 1]);
+    count_leading_zeros (cnt2, d1h);
+    bits2 = (mp_bitcnt_t) tmp2_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;
+
+    if (bits1 > bits2 + 1)
+      return num1_sign;
+    if (bits2 + op2_is_int > bits1 + 1)
+      return -num1_sign;
+  }
+
+  /* 3. Finally, cross multiply and compare.  */
+
+  TMP_MARK;
+  if (op2_is_int)
+    {
+      tmp2_ptr = TMP_ALLOC_LIMBS (tmp2_size);
+      tmp1_ptr = PTR(NUM(op1));
+      --tmp1_size;  /* den2 == 1: NUM1 x DEN2 is just NUM1 itself, no multiply */
+    }
+  else
+    {
+  TMP_ALLOC_LIMBS_2 (tmp1_ptr,tmp1_size, tmp2_ptr,tmp2_size);
+
+  if (num1_size >= den2_size)
+    tmp1_size -= 0 == mpn_mul (tmp1_ptr,
+			       PTR(NUM(op1)), num1_size,
+			       PTR(den_op2), den2_size);
+  else
+    tmp1_size -= 0 == mpn_mul (tmp1_ptr,
+			       PTR(den_op2), den2_size,
+			       PTR(NUM(op1)), num1_size);
+    }
+
+   if (num2_size >= den1_size)
+     tmp2_size -= 0 == mpn_mul (tmp2_ptr,
+				PTR(num_op2), num2_size,
+				PTR(DEN(op1)), den1_size);
+   else
+     tmp2_size -= 0 == mpn_mul (tmp2_ptr,
+				PTR(DEN(op1)), den1_size,
+				PTR(num_op2), num2_size);
+
+
+  cc = tmp1_size - tmp2_size != 0
+    ? tmp1_size - tmp2_size : mpn_cmp (tmp1_ptr, tmp2_ptr, tmp1_size);
+  TMP_FREE;
+  return num1_sign < 0 ? -cc : cc;  /* restore the sign shared by both operands */
+}
+
+int
+mpq_cmp (mpq_srcptr op1, mpq_srcptr op2)
+{
+  return mpq_cmp_numden (op1, NUM(op2), DEN(op2));  /* shared kernel does the work */
+}
+
+int
+mpq_cmp_z (mpq_srcptr op1, mpz_srcptr op2)
+{
+  /* "static const", not the obsolescent "const static" ordering (C99 6.11.5).  */
+  static const mp_limb_t one = 1;
+  static const mpz_t den = MPZ_ROINIT_N ((mp_limb_t *) &one, 1);  /* read-only denominator 1 */
+
+  return mpq_cmp_numden (op1, op2, den);  /* compare op1 against op2/1 */
+}
diff --git a/mpq/cmp_si.c b/mpq/cmp_si.c
new file mode 100644
index 0000000..faf472c
--- /dev/null
+++ b/mpq/cmp_si.c
@@ -0,0 +1,60 @@
+/* _mpq_cmp_si -- compare mpq and long/ulong fraction.
+
+Copyright 2001, 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Something like mpq_cmpabs_ui would be more useful for the neg/neg case,
+   and perhaps a version accepting a parameter to reverse the test, to make
+   it a tail call here.  */
+
+int
+_mpq_cmp_si (mpq_srcptr q, long n, unsigned long d)
+{
+  /* need canonical sign to get right result */
+  ASSERT (SIZ(DEN(q)) > 0);
+
+  if (n >= 0)
+    return _mpq_cmp_ui (q, n, d);  /* non-negative n: defer to the unsigned compare */
+  if (SIZ(NUM(q)) >= 0)
+    {
+      return 1;                                /* >=0 cmp <0 */
+    }
+  else
+    {
+      mpq_t  qabs;  /* shallow |q|: shares q's limbs, only the numerator size sign differs */
+      SIZ(NUM(qabs)) = -SIZ(NUM(q));
+      PTR(NUM(qabs)) = PTR(NUM(q));
+      SIZ(DEN(qabs)) = SIZ(DEN(q));
+      PTR(DEN(qabs)) = PTR(DEN(q));
+
+      return - _mpq_cmp_ui (qabs, NEG_CAST (unsigned long, n), d);    /* <0 cmp <0 */
+    }
+}
diff --git a/mpq/cmp_ui.c b/mpq/cmp_ui.c
new file mode 100644
index 0000000..9d99a9a
--- /dev/null
+++ b/mpq/cmp_ui.c
@@ -0,0 +1,99 @@
+/* mpq_cmp_ui(u,vn,vd) -- Compare U with Vn/Vd.  Return positive, zero, or
+   negative depending on whether U > V, U == V, or U < V.  Vn and Vd may have
+   common factors.
+
+Copyright 1993, 1994, 1996, 2000-2003, 2005, 2014, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+int
+_mpq_cmp_ui (mpq_srcptr op1, unsigned long int num2, unsigned long int den2)
+{
+  mp_size_t num1_size = SIZ(NUM(op1));
+  mp_size_t den1_size = SIZ(DEN(op1));
+  mp_size_t tmp1_size, tmp2_size;
+  mp_ptr tmp1_ptr, tmp2_ptr;
+  mp_limb_t cy_limb;
+  int cc;
+  TMP_DECL;
+
+#if GMP_NAIL_BITS != 0
+  if ((num2 | den2) > GMP_NUMB_MAX)  /* operands don't fit one nail-limited limb: go generic */
+    {
+      mpq_t op2;
+      mpq_init (op2);
+      mpz_set_ui (mpq_numref (op2), num2);
+      mpz_set_ui (mpq_denref (op2), den2);
+      cc = mpq_cmp (op1, op2);
+      mpq_clear (op2);
+      return cc;
+    }
+#endif
+
+  /* need canonical sign to get right result */
+  ASSERT (den1_size > 0);
+
+  if (UNLIKELY (den2 == 0))
+    DIVIDE_BY_ZERO;
+
+  if (num2 == 0)
+    return num1_size;  /* comparing against 0: result is sign of op1 */
+  if (num1_size <= 0)
+    return -1;  /* op1 <= 0 < num2/den2 */
+
+  /* NUM1 x DEN2 is either TMP1_SIZE limbs or TMP1_SIZE-1 limbs.
+     Same for NUM2 x DEN1 with respect to TMP2_SIZE.  */
+  /* If frac2 <= 1 (i.e. num2 <= den2), shortcut with a simpler
+     condition: num1 > den1. Here we only test sizes. */
+  if (num1_size > den1_size + (num2 > den2))
+    /* NUM1 x DEN2 is surely larger in magnitude than NUM2 x DEN1.  */
+    return num1_size;
+  if (den1_size > num1_size + (den2 > num2))
+    /* NUM1 x DEN2 is surely smaller in magnitude than NUM2 x DEN1.  */
+    return -num1_size;
+
+  TMP_MARK;
+  TMP_ALLOC_LIMBS_2 (tmp1_ptr, num1_size + 1, tmp2_ptr, den1_size + 1);
+
+  cy_limb = mpn_mul_1 (tmp1_ptr, PTR(NUM(op1)), num1_size,
+                       (mp_limb_t) den2);  /* tmp1 = num1 * den2 */
+  tmp1_ptr[num1_size] = cy_limb;
+  tmp1_size = num1_size + (cy_limb != 0);
+
+  cy_limb = mpn_mul_1 (tmp2_ptr, PTR(DEN(op1)), den1_size,
+                       (mp_limb_t) num2);  /* tmp2 = den1 * num2 */
+  tmp2_ptr[den1_size] = cy_limb;
+  tmp2_size = den1_size + (cy_limb != 0);
+
+  cc = tmp1_size - tmp2_size;
+  cc = cc != 0 ? cc : mpn_cmp (tmp1_ptr, tmp2_ptr, tmp1_size);
+  TMP_FREE;
+  return cc;
+}
diff --git a/mpq/div.c b/mpq/div.c
new file mode 100644
index 0000000..3bd9726
--- /dev/null
+++ b/mpq/div.c
@@ -0,0 +1,133 @@
+/* mpq_div -- divide two rational numbers.
+
+Copyright 1991, 1994-1996, 2000, 2001, 2015, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+void
+mpq_div (mpq_ptr quot, mpq_srcptr op1, mpq_srcptr op2)
+{
+  mpz_t gcd1, gcd2;
+  mpz_t tmp1, tmp2;
+  mp_size_t op1_size;
+  mp_size_t op2_size;
+  mp_size_t alloc;
+  TMP_DECL;
+
+  op2_size = SIZ(NUM(op2));
+
+  if (UNLIKELY (op2_size == 0))
+    DIVIDE_BY_ZERO;
+
+  if (UNLIKELY (quot == op2))
+    {
+      if (UNLIKELY (op1 == op2))
+	{
+	  mpq_set_ui (quot, 1, 1);  /* x/x = 1 */
+	  return;
+	}
+
+      /* We checked for op1 == op2: we are not in the x=x/x case.
+	 We compute x=y/x by computing x=inv(x)*y */
+      MPN_PTR_SWAP (PTR(NUM(quot)), ALLOC(NUM(quot)),
+		    PTR(DEN(quot)), ALLOC(DEN(quot)));
+      if (op2_size > 0)
+	{
+	  SIZ(NUM(quot)) = SIZ(DEN(quot));
+	  SIZ(DEN(quot)) = op2_size;
+	}
+      else
+	{
+	  SIZ(NUM(quot)) = - SIZ(DEN(quot));  /* move the sign onto the new numerator */
+	  SIZ(DEN(quot)) = - op2_size;
+	}
+      mpq_mul (quot, quot, op1);
+      return;
+    }
+
+  op1_size = ABSIZ(NUM(op1));
+
+  if (op1_size == 0)
+    {
+      /* We special case this to simplify allocation logic; gcd(0,x) = x
+	 is a singular case for the allocations.  */
+      SIZ(NUM(quot)) = 0;
+      MPZ_NEWALLOC (DEN(quot), 1)[0] = 1;
+      SIZ(DEN(quot)) = 1;  /* 0/op2 = 0/1 */
+      return;
+    }
+
+  op2_size = ABS(op2_size);  /* work with magnitudes; sign restored at the end */
+
+  TMP_MARK;
+
+  alloc = MIN (op1_size, op2_size);
+  MPZ_TMP_INIT (gcd1, alloc);
+
+  alloc = MAX (op1_size, op2_size);
+  MPZ_TMP_INIT (tmp1, alloc);
+
+  op2_size = SIZ(DEN(op2));
+  op1_size = SIZ(DEN(op1));
+
+  alloc = MIN (op1_size, op2_size);
+  MPZ_TMP_INIT (gcd2, alloc);
+
+  alloc = MAX (op1_size, op2_size);
+  MPZ_TMP_INIT (tmp2, alloc);
+
+  /* QUOT might be identical to OP1, so don't store the result there
+     until we are finished with the input operand.  We can overwrite
+     the numerator of QUOT when we are finished with the numerator of
+     OP1. */
+
+  mpz_gcd (gcd1, NUM(op1), NUM(op2));
+  mpz_gcd (gcd2, DEN(op2), DEN(op1));
+
+  mpz_divexact_gcd (tmp1, NUM(op1), gcd1);
+  mpz_divexact_gcd (tmp2, DEN(op2), gcd2);
+
+  mpz_mul (NUM(quot), tmp1, tmp2);  /* num = (num1/gcd1) * (den2/gcd2) */
+
+  mpz_divexact_gcd (tmp1, NUM(op2), gcd1);
+  mpz_divexact_gcd (tmp2, DEN(op1), gcd2);
+
+  mpz_mul (DEN(quot), tmp1, tmp2);  /* den = (num2/gcd1) * (den1/gcd2) */
+
+  /* Keep the denominator positive.  */
+  if (SIZ(DEN(quot)) < 0)
+    {
+      SIZ(DEN(quot)) = -SIZ(DEN(quot));
+      SIZ(NUM(quot)) = -SIZ(NUM(quot));
+    }
+
+  TMP_FREE;
+}
diff --git a/mpq/equal.c b/mpq/equal.c
new file mode 100644
index 0000000..9835e04
--- /dev/null
+++ b/mpq/equal.c
@@ -0,0 +1,68 @@
+/* mpq_equal(u,v) -- Compare U, V.  Return non-zero if they are equal, zero
+   if they are non-equal.
+
+Copyright 1996, 2001, 2002, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+int
+mpq_equal (mpq_srcptr op1, mpq_srcptr op2) __GMP_NOTHROW
+{
+  mp_size_t  num1_size, num2_size, den1_size, den2_size, i;
+  mp_srcptr  num1_ptr,  num2_ptr,  den1_ptr,  den2_ptr;
+
+  /* need fully canonical for correct results */
+  ASSERT_MPQ_CANONICAL (op1);
+  ASSERT_MPQ_CANONICAL (op2);
+
+  num1_size = SIZ(NUM(op1));
+  num2_size = SIZ(NUM(op2));
+  if (num1_size != num2_size)  /* different size or sign: not equal */
+    return 0;
+
+  den1_size = SIZ(DEN(op1));
+  den2_size = SIZ(DEN(op2));
+  if (den1_size != den2_size)
+    return 0;
+
+  num1_ptr = PTR(NUM(op1));
+  num2_ptr = PTR(NUM(op2));
+  num1_size = ABS (num1_size);  /* limb count; signs already known equal */
+  for (i = 0; i < num1_size; i++)
+    if (num1_ptr[i] != num2_ptr[i])
+      return 0;
+
+  den1_ptr = PTR(DEN(op1));
+  den2_ptr = PTR(DEN(op2));
+  for (i = 0; i < den1_size; i++)
+    if (den1_ptr[i] != den2_ptr[i])
+      return 0;
+
+  return 1;
+}
diff --git a/mpq/get_d.c b/mpq/get_d.c
new file mode 100644
index 0000000..4d9779d
--- /dev/null
+++ b/mpq/get_d.c
@@ -0,0 +1,162 @@
+/* double mpq_get_d (mpq_t src) -- mpq to double, rounding towards zero.
+
+Copyright 1995, 1996, 2001-2005, 2018, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* All that's needed is to get the high 53 bits of the quotient num/den,
+   rounded towards zero.  More than 53 bits is fine, any excess is ignored
+   by mpn_get_d.
+
+   N_QLIMBS is how many quotient limbs we need to satisfy the mantissa of a
+   double, assuming the highest of those limbs is non-zero.  The target
+   qsize for mpn_tdiv_qr is then 1 more than this, since that function may
+   give a zero in the high limb (and non-zero in the second highest).
+
+   The use of 8*sizeof(double) in N_QLIMBS is an overestimate of the
+   mantissa bits, but it gets the same result as the true value (53 or 48 or
+   whatever) when rounded up to a multiple of GMP_NUMB_BITS, for non-nails.
+
+   Enhancements:
+
+   Use the true mantissa size in the N_QLIMBS formula, to save a divide step
+   in nails.
+
+   Examine the high limbs of num and den to see if the highest 1 bit of the
+   quotient will fall high enough that just N_QLIMBS-1 limbs is enough to
+   get the necessary bits, thereby saving a division step.
+
+   Bit shift either num or den to arrange for the above condition on the
+   high 1 bit of the quotient, to save a division step always.  A shift to
+   save a division step is definitely worthwhile with mpn_tdiv_qr, though we
+   may want to reassess this on big num/den when a quotient-only division
+   exists.
+
+   Maybe we could estimate the final exponent using nsize-dsize (and
+   possibly the high limbs of num and den), so as to detect overflow and
+   return infinity or zero quickly.  Overflow is never very helpful to an
+   application, and can therefore probably be regarded as abnormal, but we
+   may still like to optimize it if the conditions are easy.  (This would
+   only be for float formats we know, unknown formats are not important and
+   can be left to mpn_get_d.)
+
+   Future:
+
+   If/when mpn_tdiv_qr supports its qxn parameter we can use that instead of
+   padding n with zeros in temporary space.
+
+   Alternatives:
+
+   An alternative algorithm, that may be faster:
+   0. Let n be somewhat larger than the number of significant bits in a double.
+   1. Extract the most significant n bits of the denominator, and an equal
+      number of bits from the numerator.
+   2. Interpret the extracted numbers as integers, call them a and b
+      respectively, and develop n bits of the fractions ((a + 1) / b) and
+      (a / (b + 1)) using mpn_divrem.
+   3. If the computed values are identical UP TO THE POSITION WE CARE ABOUT,
+      we are done.  If they are different, repeat the algorithm from step 1,
+      but first let n = n * 2.
+   4. If we end up using all bits from the numerator and denominator, fall
+      back to a plain division.
+   5. Just to make life harder, the computation of a + 1 and b + 1 above
+      might give carry-out...  Needs special handling.  It might work to
+      subtract 1 in both cases instead.
+
+   Not certain if this approach would be faster than a quotient-only
+   division.  Presumably such optimizations are the sort of thing we would
+   like to have helping everywhere that uses a quotient-only division. */
+
+double
+mpq_get_d (mpq_srcptr src)
+{
+  double res;
+  mp_srcptr np, dp;
+  mp_ptr temp;
+  mp_size_t nsize = SIZ(NUM(src));
+  mp_size_t dsize = SIZ(DEN(src));
+  mp_size_t qsize, prospective_qsize, zeros;
+  mp_size_t sign_quotient = nsize;  /* quotient sign comes from the numerator */
+  long exp;
+#define N_QLIMBS (1 + (sizeof (double) + GMP_LIMB_BYTES-1) / GMP_LIMB_BYTES)
+  mp_limb_t qarr[N_QLIMBS + 1];
+  mp_ptr qp = qarr;
+  TMP_DECL;
+
+  ASSERT (dsize > 0);    /* canonical src */
+
+  /* mpn_get_d below requires a non-zero operand */
+  if (UNLIKELY (nsize == 0))
+    return 0.0;
+
+  TMP_MARK;
+  nsize = ABS (nsize);
+  dsize = ABS (dsize);
+  np = PTR(NUM(src));
+  dp = PTR(DEN(src));
+
+  prospective_qsize = nsize - dsize;       /* from using given n,d */
+  qsize = N_QLIMBS;                        /* desired qsize */
+
+  zeros = qsize - prospective_qsize;       /* padding n to get qsize */
+  exp = (long) -zeros * GMP_NUMB_BITS;     /* relative to low of qp */
+
+  /* zero extend n into temporary space, if necessary */
+  if (zeros > 0)
+    {
+      mp_size_t tsize;
+      tsize = nsize + zeros;               /* size for copy of n */
+
+      temp = TMP_ALLOC_LIMBS (tsize + 1);
+      MPN_FILL (temp, zeros, 0);           /* low zero limbs pad n */
+      MPN_COPY (temp + zeros, np, nsize);
+      np = temp;
+      nsize = tsize;
+    }
+  else /* negative zeros means shorten n */
+    {
+      np -= zeros;                         /* zeros <= 0: drop low limbs of n */
+      nsize += zeros;
+
+      temp = TMP_ALLOC_LIMBS (nsize + 1);  /* scratch passed to mpn_div_q below */
+    }
+
+  ASSERT (qsize == nsize - dsize);
+  mpn_div_q (qp, np, nsize, dp, dsize, temp);
+
+  /* strip possible zero high limb */
+  qsize += (qp[qsize] != 0);               /* keep the high limb only if non-zero */
+
+  res = mpn_get_d (qp, qsize, sign_quotient, exp);
+  TMP_FREE;
+  return res;
+}
diff --git a/mpq/get_den.c b/mpq/get_den.c
new file mode 100644
index 0000000..d4fd90b
--- /dev/null
+++ b/mpq/get_den.c
@@ -0,0 +1,42 @@
+/* mpq_get_den(den,rat_src) -- Set DEN to the denominator of RAT_SRC.
+
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpq_get_den (mpz_ptr den, mpq_srcptr src)
+{
+  mp_size_t size = SIZ(DEN(src));  /* a canonical mpq has a positive denominator, so no ABS */
+  mp_ptr dp;
+
+  dp = MPZ_NEWALLOC (den, size);
+  SIZ(den) = size;
+  MPN_COPY (dp, PTR(DEN(src)), size);
+}
diff --git a/mpq/get_num.c b/mpq/get_num.c
new file mode 100644
index 0000000..079211c
--- /dev/null
+++ b/mpq/get_num.c
@@ -0,0 +1,44 @@
+/* mpq_get_num(num,rat_src) -- Set NUM to the numerator of RAT_SRC.
+
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpq_get_num (mpz_ptr num, mpq_srcptr src)
+{
+  mp_size_t size = SIZ(NUM(src));  /* signed size field carries the numerator's sign */
+  mp_size_t abs_size = ABS (size);
+  mp_ptr dp;
+
+  dp = MPZ_NEWALLOC (num, abs_size);
+  SIZ(num) = size;
+
+  MPN_COPY (dp, PTR(NUM(src)), abs_size);
+}
diff --git a/mpq/get_str.c b/mpq/get_str.c
new file mode 100644
index 0000000..d0cfb58
--- /dev/null
+++ b/mpq/get_str.c
@@ -0,0 +1,80 @@
+/* mpq_get_str -- mpq to string conversion.
+
+Copyright 2001, 2002, 2006, 2011, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+char *
+mpq_get_str (char *str, int base, mpq_srcptr q)
+{
+  size_t  str_alloc, len;
+
+  if (base > 62 || base < -36)  /* outside the supported base range */
+    return NULL;
+
+  str_alloc = 0;
+  if (str == NULL)
+    {
+      /* This is an overestimate since we don't bother checking how much of
+	 the high limbs of num and den are used.  +2 for rounding up the
+	 chars per bit of num and den.  +3 for sign, slash and '\0'.  */
+      if (ABS(base) < 2)
+	base = 10;  /* degenerate bases fall back to the default 10 */
+      DIGITS_IN_BASE_PER_LIMB (str_alloc, ABSIZ(NUM(q)) + SIZ(DEN(q)), ABS(base));
+      str_alloc += 6;
+
+      str = __GMP_ALLOCATE_FUNC_TYPE (str_alloc, char);
+    }
+
+  mpz_get_str (str, base, mpq_numref(q));
+  len = strlen (str);
+  if (! MPZ_EQUAL_1_P (mpq_denref (q)))  /* denominator 1 prints as a plain integer */
+    {
+      str[len++] = '/';
+      mpz_get_str (str+len, base, mpq_denref(q));
+      len += strlen (str+len);
+    }
+
+  ASSERT (len == strlen(str));
+  ASSERT (str_alloc == 0 || len+1 <= str_alloc);
+  ASSERT (len+1 <= 3 + /* size recommended to applications */
+	  (ABS(base) < 2 ?
+	   mpz_sizeinbase (mpq_numref(q), 10) +
+	   mpz_sizeinbase (mpq_denref(q), 10)
+	   : mpz_sizeinbase (mpq_numref(q), ABS(base)) +
+	   mpz_sizeinbase (mpq_denref(q), ABS(base))));
+
+  if (str_alloc != 0)  /* shrink our own allocation to the exact length */
+    __GMP_REALLOCATE_FUNC_MAYBE_TYPE (str, str_alloc, len+1, char);
+
+  return str;
+}
diff --git a/mpq/init.c b/mpq/init.c
new file mode 100644
index 0000000..5310066
--- /dev/null
+++ b/mpq/init.c
@@ -0,0 +1,45 @@
+/* mpq_init -- Make a new rational number with value 0/1.
+
+Copyright 1991, 1994, 1995, 2000-2002, 2015, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize X to the canonical value 0/1.
+
+   The numerator gets no real allocation: ALLOC is 0 and PTR is aimed at a
+   shared static dummy limb, so the pointer is never left wild while no
+   limb data is ever read (SIZ == 0).  The denominator gets one allocated
+   limb holding 1.  */
+void
+mpq_init (mpq_ptr x)
+{
+  static const mp_limb_t dummy_limb=0xc1a0;
+  ALLOC(NUM(x)) = 0;
+  PTR(NUM(x)) = (mp_ptr) &dummy_limb;
+  SIZ(NUM(x)) = 0;
+  ALLOC(DEN(x)) = 1;
+  PTR(DEN(x)) = __GMP_ALLOCATE_FUNC_LIMBS (1);
+  PTR(DEN(x))[0] = 1;
+  SIZ(DEN(x)) = 1;
+}
diff --git a/mpq/inits.c b/mpq/inits.c
new file mode 100644
index 0000000..fff4343
--- /dev/null
+++ b/mpq/inits.c
@@ -0,0 +1,49 @@
+/* mpq_inits() -- Initialize multiple mpq_t variables and set them to 0.
+
+Copyright 2009, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include "gmp-impl.h"
+
+/* Initialize each mpq_t in a variadic argument list to 0/1.  The list
+   must be terminated by a NULL mpq_ptr; the first argument X is always
+   initialized (do-while), so it must not be NULL.  */
+void
+mpq_inits (mpq_ptr x, ...)
+{
+  va_list  ap;
+
+  va_start (ap, x);
+
+  do
+    {
+      mpq_init (x);
+      x = va_arg (ap, mpq_ptr);
+    }
+  while (x != NULL);
+
+  va_end (ap);
+}
diff --git a/mpq/inp_str.c b/mpq/inp_str.c
new file mode 100644
index 0000000..b7662bc
--- /dev/null
+++ b/mpq/inp_str.c
@@ -0,0 +1,75 @@
+/* mpq_inp_str -- read an mpq from a FILE.
+
+Copyright 2001, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "gmp-impl.h"
+
+
+/* Read "num" or "num/den" from FP (stdin when FP is NULL) in the given
+   base, storing the result in Q.  Returns the number of characters
+   consumed, or 0 on failure.  The denominator, when present, must follow
+   the '/' immediately (mpz_inp_str_nowhite skips no whitespace).  The
+   value is stored as read; no canonicalization is done here.  */
+size_t
+mpq_inp_str (mpq_ptr q, FILE *fp, int base)
+{
+  size_t  nread;
+  int     c;
+
+  if (fp == NULL)
+    fp = stdin;
+
+  /* Pre-set the denominator to 1, for the case of no "/den" part.  */
+  SIZ(DEN(q)) = 1;
+  MPZ_NEWALLOC (DEN(q), 1)[0] = 1;
+
+  nread = mpz_inp_str (mpq_numref(q), fp, base);
+  if (nread == 0)
+    return 0;
+
+  /* One character of lookahead to spot a fraction.  */
+  c = getc (fp);
+  nread++;
+
+  if (c == '/')
+    {
+      c = getc (fp);
+      nread++;
+
+      /* Parse the denominator starting from the already-fetched C; on
+	 failure reset Q to 0/1 and report failure by returning 0.  */
+      nread = mpz_inp_str_nowhite (mpq_denref(q), fp, base, c, nread);
+      if (nread == 0)
+	{
+	  SIZ(NUM(q)) = 0;
+	  SIZ(DEN(q)) = 1;
+	  PTR(DEN(q))[0] = 1;
+	}
+    }
+  else
+    {
+      /* Not a fraction; push the lookahead character back.  */
+      ungetc (c, fp);
+      nread--;
+    }
+
+  return nread;
+}
diff --git a/mpq/inv.c b/mpq/inv.c
new file mode 100644
index 0000000..c395984
--- /dev/null
+++ b/mpq/inv.c
@@ -0,0 +1,70 @@
+/* mpq_inv(dest,src) -- invert a rational number, i.e. set DEST to SRC
+   with the numerator and denominator swapped.
+
+Copyright 1991, 1994, 1995, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set DEST to 1/SRC, i.e. numerator and denominator swapped, with the
+   sign kept on the numerator.  Raises a division by zero when SRC is 0.
+   DEST and SRC may be the same object.  */
+void
+mpq_inv (mpq_ptr dest, mpq_srcptr src)
+{
+  mp_size_t num_size = SIZ(NUM(src));
+  mp_size_t den_size = SIZ(DEN(src));
+
+  if (num_size < 0)
+    {
+      /* Negate both sizes so the new denominator (the old numerator)
+	 comes out positive and the sign lands on the new numerator.  */
+      num_size = -num_size;
+      den_size = -den_size;
+    }
+  else if (UNLIKELY (num_size == 0))
+    DIVIDE_BY_ZERO;
+
+  SIZ(DEN(dest)) = num_size;
+  SIZ(NUM(dest)) = den_size;
+
+  /* If dest == src we may just swap the numerator and denominator;
+     we ensured that the new denominator is positive.  */
+
+  if (dest == src)
+    {
+      MP_PTR_SWAP (PTR(NUM(dest)), PTR(DEN(dest)));
+      MP_SIZE_T_SWAP (ALLOC(NUM(dest)), ALLOC(DEN(dest)));
+    }
+  else
+    {
+      mp_ptr dp;
+
+      /* Distinct objects: copy the limbs across.  */
+      den_size = ABS (den_size);
+      dp = MPZ_NEWALLOC (NUM(dest), den_size);
+      MPN_COPY (dp, PTR(DEN(src)), den_size);
+
+      dp = MPZ_NEWALLOC (DEN(dest), num_size);
+      MPN_COPY (dp, PTR(NUM(src)), num_size);
+    }
+}
diff --git a/mpq/md_2exp.c b/mpq/md_2exp.c
new file mode 100644
index 0000000..b3378b3
--- /dev/null
+++ b/mpq/md_2exp.c
@@ -0,0 +1,110 @@
+/* mpq_mul_2exp, mpq_div_2exp - multiply or divide by 2^N */
+
+/*
+Copyright 2000, 2002, 2012, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The multiplier/divisor "n", representing 2^n, is applied by right shifting
+   "r" until it's odd (if it isn't already), and left shifting "l" for the
+   rest. */
+
+static void
+mord_2exp (mpz_ptr ldst, mpz_ptr rdst, mpz_srcptr lsrc, mpz_srcptr rsrc,
+           mp_bitcnt_t n)
+{
+  mp_size_t  rsrc_size = SIZ(rsrc);
+  mp_size_t  len = ABS (rsrc_size);
+  mp_ptr     rsrc_ptr = PTR(rsrc);
+  mp_ptr     p, rdst_ptr;
+  mp_limb_t  plow;
+
+  /* Drop whole zero low limbs of "r" while n still covers them.
+     NOTE(review): *p is read unconditionally, so rsrc must be nonzero —
+     callers are expected to guarantee this.  */
+  p = rsrc_ptr;
+  plow = *p;
+  while (n >= GMP_NUMB_BITS && plow == 0)
+    {
+      n -= GMP_NUMB_BITS;
+      p++;
+      plow = *p;
+    }
+
+  /* no realloc here if rsrc==rdst, so p and rsrc_ptr remain valid */
+  len -= (p - rsrc_ptr);
+  rdst_ptr = MPZ_REALLOC (rdst, len);
+
+  if ((plow & 1) || n == 0)
+    {
+      /* Already odd, or no shift left to apply: plain limb copy.  */
+      /* need INCR when src==dst */
+      if (p != rdst_ptr)
+        MPN_COPY_INCR (rdst_ptr, p, len);
+    }
+  else
+    {
+      unsigned long  shift;
+      if (plow == 0)
+        /* Zero low limb with n < GMP_NUMB_BITS: the whole rest of n
+           fits within this limb's zero bits.  */
+        shift = n;
+      else
+        {
+          /* Shift out trailing zero bits, but never more than n.  */
+          count_trailing_zeros (shift, plow);
+          shift = MIN (shift, n);
+        }
+      mpn_rshift (rdst_ptr, p, len, shift);
+      len -= (rdst_ptr[len-1] == 0);  /* high limb may have become 0 */
+      n -= shift;
+    }
+  SIZ(rdst) = (rsrc_size >= 0) ? len : -len;
+
+  /* Whatever power of 2 was not absorbed by "r" multiplies "l".  */
+  if (n)
+    mpz_mul_2exp (ldst, lsrc, n);
+  else if (ldst != lsrc)
+    mpz_set (ldst, lsrc);
+}
+
+
+/* DST = SRC * 2^N: the numerator is the "l" (multiplied) operand, the
+   denominator the "r" (shifted-down) operand.  DST may equal SRC.  */
+void
+mpq_mul_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)
+{
+  mord_2exp (NUM(dst), DEN(dst), NUM(src), DEN(src), n);
+}
+
+/* DST = SRC / 2^N: factors of 2 are cancelled from the numerator first,
+   the rest multiplies the denominator.  A zero SRC is special-cased to
+   0/1 here, since mord_2exp reads the low limb of its "r" operand
+   unconditionally.  */
+void
+mpq_div_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)
+{
+  if (SIZ(NUM(src)) == 0)
+    {
+      SIZ(NUM(dst)) = 0;
+      SIZ(DEN(dst)) = 1;
+      MPZ_NEWALLOC (DEN(dst), 1)[0] = 1;
+      return;
+    }
+
+  mord_2exp (DEN(dst), NUM(dst), DEN(src), NUM(src), n);
+}
diff --git a/mpq/mul.c b/mpq/mul.c
new file mode 100644
index 0000000..270dafc
--- /dev/null
+++ b/mpq/mul.c
@@ -0,0 +1,102 @@
+/* mpq_mul -- multiply two rational numbers.
+
+Copyright 1991, 1994-1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* PROD = OP1 * OP2.  The cross gcds gcd(num1,den2) and gcd(num2,den1)
+   are divided out before the multiplications, so (as usual for mpq) the
+   result is canonical provided the operands are.  PROD may alias either
+   operand.  */
+void
+mpq_mul (mpq_ptr prod, mpq_srcptr op1, mpq_srcptr op2)
+{
+  mpz_t gcd1, gcd2;
+  mpz_t tmp1, tmp2;
+  mp_size_t op1_num_size;
+  mp_size_t op1_den_size;
+  mp_size_t op2_num_size;
+  mp_size_t op2_den_size;
+  mp_size_t alloc;
+  TMP_DECL;
+
+  if (op1 == op2)
+    {
+      /* No need for any GCDs when squaring. */
+      mpz_mul (mpq_numref (prod), mpq_numref (op1), mpq_numref (op1));
+      mpz_mul (mpq_denref (prod), mpq_denref (op1), mpq_denref (op1));
+      return;
+    }
+
+  op1_num_size = ABSIZ(NUM(op1));
+  op1_den_size =   SIZ(DEN(op1));
+  op2_num_size = ABSIZ(NUM(op2));
+  op2_den_size =   SIZ(DEN(op2));
+
+  if (op1_num_size == 0 || op2_num_size == 0)
+    {
+      /* We special case this to simplify allocation logic; gcd(0,x) = x
+	 is a singular case for the allocations.  */
+      SIZ(NUM(prod)) = 0;
+      MPZ_NEWALLOC (DEN(prod), 1)[0] = 1;
+      SIZ(DEN(prod)) = 1;
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Scratch mpz's on temporary storage: each gcd is bounded by the
+     smaller of its two arguments, each reduced factor by the larger.  */
+  alloc = MIN (op1_num_size, op2_den_size);
+  MPZ_TMP_INIT (gcd1, alloc);
+
+  alloc = MIN (op2_num_size, op1_den_size);
+  MPZ_TMP_INIT (gcd2, alloc);
+
+  alloc = MAX (op1_num_size, op2_den_size);
+  MPZ_TMP_INIT (tmp1, alloc);
+
+  alloc = MAX (op2_num_size, op1_den_size);
+  MPZ_TMP_INIT (tmp2, alloc);
+
+  /* PROD might be identical to either operand, so don't store the result there
+     until we are finished with the input operands.  We can overwrite the
+     numerator of PROD when we are finished with the numerators of OP1 and
+     OP2.  */
+
+  mpz_gcd (gcd1, NUM(op1), DEN(op2));
+  mpz_gcd (gcd2, NUM(op2), DEN(op1));
+
+  mpz_divexact_gcd (tmp1, NUM(op1), gcd1);
+  mpz_divexact_gcd (tmp2, NUM(op2), gcd2);
+
+  mpz_mul (NUM(prod), tmp1, tmp2);
+
+  mpz_divexact_gcd (tmp1, DEN(op2), gcd1);
+  mpz_divexact_gcd (tmp2, DEN(op1), gcd2);
+
+  mpz_mul (DEN(prod), tmp1, tmp2);
+
+  TMP_FREE;
+}
diff --git a/mpq/neg.c b/mpq/neg.c
new file mode 100644
index 0000000..8fda41c
--- /dev/null
+++ b/mpq/neg.c
@@ -0,0 +1,57 @@
+/* mpq_neg -- negate a rational.
+
+Copyright 2000, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpq_neg 1
+
+#include "gmp-impl.h"
+
+
+/* DST = -SRC.  When DST and SRC are distinct the limbs are deep-copied;
+   either way only the numerator's size field changes sign.  (SIZ(DEN) is
+   used directly as a limb count, relying on the denominator of a
+   canonical mpq being positive.)  */
+void
+mpq_neg (mpq_ptr dst, mpq_srcptr src)
+{
+  mp_size_t  num_size = SIZ(NUM(src));
+
+  if (src != dst)
+    {
+      mp_size_t  size;
+      mp_ptr dp;
+
+      size = ABS(num_size);
+      dp = MPZ_NEWALLOC (NUM(dst), size);
+      MPN_COPY (dp, PTR(NUM(src)), size);
+
+      size = SIZ(DEN(src));
+      dp = MPZ_NEWALLOC (DEN(dst), size);
+      SIZ(DEN(dst)) = size;
+      MPN_COPY (dp, PTR(DEN(src)), size);
+    }
+
+  SIZ(NUM(dst)) = -num_size;
+}
diff --git a/mpq/out_str.c b/mpq/out_str.c
new file mode 100644
index 0000000..5d2dd4e
--- /dev/null
+++ b/mpq/out_str.c
@@ -0,0 +1,53 @@
+/* mpq_out_str(stream,base,integer) */
+
+/*
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+/* Write Q to STREAM (stdout when STREAM is NULL) in the given base, as
+   "num/den", or just "num" when the denominator is 1.  Returns the number
+   of bytes written, or 0 when an error is pending on the stream.  */
+size_t
+mpq_out_str (FILE *stream, int base, mpq_srcptr q)
+{
+  size_t  written;
+
+  if (stream == NULL)
+    stream = stdout;
+
+  written = mpz_out_str (stream, base, mpq_numref (q));
+
+  if (mpz_cmp_ui (mpq_denref (q), 1) != 0)
+    {
+      /* The +1 accounts for the '/' separator.  */
+      putc ('/', stream);
+      written += 1 + mpz_out_str (stream, base, mpq_denref (q));
+    }
+
+  return ferror (stream) ? 0 : written;
+}
diff --git a/mpq/set.c b/mpq/set.c
new file mode 100644
index 0000000..87c15e3
--- /dev/null
+++ b/mpq/set.c
@@ -0,0 +1,51 @@
+/* mpq_set(dest,src) -- Set DEST to SRC.
+
+Copyright 1991, 1994, 1995, 2001, 2012, 2015 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* DEST = SRC: deep-copy the numerator and denominator limbs together with
+   their size fields.  (SIZ(DEN) is passed unabsoluted as a limb count,
+   relying on a canonical — hence positive — denominator.)  */
+void
+mpq_set (mpq_ptr dest, mpq_srcptr src)
+{
+  mp_size_t num_size, den_size;
+  mp_size_t abs_num_size;
+  mp_ptr dp;
+
+  num_size = SIZ(NUM(src));
+  SIZ(NUM(dest)) = num_size;
+  abs_num_size = ABS (num_size);
+  dp = MPZ_NEWALLOC (NUM(dest), abs_num_size);
+  MPN_COPY (dp, PTR(NUM(src)), abs_num_size);
+
+  den_size = SIZ(DEN(src));
+  SIZ(DEN(dest)) = den_size;
+  dp = MPZ_NEWALLOC (DEN(dest), den_size);
+  MPN_COPY (dp, PTR(DEN(src)), den_size);
+}
diff --git a/mpq/set_d.c b/mpq/set_d.c
new file mode 100644
index 0000000..3f3fab0
--- /dev/null
+++ b/mpq/set_d.c
@@ -0,0 +1,165 @@
+/* mpq_set_d(mpq_t q, double d) -- Set q to d without rounding.
+
+Copyright 2000, 2002, 2003, 2012, 2014, 2018 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if LIMBS_PER_DOUBLE > 4
+  choke me
+#endif
+
+/* Set DEST to the exact value of D, without rounding: every finite double
+   equals m/2^k, so the denominator comes out as a power of 2 (or 1 for
+   integral values).  NaN or infinity invokes __gmp_invalid_operation.  */
+void
+mpq_set_d (mpq_ptr dest, double d)
+{
+  int negative;
+  mp_exp_t exp;
+  mp_limb_t tp[LIMBS_PER_DOUBLE];
+  mp_ptr np, dp;
+  mp_size_t nn, dn;
+  int c;
+
+  DOUBLE_NAN_INF_ACTION (d,
+                         __gmp_invalid_operation (),
+                         __gmp_invalid_operation ());
+
+  negative = d < 0;
+  d = ABS (d);
+
+  /* tp receives the LIMBS_PER_DOUBLE mantissa limbs; exp is the exponent
+     counted in limbs (see __gmp_extract_double).  */
+  exp = __gmp_extract_double (tp, d);
+
+  /* There are two main version of the conversion.  The `then' arm handles
+     numbers with a fractional part, while the `else' arm handles integers.  */
+#if LIMBS_PER_DOUBLE == 4
+  if (exp <= 1 || (exp == 2 && (tp[0] | tp[1]) != 0))
+#endif
+#if LIMBS_PER_DOUBLE == 3
+  if (exp <= 1 || (exp == 2 && tp[0] != 0))
+#endif
+#if LIMBS_PER_DOUBLE == 2
+  if (exp <= 1)
+#endif
+    {
+      if (d == 0.0)
+	{
+	  /* Zero gets the canonical 0/1 directly.  */
+	  SIZ(NUM(dest)) = 0;
+	  SIZ(DEN(dest)) = 1;
+	  MPZ_NEWALLOC (DEN(dest), 1)[0] = 1;
+	  return;
+	}
+
+      /* Numerator: the mantissa limbs with low zero limbs dropped.  */
+#if LIMBS_PER_DOUBLE == 4
+      np = MPZ_NEWALLOC (NUM(dest), 4);
+      if ((tp[0] | tp[1] | tp[2]) == 0)
+	np[0] = tp[3], nn = 1;
+      else if ((tp[0] | tp[1]) == 0)
+	np[1] = tp[3], np[0] = tp[2], nn = 2;
+      else if (tp[0] == 0)
+	np[2] = tp[3], np[1] = tp[2], np[0] = tp[1], nn = 3;
+      else
+	np[3] = tp[3], np[2] = tp[2], np[1] = tp[1], np[0] = tp[0], nn = 4;
+#endif
+#if LIMBS_PER_DOUBLE == 3
+      np = MPZ_NEWALLOC (NUM(dest), 3);
+      if ((tp[0] | tp[1]) == 0)
+	np[0] = tp[2], nn = 1;
+      else if (tp[0] == 0)
+	np[1] = tp[2], np[0] = tp[1], nn = 2;
+      else
+	np[2] = tp[2], np[1] = tp[1], np[0] = tp[0], nn = 3;
+#endif
+#if LIMBS_PER_DOUBLE == 2
+      np = MPZ_NEWALLOC (NUM(dest), 2);
+      if (tp[0] == 0)
+	np[0] = tp[1], nn = 1;
+      else
+	np[1] = tp[1], np[0] = tp[0], nn = 2;
+#endif
+      /* Denominator: zeros with a top limb of 1, i.e. a power of 2.  */
+      dn = nn + 1 - exp;
+      ASSERT (dn > 0); /* -exp >= -1; nn >= 1*/
+      dp = MPZ_NEWALLOC (DEN(dest), dn);
+      MPN_ZERO (dp, dn - 1);
+      dp[dn - 1] = 1;
+      /* Strip common trailing zero bits of num and den (dp[0] is nonzero
+	 only when dn == 1).  */
+      count_trailing_zeros (c, np[0] | dp[0]);
+      if (c != 0)
+	{
+	  mpn_rshift (np, np, nn, c);
+	  nn -= np[nn - 1] == 0;
+	  --dn;
+	  dp[dn - 1] = CNST_LIMB(1) << (GMP_LIMB_BITS - c);
+	}
+      SIZ(DEN(dest)) = dn;
+    }
+  else
+    {
+      /* Integer case: the value is tp shifted up by exp-LIMBS_PER_DOUBLE
+	 whole limbs; denominator 1.  */
+      nn = exp;
+      np = MPZ_NEWALLOC (NUM(dest), nn);
+      switch (nn)
+        {
+	default:
+	  MPN_ZERO (np, nn - LIMBS_PER_DOUBLE);
+	  np += nn - LIMBS_PER_DOUBLE;
+	  /* fall through */
+#if LIMBS_PER_DOUBLE == 2
+	case 2:
+	  np[1] = tp[1], np[0] = tp[0];
+	  break;
+#endif
+#if LIMBS_PER_DOUBLE == 3
+	case 3:
+	  np[2] = tp[2], np[1] = tp[1], np[0] = tp[0];
+	  break;
+	case 2:
+	  np[1] = tp[2], np[0] = tp[1];
+	  break;
+#endif
+#if LIMBS_PER_DOUBLE == 4
+	case 4:
+	  np[3] = tp[3], np[2] = tp[2], np[1] = tp[1], np[0] = tp[0];
+	  break;
+	case 3:
+	  np[2] = tp[3], np[1] = tp[2], np[0] = tp[1];
+	  break;
+	case 2:
+	  np[1] = tp[3], np[0] = tp[2];
+	  break;
+#endif
+	}
+      MPZ_NEWALLOC (DEN(dest), 1)[0] = 1;
+      SIZ(DEN(dest)) = 1;
+    }
+  SIZ(NUM(dest)) = negative ? -nn : nn;
+}
diff --git a/mpq/set_den.c b/mpq/set_den.c
new file mode 100644
index 0000000..e249db7
--- /dev/null
+++ b/mpq/set_den.c
@@ -0,0 +1,45 @@
+/* mpq_set_den(dest,den) -- Set the denominator of DEST from DEN.
+
+Copyright 1991, 1994-1996, 2000, 2001, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set the denominator of DEST from the integer DEN, leaving the numerator
+   untouched.  The sign of DEN is stored as-is; any canonicalization is
+   the caller's responsibility.  */
+void
+mpq_set_den (mpq_ptr dest, mpz_srcptr den)
+{
+  mp_size_t size = SIZ (den);
+  mp_size_t abs_size = ABS (size);
+  mp_ptr dp;
+
+  SIZ(DEN(dest)) = size;
+  dp = MPZ_NEWALLOC (DEN(dest), abs_size);
+
+  MPN_COPY (dp, PTR(den), abs_size);
+}
diff --git a/mpq/set_f.c b/mpq/set_f.c
new file mode 100644
index 0000000..581d4fc
--- /dev/null
+++ b/mpq/set_f.c
@@ -0,0 +1,106 @@
+/* mpq_set_f -- set an mpq from an mpf.
+
+Copyright 2000-2002, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Set Q to the exact value of F.  When the mpf's radix point falls at or
+   to the right of its limbs the value is an integer (denominator 1);
+   otherwise the denominator is the power of 2 spanning the fractional
+   limbs, with common factors of 2 stripped from both parts.  */
+void
+mpq_set_f (mpq_ptr q, mpf_srcptr f)
+{
+  mp_size_t  fexp = EXP(f);
+  mp_ptr     fptr = PTR(f);
+  mp_size_t  fsize = SIZ(f);
+  mp_size_t  abs_fsize = ABS(fsize);
+  mp_limb_t  flow;
+
+  if (fsize == 0)
+    {
+      /* set q=0 */
+      SIZ(NUM(q)) = 0;
+      SIZ(DEN(q)) = 1;
+      MPZ_NEWALLOC (DEN(q), 1)[0] = 1;
+      return;
+    }
+
+  /* strip low zero limbs from f */
+  flow = *fptr;
+  MPN_STRIP_LOW_ZEROS_NOT_ZERO (fptr, abs_fsize, flow);
+
+  if (fexp >= abs_fsize)
+    {
+      /* radix point is to the right of the limbs, no denominator */
+      mp_ptr  num_ptr;
+
+      num_ptr = MPZ_NEWALLOC (mpq_numref (q), fexp);
+      MPN_ZERO (num_ptr, fexp - abs_fsize);
+      MPN_COPY (num_ptr + fexp - abs_fsize, fptr, abs_fsize);
+
+      SIZ(NUM(q)) = fsize >= 0 ? fexp : -fexp;
+      SIZ(DEN(q)) = 1;
+      MPZ_NEWALLOC (DEN(q), 1)[0] = 1;
+    }
+  else
+    {
+      /* radix point is within or to the left of the limbs, use denominator */
+      mp_ptr     num_ptr, den_ptr;
+      mp_size_t  den_size;
+
+      den_size = abs_fsize - fexp;
+      num_ptr = MPZ_NEWALLOC (mpq_numref (q), abs_fsize);
+      den_ptr = MPZ_NEWALLOC (mpq_denref (q), den_size+1);
+
+      if (flow & 1)
+        {
+          /* no powers of two to strip from numerator */
+
+          MPN_COPY (num_ptr, fptr, abs_fsize);
+          MPN_ZERO (den_ptr, den_size);
+          den_ptr[den_size] = 1;
+        }
+      else
+        {
+          /* right shift numerator, adjust denominator accordingly */
+          int  shift;
+
+          den_size--;
+          count_trailing_zeros (shift, flow);
+
+          mpn_rshift (num_ptr, fptr, abs_fsize, shift);
+          abs_fsize -= (num_ptr[abs_fsize-1] == 0);
+
+          /* denominator becomes 2^(den_size*GMP_NUMB_BITS + bits of the
+             top limb): zeros with a single high bit */
+          MPN_ZERO (den_ptr, den_size);
+          den_ptr[den_size] = GMP_LIMB_HIGHBIT >> (shift-1);
+        }
+
+      SIZ(NUM(q)) = fsize >= 0 ? abs_fsize : -abs_fsize;
+      SIZ(DEN(q)) = den_size + 1;
+    }
+}
diff --git a/mpq/set_num.c b/mpq/set_num.c
new file mode 100644
index 0000000..1a099a2
--- /dev/null
+++ b/mpq/set_num.c
@@ -0,0 +1,45 @@
+/* mpq_set_num(dest,num) -- Set the numerator of DEST from NUM.
+
+Copyright 1991, 1994, 1995, 2001, 2012, 2015 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set the numerator of DEST from the integer NUM, leaving the denominator
+   untouched.  */
+void
+mpq_set_num (mpq_ptr dest, mpz_srcptr num)
+{
+  mp_size_t size = SIZ (num);
+  mp_size_t abs_size = ABS (size);
+  mp_ptr dp;
+
+  SIZ(NUM(dest)) = size;
+  dp = MPZ_NEWALLOC (NUM(dest), abs_size);
+
+  MPN_COPY (dp, PTR(num), abs_size);
+}
diff --git a/mpq/set_si.c b/mpq/set_si.c
new file mode 100644
index 0000000..3159899
--- /dev/null
+++ b/mpq/set_si.c
@@ -0,0 +1,60 @@
+/* mpq_set_si(dest,ulong_num,ulong_den) -- Set DEST to the rational number
+   ULONG_NUM/ULONG_DEN.
+
+Copyright 1991, 1994, 1995, 2001, 2003, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set DEST to NUM/DEN.  A zero numerator is canonicalized to 0/1; no gcd
+   is removed otherwise, and DEN == 0 leaves a zero-sized denominator.  */
+void
+mpq_set_si (mpq_ptr dest, signed long int num, unsigned long int den)
+{
+  if (GMP_NUMB_BITS < BITS_PER_ULONG)
+    {
+      /* A long may need more than one limb here; delegate to mpz.  */
+      if (num == 0)  /* Canonicalize 0/d to 0/1.  */
+        den = 1;
+      mpz_set_si (mpq_numref (dest), num);
+      mpz_set_ui (mpq_denref (dest), den);
+      return;
+    }
+
+  if (num == 0)
+    {
+      /* Canonicalize 0/d to 0/1.  */
+      den = 1;
+      SIZ(NUM(dest)) = 0;
+    }
+  else
+    {
+      /* One limb suffices; ABS_CAST gives |num| without overflowing on
+	 the most negative long.  */
+      MPZ_NEWALLOC (NUM(dest), 1)[0] = ABS_CAST (unsigned long, num);
+      SIZ(NUM(dest)) = num > 0 ? 1 : -1;
+    }
+
+  MPZ_NEWALLOC (DEN(dest), 1)[0] = den;
+  SIZ(DEN(dest)) = (den != 0);
+}
diff --git a/mpq/set_str.c b/mpq/set_str.c
new file mode 100644
index 0000000..664bb2d
--- /dev/null
+++ b/mpq/set_str.c
@@ -0,0 +1,68 @@
+/* mpq_set_str -- string to mpq conversion.
+
+Copyright 2001, 2002, 2015, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h>
+#include "gmp-impl.h"
+
+
+/* FIXME: Would like an mpz_set_mem (or similar) accepting a pointer and
+   length so we wouldn't have to copy the numerator just to null-terminate
+   it.  */
+
+int
+mpq_set_str (mpq_ptr q, const char *str, int base)
+{
+  const char  *slash;
+  char        *num;
+  size_t      numlen;
+  int         ret;
+
+  slash = strchr (str, '/');
+  if (slash == NULL)
+    {
+      SIZ(DEN(q)) = 1;
+      MPZ_NEWALLOC (DEN(q), 1)[0] = 1;
+
+      return mpz_set_str (mpq_numref(q), str, base);
+    }
+
+  numlen = slash - str;
+  num = __GMP_ALLOCATE_FUNC_TYPE (numlen+1, char);
+  memcpy (num, str, numlen);
+  num[numlen] = '\0';
+  ret = mpz_set_str (mpq_numref(q), num, base);
+  __GMP_FREE_FUNC_TYPE (num, numlen+1, char);
+
+  if (ret != 0)
+    return ret;
+
+  return mpz_set_str (mpq_denref(q), slash+1, base);
+}
diff --git a/mpq/set_ui.c b/mpq/set_ui.c
new file mode 100644
index 0000000..b03d4e3
--- /dev/null
+++ b/mpq/set_ui.c
@@ -0,0 +1,60 @@
+/* mpq_set_ui(dest,ulong_num,ulong_den) -- Set DEST to the rational number
+   ULONG_NUM/ULONG_DEN.
+
+Copyright 1991, 1994, 1995, 2001-2003, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpq_set_ui (mpq_ptr dest, unsigned long int num, unsigned long int den)
+{
+  if (GMP_NUMB_BITS < BITS_PER_ULONG)
+    {
+      if (num == 0)  /* Canonicalize 0/d to 0/1.  */
+        den = 1;
+      mpz_set_ui (mpq_numref (dest), num);
+      mpz_set_ui (mpq_denref (dest), den);
+      return;
+    }
+
+  if (num == 0)
+    {
+      /* Canonicalize 0/d to 0/1.  */
+      den = 1;
+      SIZ(NUM(dest)) = 0;
+    }
+  else
+    {
+      MPZ_NEWALLOC (NUM(dest), 1)[0] = num;
+      SIZ(NUM(dest)) = 1;
+    }
+
+  MPZ_NEWALLOC (DEN(dest), 1)[0] = den;
+  SIZ(DEN(dest)) = (den != 0);
+}
diff --git a/mpq/set_z.c b/mpq/set_z.c
new file mode 100644
index 0000000..8309c93
--- /dev/null
+++ b/mpq/set_z.c
@@ -0,0 +1,48 @@
+/* mpq_set_z (dest,src) -- Set DEST to SRC.
+
+Copyright 1996, 2001, 2012, 2015, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpq_set_z (mpq_ptr dest, mpz_srcptr src)
+{
+  mp_size_t num_size;
+  mp_size_t abs_num_size;
+  mp_ptr dp;
+
+  num_size = SIZ (src);
+  SIZ(NUM(dest)) = num_size;
+  abs_num_size = ABS (num_size);
+  dp = MPZ_NEWALLOC (NUM(dest), abs_num_size);
+  MPN_COPY (dp, PTR(src), abs_num_size);
+
+  MPZ_NEWALLOC (DEN(dest), 1)[0] = 1;
+  SIZ(DEN(dest)) = 1;
+}
diff --git a/mpq/swap.c b/mpq/swap.c
new file mode 100644
index 0000000..e5152d4
--- /dev/null
+++ b/mpq/swap.c
@@ -0,0 +1,42 @@
+/* mpq_swap (U, V) -- Swap U and V.
+
+Copyright 1997, 1998, 2000, 2001, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpq_swap (mpq_ptr u, mpq_ptr v) __GMP_NOTHROW
+{
+  MP_SIZE_T_SWAP (ALLOC(NUM(u)), ALLOC(NUM(v)));
+  MP_SIZE_T_SWAP (ALLOC(DEN(u)), ALLOC(DEN(v)));
+  MP_SIZE_T_SWAP (SIZ(NUM(u)), SIZ(NUM(v)));
+  MP_SIZE_T_SWAP (SIZ(DEN(u)), SIZ(DEN(v)));
+  MP_PTR_SWAP (PTR(NUM(u)), PTR(NUM(v)));
+  MP_PTR_SWAP (PTR(DEN(u)), PTR(DEN(v)));
+}
diff --git a/mpz/2fac_ui.c b/mpz/2fac_ui.c
new file mode 100644
index 0000000..141a0a7
--- /dev/null
+++ b/mpz/2fac_ui.c
@@ -0,0 +1,100 @@
+/* mpz_2fac_ui(RESULT, N) -- Set RESULT to N!!.
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012, 2015, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
/* Fold the factor P into the running one-limb product PR; when PR has
   already grown past MAX_PR, flush PR into VEC[I] (post-incrementing I)
   and restart the product from P.  */
#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
  do {								\
    if ((PR) > (MAX_PR)) {					\
      (VEC)[(I)++] = (PR);					\
      (PR) = (P);						\
    } else							\
      (PR) *= (P);						\
  } while (0)

/* Basecase cutoff for the odd double-factorial (= ceil of twice the
   plain-factorial cutoff), and the number of factors below it whose
   product is bounded by one limb (bits per limb / bits per factor).  */
#define FAC_2DSC_THRESHOLD ((FAC_DSC_THRESHOLD << 1) | (FAC_DSC_THRESHOLD & 1))
#define FACTORS_PER_LIMB   (GMP_NUMB_BITS / (LOG2C(FAC_2DSC_THRESHOLD-1)+1))

/* Computes n!!, the 2-multi-factorial of n. (aka double-factorial or semi-factorial)
   WARNING: it assumes that n fits in a limb!
 */
void
mpz_2fac_ui (mpz_ptr x, unsigned long n)
{
  ASSERT (n <= GMP_NUMB_MAX);

  if ((n & 1) == 0) { /* n is even, n = 2k, (2k)!! = k! 2^k */
    mp_limb_t count;

    /* count = the exponent of 2 in (2k)!! = k + (k - popc(k)), either
       read from the precomputed table or derived from popcount.
       NOTE(review): the single `&' (not `&&') evaluates both cheap
       conditions unconditionally -- presumably to avoid a branch.  */
    if ((n <= TABLE_LIMIT_2N_MINUS_POPC_2N) & (n != 0))
      count = __gmp_fac2cnt_table[n / 2 - 1];
    else
      {
	popc_limb (count, n);	/* popc(n) == popc(k) */
	count = n - count;		/* n - popc(n) == k + k - popc(k) */
      }
    /* Odd part of k! times 2^count gives k! 2^k.  */
    mpz_oddfac_1 (x, n >> 1, 0);
    mpz_mul_2exp (x, x, count);
  } else { /* n is odd */
    if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT) {
      /* Small odd n: single-limb table lookup.  */
      MPZ_NEWALLOC (x, 1)[0] = __gmp_odd2fac_table[n >> 1];
      SIZ (x) = 1;
    } else if (BELOW_THRESHOLD (n, FAC_2DSC_THRESHOLD)) { /* odd basecase, */
      mp_limb_t *factors, prod, max_prod;
      mp_size_t j;
      TMP_SDECL;

      /* Accumulate the odd factors n, n-2, ... down to the table limit
	 into one-limb partial products, then multiply them together
	 (with the table value for the tail) via mpz_prodlimbs.  */
      /* FIXME: we might alloc a fixed amount 1+FAC_2DSC_THRESHOLD/FACTORS_PER_LIMB */
      TMP_SMARK;
      factors = TMP_SALLOC_LIMBS (1 + n / (2 * FACTORS_PER_LIMB));

      factors[0] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
      j = 1;
      prod = n;

      /* Any partial product <= max_prod can absorb one more factor
	 (< FAC_2DSC_THRESHOLD) without overflowing the limb.  */
      max_prod = GMP_NUMB_MAX / FAC_2DSC_THRESHOLD;
      while ((n -= 2) > ODD_DOUBLEFACTORIAL_TABLE_LIMIT)
	FACTOR_LIST_STORE (n, prod, max_prod, factors, j);

      factors[j++] = prod;
      mpz_prodlimbs (x, factors, j);

      TMP_SFREE;
    } else { /* for the asymptotically fast odd case, let oddfac do the job. */
      mpz_oddfac_1 (x, n, 1);
    }
  }
}

/* Keep these helpers file-local.  */
#undef FACTORS_PER_LIMB
#undef FACTOR_LIST_STORE
#undef FAC_2DSC_THRESHOLD
diff --git a/mpz/abs.c b/mpz/abs.c
new file mode 100644
index 0000000..0cfbc49
--- /dev/null
+++ b/mpz/abs.c
@@ -0,0 +1,54 @@
+/* mpz_abs(dst, src) -- Assign the absolute value of SRC to DST.
+
+Copyright 1991, 1993-1995, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_abs 1
+
+#include "gmp-impl.h"
+
+void
+mpz_abs (mpz_ptr w, mpz_srcptr u)
+{
+  mp_ptr wp;
+  mp_srcptr up;
+  mp_size_t size;
+
+  size = ABSIZ (u);
+
+  if (u != w)
+    {
+      wp = MPZ_NEWALLOC (w, size);
+
+      up = PTR (u);
+
+      MPN_COPY (wp, up, size);
+    }
+
+  SIZ (w) = size;
+}
diff --git a/mpz/add.c b/mpz/add.c
new file mode 100644
index 0000000..f1f0ae8
--- /dev/null
+++ b/mpz/add.c
@@ -0,0 +1,33 @@
+/* mpz_add -- add integers.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_add
+#include "aors.h"
diff --git a/mpz/add_ui.c b/mpz/add_ui.c
new file mode 100644
index 0000000..8fd15ad
--- /dev/null
+++ b/mpz/add_ui.c
@@ -0,0 +1,33 @@
+/* mpz_add_ui -- Add an mpz_t and an unsigned one-word integer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_add_ui
+#include "aors_ui.h"
diff --git a/mpz/and.c b/mpz/and.c
new file mode 100644
index 0000000..5d34547
--- /dev/null
+++ b/mpz/and.c
@@ -0,0 +1,222 @@
+/* mpz_and -- Logical and.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2003, 2005, 2012,
+2015-2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
/* Set RES to OP1 & OP2.  Negative operands are interpreted as
   infinite-precision two's complement; since mpz storage is
   sign-magnitude, a negative x enters the bit operations as
   -x = ~(x - 1) and the result is converted back on the way out.  */
void
mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
{
  mp_srcptr op1_ptr, op2_ptr;
  mp_size_t op1_size, op2_size;
  mp_ptr res_ptr;
  mp_size_t res_size;
  mp_size_t i;

  op1_size = SIZ(op1);
  op2_size = SIZ(op2);

  /* Order the operands so op1_size >= op2_size as *signed* sizes; a
     negative operand, if only one, therefore ends up as op2.  */
  if (op1_size < op2_size)
    {
      MPZ_SRCPTR_SWAP (op1, op2);
      MP_SIZE_T_SWAP (op1_size, op2_size);
    }

  op1_ptr = PTR(op1);
  op2_ptr = PTR(op2);

  if (op2_size >= 0)
    {
      /* Both operands non-negative: plain limb-wise AND.  */
      /* First loop finds the size of the result.  */
      for (i = op2_size; --i >= 0;)
	if ((op1_ptr[i] & op2_ptr[i]) != 0)
	  {
	    res_size = i + 1;
	    /* Handle allocation, now then we know exactly how much space is
	       needed for the result.  */
	    /* Don't re-read op1_ptr and op2_ptr.  Since res_size <=
	       MIN(op1_size, op2_size), res is not changed when op1
	       is identical to res or op2 is identical to res.  */
	    SIZ (res) = res_size;
	    mpn_and_n (MPZ_NEWALLOC (res, res_size), op1_ptr, op2_ptr, res_size);
	    return;
	  }

      SIZ (res) = 0;
    }
  else
    {
      TMP_DECL;

      op2_size = -op2_size;
      TMP_MARK;
      if (op1_size < 0)
	{
	  mp_ptr opx, opy;

	  /* Both operands are negative, so will be the result.
	     -((-OP1) & (-OP2)) = -(~(OP1 - 1) & ~(OP2 - 1)) =
	     = ~(~(OP1 - 1) & ~(OP2 - 1)) + 1 =
	     = ((OP1 - 1) | (OP2 - 1)) + 1      */

	  /* It might seem as we could end up with an (invalid) result with
	     a leading zero-limb here when one of the operands is of the
	     type 1,,0,,..,,.0.  But some analysis shows that we surely
	     would get carry into the zero-limb in this situation...  */

	  /* Here |OP1| <= |OP2| (both sizes were negative and
	     op1_size >= op2_size signed).  */
	  op1_size = -op1_size;

	  TMP_ALLOC_LIMBS_2 (opx, op1_size, opy, op2_size);
	  mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
	  op1_ptr = opx;

	  mpn_sub_1 (opy, op2_ptr, op2_size, (mp_limb_t) 1);
	  op2_ptr = opy;

	  res_ptr = MPZ_NEWALLOC (res, 1 + op2_size);
	  /* Don't re-read OP1_PTR and OP2_PTR.  They point to temporary
	     space--never to the space PTR(res) used to point to before
	     reallocation.  */

	  MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
		    op2_size - op1_size);
	  mpn_ior_n (res_ptr, op1_ptr, op2_ptr, op1_size);
	  TMP_FREE;
	  res_size = op2_size;

	  /* The final "+ 1" of the identity above; a carry out of the
	     top limb extends the result by one limb.  */
	  res_ptr[res_size] = 0;
	  MPN_INCR_U (res_ptr, res_size + 1, (mp_limb_t) 1);
	  res_size += res_ptr[res_size];

	  SIZ(res) = -res_size;
	}
      else
	{
	  /* Mixed signs: OP1 >= 0, OP2 < 0; the result is non-negative.  */
#if ANDNEW
	  /* NOTE(review): this ANDNEW variant is not normally compiled
	     (nothing in this file defines ANDNEW); the #else arm below
	     is the live code.  */
	  mp_size_t op2_lim;
	  mp_size_t count;

	  /* OP2 must be negated as with infinite precision.

	     Scan from the low end for a non-zero limb.  The first non-zero
	     limb is simply negated (two's complement).  Any subsequent
	     limbs are one's complemented.  Of course, we don't need to
	     handle more limbs than there are limbs in the other, positive
	     operand as the result for those limbs is going to become zero
	     anyway.  */

	  /* Scan for the least significant non-zero OP2 limb, and zero the
	     result meanwhile for those limb positions.  (We will surely
	     find a non-zero limb, so we can write the loop with one
	     termination condition only.)  */
	  for (i = 0; op2_ptr[i] == 0; i++)
	    res_ptr[i] = 0;
	  op2_lim = i;

	  if (op1_size <= op2_size)
	    {
	      /* The ones-extended OP2 is >= than the zero-extended OP1.
		 RES_SIZE <= OP1_SIZE.  Find the exact size.  */
	      for (i = op1_size - 1; i > op2_lim; i--)
		if ((op1_ptr[i] & ~op2_ptr[i]) != 0)
		  break;
	      res_size = i + 1;
	      for (i = res_size - 1; i > op2_lim; i--)
		res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
	      res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];
	      /* Yes, this *can* happen!  */
	      MPN_NORMALIZE (res_ptr, res_size);
	    }
	  else
	    {
	      /* The ones-extended OP2 is < than the zero-extended OP1.
		 RES_SIZE == OP1_SIZE, since OP1 is normalized.  */
	      res_size = op1_size;
	      MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
	      for (i = op2_size - 1; i > op2_lim; i--)
		res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
	      res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];
	    }
#else

	  /* OP1 is positive and zero-extended,
	     OP2 is negative and ones-extended.
	     The result will be positive.
	     OP1 & -OP2 = OP1 & ~(OP2 - 1).  */

	  mp_ptr opx;

	  opx = TMP_ALLOC_LIMBS (op2_size);
	  mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
	  op2_ptr = opx;

	  if (op1_size > op2_size)
	    {
	      /* The result has the same size as OP1, since OP1 is normalized
		 and longer than the ones-extended OP2.  */
	      res_size = op1_size;

	      /* Handle allocation, now then we know exactly how much space is
		 needed for the result.  */
	      res_ptr = MPZ_NEWALLOC (res, res_size);
	      /* Don't re-read OP1_PTR or OP2_PTR.  Since res_size = op1_size,
		 op1 is not changed if it is identical to res.
		 OP2_PTR points to temporary space.  */

	      mpn_andn_n (res_ptr, op1_ptr, op2_ptr, op2_size);
	      MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, res_size - op2_size);
	    }
	  else
	    {
	      /* Find out the exact result size.  Ignore the high limbs of OP2,
		 OP1 is zero-extended and would make the result zero.  */
	      res_size = 0;
	      for (i = op1_size; --i >= 0;)
		if ((op1_ptr[i] & ~op2_ptr[i]) != 0)
		  {
		    res_size = i + 1;
		    /* Handle allocation, now then we know exactly how much
		       space is needed for the result.  */
		    /* Don't re-read OP1_PTR.  Since res_size <= op1_size,
		       op1 is not changed if it is identical to res.  Don't
		       re-read OP2_PTR.  It points to temporary space--never
		       to the space PTR(res) used to point to before
		       reallocation.  */
		    mpn_andn_n (MPZ_NEWALLOC (res, res_size), op1_ptr, op2_ptr, res_size);

		    break;
		  }
	    }
#endif
	  SIZ(res) = res_size;
	  TMP_FREE;
	}
    }
}
diff --git a/mpz/aors.h b/mpz/aors.h
new file mode 100644
index 0000000..782a5fe
--- /dev/null
+++ b/mpz/aors.h
@@ -0,0 +1,129 @@
+/* mpz_add, mpz_sub -- add or subtract integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2011, 2012, 2020 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#ifdef OPERATION_add
+#define FUNCTION     mpz_add
+#define VARIATION
+#endif
+#ifdef OPERATION_sub
+#define FUNCTION     mpz_sub
+#define VARIATION    -
+#endif
+
+#ifndef FUNCTION
+Error, need OPERATION_add or OPERATION_sub
+#endif
+
+
/* FUNCTION expands to mpz_add or mpz_sub (selected via OPERATION_add /
   OPERATION_sub above); VARIATION is empty for add and `-' for sub, so
   subtraction is implemented as addition with V's sign flipped.
   Sets W to U + V (respectively U - V); W may alias U and/or V.  */
void
FUNCTION (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
{
  mp_srcptr up, vp;
  mp_ptr wp;
  mp_size_t usize, vsize, wsize;
  mp_size_t abs_usize;
  mp_size_t abs_vsize;

  usize = SIZ(u);
  /* For mpz_sub, VARIATION negates v's size, i.e. negates v.  */
  vsize = VARIATION SIZ(v);
  abs_usize = ABS (usize);
  abs_vsize = ABS (vsize);

  if (abs_usize < abs_vsize)
    {
      /* Swap U and V. */
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (usize, vsize);
      MP_SIZE_T_SWAP (abs_usize, abs_vsize);
    }

  /* True: ABS_USIZE >= ABS_VSIZE.  */

  /* If not space for w (and possible carry), increase space.  */
  wsize = abs_usize + 1;
  wp = MPZ_REALLOC (w, wsize);

  /* These must be after realloc (u or v may be the same as w).  */
  up = PTR(u);
  vp = PTR(v);

  if ((usize ^ vsize) < 0)
    {
      /* U and V have different sign.  Need to compare them to determine
	 which operand to subtract from which.  */

      /* This test is right since ABS_USIZE >= ABS_VSIZE.  */
      if (abs_usize != abs_vsize)
	{
	  /* |U| > |V|: result is |U| - |V| with U's sign.  */
	  mpn_sub (wp, up, abs_usize, vp, abs_vsize);
	  wsize = abs_usize;
	  MPN_NORMALIZE_NOT_ZERO (wp, wsize);
	  if (usize < 0)
	    wsize = -wsize;
	}
      else
	{
	  /* Equal magnitudes: compare limb vectors to pick direction.  */
	  int cmp = mpn_cmp (up, vp, abs_usize);
	  if (cmp < 0)
	    {
	      mpn_sub_n (wp, vp, up, abs_usize);
	      wsize = abs_usize;
	      MPN_NORMALIZE_NOT_ZERO (wp, wsize);
	      if (usize >= 0)
		wsize = -wsize;
	    }
	  else if (cmp > 0)
	    {
	      mpn_sub_n (wp, up, vp, abs_usize);
	      wsize = abs_usize;
	      MPN_NORMALIZE_NOT_ZERO (wp, wsize);
	      if (usize < 0)
		wsize = -wsize;
	    }
	  else
	    wsize = 0;
	}
    }
  else
    {
      /* U and V have same sign.  Add them.  */
      mp_limb_t cy_limb = mpn_add (wp, up, abs_usize, vp, abs_vsize);
      wp[abs_usize] = cy_limb;
      wsize = abs_usize + cy_limb;
      if (usize < 0)
	wsize = -wsize;
    }

  SIZ(w) = wsize;
}
diff --git a/mpz/aors_ui.h b/mpz/aors_ui.h
new file mode 100644
index 0000000..cbc467b
--- /dev/null
+++ b/mpz/aors_ui.h
@@ -0,0 +1,125 @@
+/* mpz_add_ui, mpz_sub_ui -- Add or subtract an mpz_t and an unsigned
+   one-word integer.
+
+Copyright 1991, 1993, 1994, 1996, 1999-2002, 2004, 2012, 2013, 2015,
+2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#ifdef OPERATION_add_ui
+#define FUNCTION          mpz_add_ui
+#define FUNCTION2         mpz_add
+#define VARIATION_CMP     >=
+#define VARIATION_NEG
+#define VARIATION_UNNEG   -
+#endif
+
+#ifdef OPERATION_sub_ui
+#define FUNCTION          mpz_sub_ui
+#define FUNCTION2         mpz_sub
+#define VARIATION_CMP     <
+#define VARIATION_NEG     -
+#define VARIATION_UNNEG
+#endif
+
+#ifndef FUNCTION
+Error, need OPERATION_add_ui or OPERATION_sub_ui
+#endif
+
+
/* FUNCTION expands to mpz_add_ui or mpz_sub_ui (selected via
   OPERATION_add_ui / OPERATION_sub_ui above).  The VARIATION_* macros
   flip comparisons and signs so a single body handles both: for
   sub_ui, results are produced with the opposite sign.  Sets W to
   U + VVAL (respectively U - VVAL); W may alias U.  */
void
FUNCTION (mpz_ptr w, mpz_srcptr u, unsigned long int vval)
{
  mp_srcptr up;
  mp_ptr wp;
  mp_size_t usize, wsize;
  mp_size_t abs_usize;

#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
  if (vval > GMP_NUMB_MAX)
    {
      /* VVAL needs two limbs: build a stack mpz and defer to the
	 full mpz_add/mpz_sub.  */
      mpz_t v;
      mp_limb_t vl[2];
      PTR(v) = vl;
      vl[0] = vval & GMP_NUMB_MASK;
      vl[1] = vval >> GMP_NUMB_BITS;
      SIZ(v) = 2;
      FUNCTION2 (w, u, v);
      return;
    }
#endif

  usize = SIZ (u);
  if (usize == 0)
    {
      /* U is zero: the result is +/-VVAL.  */
      MPZ_NEWALLOC (w, 1)[0] = vval;
      SIZ (w) = VARIATION_NEG (vval != 0);
      return;
    }

  abs_usize = ABS (usize);

  /* For add_ui this branch is taken when U >= 0, for sub_ui when
     U < 0: in both cases the magnitudes add.  */
  if (usize VARIATION_CMP 0)
    {
      mp_limb_t cy;

      /* If not space for W (and possible carry), increase space.  */
      wp = MPZ_REALLOC (w, abs_usize + 1);
      /* These must be after realloc (U may be the same as W).  */
      up = PTR (u);

      cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) vval);
      wp[abs_usize] = cy;
      wsize = VARIATION_NEG (abs_usize + cy);
    }
  else
    {
      /* If not space for W, increase space.  */
      wp = MPZ_REALLOC (w, abs_usize);
      /* These must be after realloc (U may be the same as W).  */
      up = PTR (u);

      /* The signs are different.  Need exact comparison to determine
	 which operand to subtract from which.  */
      if (abs_usize == 1 && up[0] < vval)
	{
	  wp[0] = vval - up[0];
	  wsize = VARIATION_NEG 1;
	}
      else
	{
	  mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) vval);
	  /* Size can decrease with at most one limb.  */
	  wsize = VARIATION_UNNEG (abs_usize - (wp[abs_usize - 1] == 0));
	}
    }

  SIZ (w) = wsize;
}
diff --git a/mpz/aorsmul.c b/mpz/aorsmul.c
new file mode 100644
index 0000000..ba766f6
--- /dev/null
+++ b/mpz/aorsmul.c
@@ -0,0 +1,179 @@
+/* mpz_addmul, mpz_submul -- add or subtract multiple.
+
+Copyright 2001, 2004, 2005, 2012, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
/* expecting x and y both with non-zero high limbs */
#define mpn_cmp_twosizes_lt(xp,xsize, yp,ysize)                 \
  ((xsize) < (ysize)                                            \
   || ((xsize) == (ysize) && mpn_cmp (xp, yp, xsize) < 0))


/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.

   The signs of w, x and y are fully accounted for by each flipping "sub".

   The sign of w is retained for the result, unless the absolute value
   submul underflows, in which case it flips.  */

static void __gmpz_aorsmul (REGPARM_3_1 (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)) REGPARM_ATTR (1);
#define mpz_aorsmul(w,x,y,sub)  __gmpz_aorsmul (REGPARM_3_1 (w, x, y, sub))

REGPARM_ATTR (1) static void
mpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)
{
  mp_size_t  xsize, ysize, tsize, wsize, wsize_signed;
  mp_ptr     wp, tp;
  mp_limb_t  c;
  TMP_DECL;

  /* w unaffected if x==0 or y==0 */
  xsize = SIZ(x);
  ysize = SIZ(y);
  if (xsize == 0 || ysize == 0)
    return;

  /* make x the bigger of the two */
  if (ABS(ysize) > ABS(xsize))
    {
      MPZ_SRCPTR_SWAP (x, y);
      MP_SIZE_T_SWAP (xsize, ysize);
    }

  /* Fold y's sign into sub; from here on only sub's sign matters.  */
  sub ^= ysize;
  ysize = ABS(ysize);

  /* use mpn_addmul_1/mpn_submul_1 if possible */
  if (ysize == 1)
    {
      mpz_aorsmul_1 (w, x, PTR(y)[0], sub);
      return;
    }

  /* Fold x's sign into sub likewise.  */
  sub ^= xsize;
  xsize = ABS(xsize);

  /* ...and w's sign; now sub>=0 means |w| grows, sub<0 means it may
     shrink (per the header comment above).  */
  wsize_signed = SIZ(w);
  sub ^= wsize_signed;
  wsize = ABS(wsize_signed);

  /* Room for the product (xsize+ysize limbs) plus a possible carry.  */
  tsize = xsize + ysize;
  wp = MPZ_REALLOC (w, MAX (wsize, tsize) + 1);

  if (wsize_signed == 0)
    {
      mp_limb_t  high;
      /* Nothing to add to, just set w=x*y.  No w==x or w==y overlap here,
	 since we know x,y!=0 but w==0.  */
      if (x == y)
	{
	  mpn_sqr (wp, PTR(x),xsize);
	  high = wp[tsize-1];
	}
      else
	high = mpn_mul (wp, PTR(x),xsize, PTR(y),ysize);
      tsize -= (high == 0);
      SIZ(w) = (sub >= 0 ? tsize : -tsize);
      return;
    }

  /* General case: compute the product into temporary space, then
     add it to or subtract it from w.  */
  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (tsize);

  if (x == y)
    {
      mpn_sqr (tp, PTR(x),xsize);
      tsize -= (tp[tsize-1] == 0);
    }
  else
    {
      mp_limb_t high;
      high = mpn_mul (tp, PTR(x),xsize, PTR(y),ysize);
      tsize -= (high == 0);
    }
  ASSERT (tp[tsize-1] != 0);
  if (sub >= 0)
    {
      mp_srcptr up    = wp;
      mp_size_t usize = wsize;

      /* mpn_add wants its first operand at least as long: swap the
	 roles of w and the product if needed.  */
      if (usize < tsize)
	{
	  up	= tp;
	  usize = tsize;
	  tp	= wp;
	  tsize = wsize;

	  wsize = usize;
	}

      c = mpn_add (wp, up,usize, tp,tsize);
      wp[wsize] = c;
      wsize += (c != 0);
    }
  else
    {
      mp_srcptr up    = wp;
      mp_size_t usize = wsize;

      /* Subtract the smaller magnitude from the larger; if the product
	 is the larger, the result's sign flips.  */
      if (mpn_cmp_twosizes_lt (up,usize, tp,tsize))
	{
	  up	= tp;
	  usize = tsize;
	  tp	= wp;
	  tsize = wsize;

	  wsize = usize;
	  wsize_signed = -wsize_signed;
	}

      ASSERT_NOCARRY (mpn_sub (wp, up,usize, tp,tsize));
      wsize = usize;
      MPN_NORMALIZE (wp, wsize);
    }

  SIZ(w) = (wsize_signed >= 0 ? wsize : -wsize);

  TMP_FREE;
}
+
+
/* W += U * V.  */
void
mpz_addmul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
{
  mpz_aorsmul (w, u, v, (mp_size_t) 0);  /* sub >= 0 selects addmul */
}

/* W -= U * V.  */
void
mpz_submul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
{
  mpz_aorsmul (w, u, v, (mp_size_t) -1);  /* sub < 0 selects submul */
}
diff --git a/mpz/aorsmul_i.c b/mpz/aorsmul_i.c
new file mode 100644
index 0000000..317be3e
--- /dev/null
+++ b/mpz/aorsmul_i.c
@@ -0,0 +1,254 @@
+/* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.
+
+   THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
+   ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
+   COMPLETELY IN FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2004, 2005, 2012, 2021, 2022 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* MPN_MUL_1C(cout, dst, src, size, n, cin): dst[0..size-1] = src[]*n + cin,
+   with the carry-out limb written to cout.  Uses the native mpn_mul_1c
+   where the CPU port provides one, otherwise emulates it with
+   mpn_mul_1 + mpn_add_1; the two partial carries sum without
+   overflowing a limb (mul_1's carry is at most n-1, add_1's at most 1).  */
+#if HAVE_NATIVE_mpn_mul_1c
+#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
+  do {                                                  \
+    (cout) = mpn_mul_1c (dst, src, size, n, cin);       \
+  } while (0)
+#else
+#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
+  do {                                                  \
+    mp_limb_t __cy;                                     \
+    __cy = mpn_mul_1 (dst, src, size, n);               \
+    (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \
+  } while (0)
+#endif
+
+
+/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.
+
+   All that's needed to account for negative w or x is to flip "sub".
+
+   The final w will retain its sign, unless an underflow occurs in a submul
+   of absolute values, in which case it's flipped.
+
+   If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
+   used.  The alternative would be mpn_mul_1 into temporary space followed
+   by mpn_sub_n.  Avoiding temporary space seems good, and submul+com stands
+   a chance of being faster since it involves only one set of carry
+   propagations, not two.  Note that doing an addmul_1 with a
+   twos-complement negative y doesn't work, because it effectively adds an
+   extra x * 2^GMP_LIMB_BITS.  */
+
+REGPARM_ATTR(1) void
+mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
+{
+  mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
+  mp_srcptr  xp;
+  mp_ptr     wp;
+  mp_limb_t  cy;
+
+  /* w unaffected if x==0 or y==0 */
+  xsize = SIZ (x);
+  if (xsize == 0 || y == 0)
+    return;
+
+  /* Fold the sign of x into "sub": a negative x turns an addmul of
+     absolute values into a submul, and vice versa (see file comment).  */
+  sub ^= xsize;
+  xsize = ABS (xsize);
+
+  wsize_signed = SIZ (w);
+  if (wsize_signed == 0)
+    {
+      /* nothing to add to, just set x*y, "sub" gives the sign */
+      wp = MPZ_NEWALLOC (w, xsize+1);
+      cy = mpn_mul_1 (wp, PTR(x), xsize, y);
+      wp[xsize] = cy;
+      xsize += (cy != 0);
+      SIZ (w) = (sub >= 0 ? xsize : -xsize);
+      return;
+    }
+
+  /* Likewise fold in the sign of w; from here on sub>=0 means the two
+     absolute values are added, sub<0 that they are subtracted.  */
+  sub ^= wsize_signed;
+  wsize = ABS (wsize_signed);
+
+  new_wsize = MAX (wsize, xsize);
+  wp = MPZ_REALLOC (w, new_wsize+1);
+  xp = PTR (x);
+  min_size = MIN (wsize, xsize);
+
+  if (sub >= 0)
+    {
+      /* addmul of absolute values */
+
+      cy = mpn_addmul_1 (wp, xp, min_size, y);
+      wp += min_size;
+      xp += min_size;
+
+      /* dsize > 0: x has more limbs than w; dsize < 0: the reverse.
+	 Handle the single-operand tail accordingly.  */
+      dsize = xsize - wsize;
+#if HAVE_NATIVE_mpn_mul_1c
+      if (dsize > 0)
+	cy = mpn_mul_1c (wp, xp, dsize, y, cy);
+      else if (dsize < 0)
+	{
+	  dsize = -dsize;
+	  cy = mpn_add_1 (wp, wp, dsize, cy);
+	}
+#else
+      if (dsize != 0)
+	{
+	  mp_limb_t  cy2;
+	  if (dsize > 0)
+	    cy2 = mpn_mul_1 (wp, xp, dsize, y);
+	  else
+	    {
+	      dsize = -dsize;
+	      cy2 = 0;
+	    }
+	  cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
+	}
+#endif
+
+      wp[dsize] = cy;
+      new_wsize += (cy != 0);
+    }
+  else
+    {
+      /* submul of absolute values */
+
+      cy = mpn_submul_1 (wp, xp, min_size, y);
+      if (wsize >= xsize)
+	{
+	  /* if w bigger than x, then propagate borrow through it */
+	  if (wsize != xsize)
+	    cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
+
+	  if (cy != 0)
+	    {
+	      /* Borrow out of w, take twos complement negative to get
+		 absolute value, flip sign of w.  */
+	      cy -= mpn_neg (wp, wp, new_wsize);
+	      wp[new_wsize] = cy;
+	      new_wsize += (cy != 0);
+	      wsize_signed = -wsize_signed;
+	    }
+	}
+      else /* wsize < xsize */
+	{
+	  /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
+	     take twos complement and use an mpn_mul_1 for the rest.  */
+
+	  mp_limb_t  cy2;
+
+	  /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
+	  cy -= mpn_neg (wp, wp, wsize);
+
+	  /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
+	     returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
+	  cy2 = (cy == MP_LIMB_T_MAX);
+	  cy += cy2;
+	  MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
+	  wp[new_wsize] = cy;
+	  new_wsize += (cy != 0);
+
+	  /* Apply any -1 from above.  The value at wp+wsize is non-zero
+	     because y!=0 and the high limb of x will be non-zero.  */
+	  if (cy2)
+	    MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
+
+	  wsize_signed = -wsize_signed;
+	}
+
+      /* submul can produce high zero limbs due to cancellation, both when w
+	 has more limbs or x has more  */
+      MPN_NORMALIZE (wp, new_wsize);
+    }
+
+  /* new_wsize is the result magnitude; wsize_signed carries the sign,
+     possibly flipped above when a submul underflowed.  */
+  SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);
+
+  ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
+}
+
+
+/* w += x*y.  When "unsigned long" is wider than a limb and y does not
+   fit in one, split y = yhi*2^GMP_NUMB_BITS + ylo and do two
+   single-limb addmuls, with x temporarily viewed shifted up a limb.  */
+void
+mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
+{
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  if (UNLIKELY (y > GMP_NUMB_MAX))
+    {
+      mpz_t t;
+      mp_ptr tp;
+      mp_size_t xn;
+      TMP_DECL;
+      TMP_MARK;
+      xn = SIZ (x);
+      if (xn == 0) return;
+      MPZ_TMP_INIT (t, ABS (xn) + 1);
+      tp = PTR (t);
+      /* t = x * 2^GMP_NUMB_BITS: a zero low limb with x's limbs above.  */
+      tp[0] = 0;
+      MPN_COPY (tp + 1, PTR(x), ABS (xn));
+      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
+      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
+      /* Re-point t at the high limbs, so t now aliases plain x.  */
+      PTR(t) = tp + 1;
+      SIZ(t) = xn;
+      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
+      TMP_FREE;
+      return;
+    }
+#endif
+  mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
+}
+
+/* w -= x*y.  Mirrors mpz_addmul_ui above: when "unsigned long" is wider
+   than a limb and y does not fit in one, split y into high and low limb
+   halves and do two single-limb submuls, with x temporarily viewed
+   shifted up a limb.  */
+void
+mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
+{
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* UNLIKELY added for consistency with the identical guard in
+     mpz_addmul_ui; the wide-y path is the rare case.  */
+  if (UNLIKELY (y > GMP_NUMB_MAX))
+    {
+      mpz_t t;
+      mp_ptr tp;
+      mp_size_t xn;
+      TMP_DECL;
+      TMP_MARK;
+      xn = SIZ (x);
+      if (xn == 0) return;
+      MPZ_TMP_INIT (t, ABS (xn) + 1);
+      tp = PTR (t);
+      /* t = x * 2^GMP_NUMB_BITS: a zero low limb with x's limbs above.  */
+      tp[0] = 0;
+      MPN_COPY (tp + 1, PTR(x), ABS (xn));
+      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
+      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
+      /* Re-point t at the high limbs, so t now aliases plain x.  */
+      PTR(t) = tp + 1;
+      SIZ(t) = xn;
+      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
+      TMP_FREE;
+      return;
+    }
+#endif
+  mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
+}
diff --git a/mpz/array_init.c b/mpz/array_init.c
new file mode 100644
index 0000000..df97f34
--- /dev/null
+++ b/mpz/array_init.c
@@ -0,0 +1,49 @@
+/* mpz_array_init (array, array_size, size_per_elem) --
+
+Copyright 1991, 1993-1995, 2000-2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize the arr_size mpz_t elements of arr, giving each room for
+   nbits bits, all carved out of one shared limb allocation.
+   NOTE(review): the shared block is never freeable per-element, so the
+   elements must not be mpz_clear'd or grown beyond nlimbs; this matches
+   the historical mpz_array_init contract (deprecated upstream -- confirm
+   against the GMP manual before new uses).  */
+void
+mpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits)
+{
+  mp_ptr p;
+  mp_size_t i;
+  mp_size_t nlimbs;
+
+  nlimbs = nbits / GMP_NUMB_BITS + 1;
+  p = __GMP_ALLOCATE_FUNC_LIMBS (arr_size * nlimbs);
+
+  for (i = 0; i < arr_size; i++)
+    {
+      /* Claim one limb more than each element really has, so sizes up to
+	 nlimbs never trigger a reallocation of the shared block.  */
+      ALLOC (&arr[i]) = nlimbs + 1; /* Yes, lie a little... */
+      SIZ (&arr[i]) = 0;
+      PTR (&arr[i]) = p + i * nlimbs;
+    }
+}
diff --git a/mpz/bin_ui.c b/mpz/bin_ui.c
new file mode 100644
index 0000000..04cd340
--- /dev/null
+++ b/mpz/bin_ui.c
@@ -0,0 +1,459 @@
+/* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
+
+Copyright 1998-2002, 2012, 2013, 2015, 2017-2018, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* How many special cases? Minimum is 2: 0 and 1;
+ * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
+ */
+#define APARTAJ_KALKULOJ 2
+
+/* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
+ * the operands fit.
+ */
+#define UZU_BIN_UIUI 0
+
+/* Whether to use a shortcut to precompute the product of four
+ * elements (1), or precompute only the product of a couple (0).
+ *
+ * In both cases the precomputed product is then updated with some
+ * linear operations to obtain the product of the next four (1)
+ * [or two (0)] operands.
+ */
+#define KVAROPE 1
+
+/* Ensure the positive r has two zeroed limbs available above its current
+   size, so the posmpz_* helpers below may read and write one limb past
+   SIZ (r).  */
+static void
+posmpz_init (mpz_ptr r)
+{
+  mp_ptr rp;
+  ASSERT (SIZ (r) > 0);
+  /* rp points just past the most significant limb after the realloc.  */
+  rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
+  *rp = 0;
+  *++rp = 0;
+}
+
+/* Equivalent to mpz_add_ui (r, r, in), but faster when
+   0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
+static void
+posmpz_inc_ui (mpz_ptr r, unsigned long in)
+{
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  mpz_add_ui (r, r, in);
+#else
+  ASSERT (SIZ (r) > 0);
+  MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
+  /* If the carry reached the (previously zero) limb just above, that
+     limb is now 1 and the size grows by exactly that amount.  */
+  SIZ (r) += PTR (r)[SIZ (r)];
+#endif
+}
+
+/* Equivalent to mpz_sub_ui (r, r, in), but faster when
+   0 < SIZ (r) and we know in advance that the result is positive. */
+static void
+posmpz_dec_ui (mpz_ptr r, unsigned long in)
+{
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  mpz_sub_ui (r, r, in);
+#else
+  ASSERT (mpz_cmp_ui (r, in) >= 0);
+  MPN_DECR_U (PTR (r), SIZ (r), in);
+  /* Subtracting a single limb can shorten r by at most one limb.  */
+  SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
+#endif
+}
+
+/* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
+   0 < SIZ (r) and we know in advance that the result is positive. */
+static void
+posmpz_rsh1 (mpz_ptr r)
+{
+  mp_ptr rp;
+  mp_size_t rn;
+
+  rn = SIZ (r);
+  rp = PTR (r);
+  ASSERT (rn > 0);
+  mpn_rshift (rp, rp, rn, 1);
+  /* Halving can clear the top limb; shrink the size accordingly.  */
+  SIZ (r) -= rp[rn - 1] == 0;
+}
+
+/* Computes r = n(n+(2*k-1))/2
+   It uses a square instead of a product, computing
+   r = ((n+k-1)^2 + n - (k-1)^2)/2
+   As a side effect, sets t = n+k-1
+ */
+static void
+mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
+{
+  ASSERT (k > 0 && SIZ(n) > 0);
+  --k;
+  mpz_add_ui (t, n, k);
+  mpz_mul (r, t, t);
+  mpz_add (r, r, n);
+  posmpz_rsh1 (r);
+  /* Subtract the (k-1)^2/2 term of the formula (k was pre-decremented
+     above); the single-limb product (k + (k&1))*(k>>1) fits an unsigned
+     long only while k stays below 2^(BITS_PER_ULONG/2).  */
+  if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
+    posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
+  else
+    {
+      /* Huge k: form the same subtrahend as a full mpz.  */
+      mpz_t tmp;
+      mpz_init_set_ui (tmp, (k + (k & 1)));
+      mpz_mul_ui (tmp, tmp, k >> 1);
+      mpz_sub (r, r, tmp);
+      mpz_clear (tmp);
+    }
+}
+
+#if KVAROPE
+/* Multiply r by the four-factor groups indexed lk+1 .. k, updating the
+   running values p and P with the linear recurrences described before
+   mpz_raising_fac4.  The index range is halved recursively so the final
+   multiplications combine operands of comparable size; t (or a local
+   temporary when t is NULL) collects the low half's product.  */
+static void
+rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
+{
+  if (k - lk < 5)
+    {
+      /* Base case: step (p, P) and multiply each group in directly.  */
+      do {
+	posmpz_inc_ui (p, 4*k+2);
+	mpz_addmul_ui (P, p, 4*k);
+	posmpz_dec_ui (P, k);
+	mpz_mul (r, r, P);
+      } while (--k > lk);
+    }
+  else
+    {
+      mpz_t lt;
+      unsigned long int m;
+
+      /* Lazily-allocated temporary: ALLOC==0 marks it as owning no limb
+	 block yet, relying on mpz_clear being a no-op in that state
+	 (GMP internal lazy-allocation convention).  */
+      ALLOC (lt) = 0;
+      SIZ (lt) = 0;
+      if (t == NULL)
+	t = lt;
+      m = ((k + lk) >> 1) + 1;
+      rek_raising_fac4 (r, p, P, k, m, t);
+
+      posmpz_inc_ui (p, 4*m+2);
+      mpz_addmul_ui (P, p, 4*m);
+      posmpz_dec_ui (P, m);
+      mpz_set (t, P);
+      rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
+
+      mpz_mul (r, r, t);
+      mpz_clear (lt);
+    }
+}
+
+/* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
+   rek_raising_fac4, and exploiting an idea inspired by a piece of
+   code that Fredrik Johansson wrote and by a comment by Niels Möller.
+
+   Assume k = 4i then compute:
+     p  = (n+1)(n+4i)/2 - i
+	  (n+1+1)(n+4i)/2 = p + i + (n+4i)/2
+	  (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
+     P  = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
+     n' = n + 2
+     i' = i - 1
+	  (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
+	  (n'-1)(n'+4i'+2)/2 - i' - 1 = p
+	  (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
+	  (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) =  p + 4i' + 1
+	  (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
+     p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
+	  p' - 4i' - 2 = p
+	  (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
+	  (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
+	  (p' - 3i' - 1)*(p' - i')/2 = P
+	  (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
+	  (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2
+	  (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
+	  (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
+     P' = P + 4i'p' - i'
+
+   And compute the product P * P' * P" ...
+ */
+
+/* See the comment above for the derivation of the p/P update scheme.
+   Clobbers n, t and p as working storage.  */
+static void
+mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
+{
+  ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
+  posmpz_init (n);
+  posmpz_inc_ui (n, 1);
+  SIZ (r) = 0;
+  /* Odd k: pull out the single factor n+1, leaving an even count.  */
+  if (k & 1)
+    {
+      mpz_set (r, n);
+      posmpz_inc_ui (n, 1);
+    }
+  k >>= 1;
+  if (APARTAJ_KALKULOJ < 2 && k == 0)
+    return;
+
+  /* p = product of the outermost remaining pair, halved (see
+     mpz_hmul_nbnpk); t gets the side-effect value n+k-1.  */
+  mpz_hmul_nbnpk (p, n, k, t);
+  posmpz_init (p);
+
+  /* Odd pair count: multiply this pair in, leaving a multiple of four.  */
+  if (k & 1)
+    {
+      if (SIZ (r))
+	mpz_mul (r, r, p);
+      else
+	mpz_set (r, p);
+      posmpz_inc_ui (p, k - 1);
+    }
+  k >>= 1;
+  if (APARTAJ_KALKULOJ < 4 && k == 0)
+    return;
+
+  /* t = the first four-factor group (with its factors of 2 removed).  */
+  mpz_hmul_nbnpk (t, p, k, n);
+  if (SIZ (r))
+    mpz_mul (r, r, t);
+  else
+    mpz_set (r, t);
+
+  if (APARTAJ_KALKULOJ > 8 || k > 1)
+    {
+      posmpz_dec_ui (p, k);
+      /* Remaining k-1 groups via the divide-and-conquer helper.  */
+      rek_raising_fac4 (r, p, t, k - 1, 0, n);
+    }
+}
+
+#else /* KVAROPE */
+
+/* Multiply r by the pair-products indexed (lk, k], tracked incrementally
+   in n (see the comment before mpz_raising_fac).  The range is halved
+   recursively to balance operand sizes; t1 (or a fresh local when t1 is
+   NULL) collects the low half's product.  */
+static void
+rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
+{
+  /* Should the threshold depend on SIZ (n) ? */
+  if (k - lk < 10)
+    {
+      do {
+	posmpz_inc_ui (n, k);
+	mpz_mul (r, r, n);
+	--k;
+      } while (k > lk);
+    }
+  else
+    {
+      mpz_t t3;
+      unsigned long int m;
+
+      m = ((k + lk) >> 1) + 1;
+      rek_raising_fac (r, n, k, m, t1, t2);
+
+      posmpz_inc_ui (n, m);
+      if (t1 == NULL)
+	{
+	  mpz_init_set (t3, n);
+	  t1 = t3;
+	}
+      else
+	{
+	  /* Mark t3 as owning nothing, so the mpz_clear below is a no-op
+	     on this branch (GMP internal lazy-allocation convention).  */
+	  ALLOC (t3) = 0;
+	  mpz_set (t1, n);
+	}
+      rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
+
+      mpz_mul (r, r, t1);
+      mpz_clear (t3);
+    }
+}
+
+/* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
+   rek_raising_fac, and exploiting an idea inspired by a piece of
+   code that Fredrik Johansson wrote.
+
+   Force an even k = 2i then compute:
+     p  = (n+1)(n+2i)/2
+     i' = i - 1
+     p == (n+1)(n+2i'+2)/2
+     p' = p + i' == (n+2)(n+2i'+1)/2
+     n' = n + 1
+     p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
+
+   And compute the product p * p' * p" ...
+*/
+
+/* See the comment above for the p update scheme.  Clobbers n, t and p
+   as working storage.  */
+static void
+mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
+{
+  unsigned long int hk;
+  ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
+  mpz_add_ui (n, n, 1);
+  hk = k >> 1;
+  /* For the pre-increment n: p = (n+1)(n+2*hk)/2 and, as a side effect
+     of mpz_hmul_nbnpk, t = n+hk.  */
+  mpz_hmul_nbnpk (p, n, hk, t);
+
+  if ((k & 1) != 0)
+    {
+      /* Odd k: the unpaired factor is n+k = (n+hk) + hk + 1.  */
+      mpz_add_ui (t, t, hk + 1);
+      mpz_mul (r, t, p);
+    }
+  else
+    {
+      mpz_set (r, p);
+    }
+
+  if ((APARTAJ_KALKULOJ > 3) || (hk > 1))
+    {
+      posmpz_init (p);
+      /* Remaining hk-1 pair-products via the recursive helper.  */
+      rek_raising_fac (r, p, hk - 1, 0, t, n);
+    }
+}
+#endif /* KVAROPE */
+
+/* This is a poor implementation.  Look at bin_uiui.c for improvement ideas.
+   In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
+   the code here only for big n.
+
+   The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
+   1 section 1.2.6 part G. */
+
+void
+mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
+{
+  mpz_t      ni;
+  mp_size_t  negate;
+
+  if (SIZ (n) < 0)
+    {
+      /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
+      mpz_init (ni);
+      mpz_add_ui (ni, n, 1L);
+      mpz_neg (ni, ni);
+      negate = (k & 1);   /* (-1)^k */
+    }
+  else
+    {
+      /* bin(n,k) == 0 if k>n
+	 (no test for this under the n<0 case, since -n+k-1 >= k there) */
+      if (mpz_cmp_ui (n, k) < 0)
+	{
+	  SIZ (r) = 0;
+	  return;
+	}
+
+      /* set ni = n-k */
+      mpz_init (ni);
+      mpz_sub_ui (ni, n, k);
+      negate = 0;
+    }
+
+  /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
+     for positive, 1 for negative). */
+
+  /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's
+     whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
+     = ni, and new ni of ni+k-ni = k.  */
+  if (mpz_cmp_ui (ni, k) < 0)
+    {
+      unsigned long  tmp;
+      tmp = k;
+      k = mpz_get_ui (ni);
+      mpz_set_ui (ni, tmp);
+    }
+
+  /* Dispatch: the smallest k values get closed-form shortcuts.  */
+  if (k < APARTAJ_KALKULOJ)
+    {
+      if (k == 0)
+	{
+	  SIZ (r) = 1;
+	  MPZ_NEWALLOC (r, 1)[0] = 1;
+	}
+#if APARTAJ_KALKULOJ > 2
+      else if (k > 1)
+	{
+	  mpz_add_ui (ni, ni, 1 + (APARTAJ_KALKULOJ > 2 && k > 2));
+	  mpz_mul (r, ni, ni); /* r = (n + (k>2))^2 */
+	  if (APARTAJ_KALKULOJ == 2 || k == 2)
+	    {
+	      mpz_add (r, r, ni); /* n^2+n= n(n+1) */
+	      posmpz_rsh1 (r);
+	    }
+#if APARTAJ_KALKULOJ > 3
+#if APARTAJ_KALKULOJ != 5
+#error Not implemented! 3 < APARTAJ_KALKULOJ != 5
+#endif
+	  else /* k > 2 */
+	    { /* k = 3, 4 */
+	      mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
+	      if (k == 3)
+		{
+		  mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
+		  /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */
+		}
+	      else /* k = 4 */
+		{
+		  mpz_add (ni, ni, r); /* (n+1)^2+n */
+		  mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
+		  /* We should subtract one: ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3). */
+		  /* PTR (r) [0] ^= 1; would suffice, but it is not even needed, */
+		  /* because the next division will shift away this bit anyway.  */
+		  /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */
+		}
+	      /* Exact bdiv by 3 combined with a right shift of 1|(k>>1)
+		 bits: divides by 6 for k=3 and by 24 for k=4.  */
+	      mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1 | (k>>1));
+	      SIZ(r) -= PTR(r) [SIZ(r) - 1] == 0;
+	    }
+#endif
+	}
+#endif
+      else
+	{ /* k = 1 */
+	  mpz_add_ui (r, ni, 1);
+	}
+    }
+#if UZU_BIN_UIUI
+  else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
+    {
+      mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
+    }
+#endif
+  else
+    {
+      /* General case: numerator = rising factorial with factors of 2
+	 pre-removed, denominator = odd part of k!.  */
+      mp_limb_t count;
+      mpz_t num, den;
+
+      mpz_init (num);
+      mpz_init (den);
+
+#if KVAROPE
+      mpz_raising_fac4 (num, ni, k, den, r);
+      popc_limb (count, k);
+      /* NOTE(review): all operands are unsigned, so this assert cannot
+	 fire; it documents the intended invariant only.  */
+      ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
+      mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
+#else
+      mpz_raising_fac (num, ni, k, den, r);
+      popc_limb (count, k);
+      /* NOTE(review): unsigned arithmetic; see note in the KVAROPE
+	 branch.  */
+      ASSERT (k - (k >> 1) - count >= 0);
+      mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
+#endif
+
+      mpz_oddfac_1(den, k, 0);
+
+      mpz_divexact(r, num, den);
+      mpz_clear (num);
+      mpz_clear (den);
+    }
+  mpz_clear (ni);
+
+  /* Branch-free conditional negation: (x ^ 0) + 0 = x, (x ^ -1) + 1 = -x. */
+  SIZ(r) = (SIZ(r) ^ -negate) + negate;
+}
diff --git a/mpz/bin_uiui.c b/mpz/bin_uiui.c
new file mode 100644
index 0000000..542d485
--- /dev/null
+++ b/mpz/bin_uiui.c
@@ -0,0 +1,707 @@
+/* mpz_bin_uiui - compute n over k.
+
+Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+Copyright 2010-2012, 2015-2018, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef BIN_GOETGHELUCK_THRESHOLD
+#define BIN_GOETGHELUCK_THRESHOLD  512
+#endif
+#ifndef BIN_UIUI_ENABLE_SMALLDC
+#define BIN_UIUI_ENABLE_SMALLDC    1
+#endif
+#ifndef BIN_UIUI_RECURSIVE_SMALLDC
+#define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32)
+#endif
+
+/* Algorithm:
+
+   Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8)
+   which are then accumulated into mpn numbers.  The first inner loop
+   accumulates divisor factors, the 2nd inner loop accumulates exactly the same
+   number of dividend factors.  We avoid accumulating more for the divisor,
+   even with its smaller factors, since we else cannot guarantee divisibility.
+
+   Since we know each division will yield an integer, we compute the quotient
+   using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod
+   2^t.
+
+   Improvements:
+
+   (1) An obvious improvement to this code would be to compute mod 2^t
+   everywhere.  Unfortunately, we cannot determine t beforehand, unless we
+   invoke some approximation, such as Stirling's formula.  Of course, we don't
+   need t to be tight.  However, it is not clear that this would help much,
+   our numbers are kept reasonably small already.
+
+   (2) Compute nmax/kmax semi-accurately, without scalar division or a loop.
+   Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index,
+   would make it both reasonably accurate and fast.  (We could use a table
+   stored into a limb, perhaps.)  The table should take the removed factors of
+   2 into account (those done on-the-fly in mulN).
+
+   (3) The first time in the loop we compute the odd part of a
+   factorial in kp, we might use oddfac_1 for this task.
+ */
+
+/* This threshold determines how large divisor to accumulate before we call
+   bdiv.  Perhaps we should never call bdiv, and accumulate all we are told,
+   since we are just basecase code anyway?  Presumably, this depends on the
+   relative speed of the asymptotically fast code and this code.  */
+#define SOME_THRESHOLD 20
+
+/* Multiply-into-limb functions.  These remove factors of 2 on-the-fly.  FIXME:
+   All versions of MAXFACS don't take this 2 removal into account now, meaning
+   that then, shifting just adds some overhead.  (We remove factors from the
+   completed limb anyway.)  */
+
+/* Product of 1 consecutive factor starting at m (no factors of 2
+   removed).  */
+static mp_limb_t
+mul1 (mp_limb_t m)
+{
+  return m;
+}
+
+/* m(m+1)/2: one of two consecutive factors is even, so exactly one
+   factor of 2 is removed (cf. tcnttab).  */
+static mp_limb_t
+mul2 (mp_limb_t m)
+{
+  /* We need to shift before multiplying, to avoid an overflow. */
+  mp_limb_t m01 = (m | 1) * ((m + 1) >> 1);
+  return m01;
+}
+
+/* m(m+1)(m+2)/2, one factor of 2 removed.  Unlike mul2, the product is
+   formed before the shift; callers bound m (via MAXFACS) so the
+   intermediate fits a limb.  */
+static mp_limb_t
+mul3 (mp_limb_t m)
+{
+  mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
+  mp_limb_t m2 = (m + 2);
+  return m01 * m2;
+}
+
+/* m(m+1)(m+2)(m+3)/4, two factors of 2 removed:
+   m03 = m(m+3)/2 and (m+1)(m+2)/2 = m03 + 1.  */
+static mp_limb_t
+mul4 (mp_limb_t m)
+{
+  mp_limb_t m03 = (m + 0) * (m + 3) >> 1;
+  return m03 * (m03 + 1); /* mul2 (m03) ? */
+}
+
+/* m(m+1)...(m+4)/4, two factors of 2 removed: mul4's grouping with the
+   extra factor m+4 multiplied into the even-positioned half.  */
+static mp_limb_t
+mul5 (mp_limb_t m)
+{
+  mp_limb_t m03 = (m + 0) * (m + 3) >> 1;
+  mp_limb_t m034 = m03 * (m + 4);
+  return (m03 + 1) * m034;
+}
+
+/* m(m+1)...(m+5)/16, four factors of 2 removed.
+   (m+1)(m+4) = m05+4 and (m+2)(m+3) = m05+6, so
+   (m+1)(m+2)(m+3)(m+4) = (m05+5)^2 - 1; m05 is even, m05+5 odd, and an
+   odd square is 1 mod 8, so the >>3 also absorbs the "-1".  */
+static mp_limb_t
+mul6 (mp_limb_t m)
+{
+  mp_limb_t m05 = (m + 0) * (m + 5);
+  mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3;
+  return m1234 * (m05 >> 1);
+}
+
+/* m(m+1)...(m+6)/16, four factors of 2 removed: mul6's middle group
+   times m056 = m(m+5)(m+6)/2.  */
+static mp_limb_t
+mul7 (mp_limb_t m)
+{
+  mp_limb_t m05 = (m + 0) * (m + 5);
+  mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3;
+  mp_limb_t m056 = m05 * (m + 6) >> 1;
+  return m1234 * m056;
+}
+
+/* m(m+1)...(m+7)/64, six factors of 2 removed:
+   m0257 = m(m+2)(m+5)(m+7)/8 (since (m+2)(m+5) = m07+10), and
+   m1346 = (m+1)(m+3)(m+4)(m+6)/8 = m07 + 9 + m0257.  */
+static mp_limb_t
+mul8 (mp_limb_t m)
+{
+  mp_limb_t m07 = (m + 0) * (m + 7);
+  mp_limb_t m0257 = m07 * (m07 + 10) >> 3;
+  mp_limb_t m1346 = m07 + 9 + m0257;
+  return m0257 * m1346;
+}
+
+/*
+static mp_limb_t
+mul9 (mp_limb_t m)
+{
+  return (m + 8) * mul8 (m) ;
+}
+
+static mp_limb_t
+mul10 (mp_limb_t m)
+{
+  mp_limb_t m09 = (m + 0) * (m + 9);
+  mp_limb_t m18 = (m09 >> 1) + 4;
+  mp_limb_t m0369 = m09 * (m09 + 18) >> 3;
+  mp_limb_t m2457 = m09 * 2 + 35 + m0369;
+  return ((m0369 * m2457) >> 1) * m18;
+}
+*/
+
+typedef mp_limb_t (* mulfunc_t) (mp_limb_t);
+
+/* Dispatch table: mulfunc[i] multiplies i+1 consecutive factors starting
+   at its argument (with some factors of 2 stripped, see tcnttab).  */
+static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8 /* ,mul9,mul10 */};
+#define M (numberof(mulfunc))
+
+/* Number of factors-of-2 removed by the corresponding mulN function.  */
+static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6 /*,6,8*/};
+
+#if 1
+/* This variant is inaccurate but shares the code with other functions.  */
+#define MAXFACS(max,l)							\
+  do {									\
+    (max) = log_n_max (l);						\
+  } while (0)
+#else
+
+/* NOTE: this alternative is currently compiled out by the "#if 1"
+   above.  */
+/* This variant is exact(?) but uses a loop.  It takes the 2 removal
+ of mulN into account.  */
+static const unsigned long ftab[] =
+#if GMP_NUMB_BITS == 64
+  /* 1 to 8 factors per iteration */
+  {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x16a09e667),0x32cbfc,0x16a08,0x24c0,0xa11,0x345,0x1ab /*,0xe9,0x8e */};
+#endif
+#if GMP_NUMB_BITS == 32
+  /* 1 to 7 factors per iteration */
+  {0xffffffff,0x16a09,0x7ff,0x168,0x6f,0x3d,0x20 /* ,0x17 */};
+#endif
+
+#define MAXFACS(max,l)							\
+  do {									\
+    int __i;								\
+    for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--)		\
+      ;									\
+    (max) = __i + 1;							\
+  } while (0)
+#endif
+
+/* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis
+   is an odd integer. */
+static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE };
+
+static void
+mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  unsigned nmax, kmax, nmaxnow, numfac;
+  mp_ptr np, kp;
+  mp_size_t nn, kn, alloc;
+  mp_limb_t i, j, t, iii, jjj, cy, dinv;
+  int cnt;
+  mp_size_t maxn;
+  TMP_DECL;
+
+  ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT);
+  TMP_MARK;
+
+  maxn = 1 + n / GMP_NUMB_BITS;    /* absolutely largest result size (limbs) */
+
+  /* FIXME: This allocation might be insufficient, but is usually way too
+     large.  */
+  alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD);
+  alloc = MIN (alloc, (mp_size_t) k) + 1;
+  TMP_ALLOC_LIMBS_2 (np, alloc, kp, SOME_THRESHOLD + 1);
+
+  MAXFACS (nmax, n);
+  ASSERT (nmax <= M);
+  MAXFACS (kmax, k);
+  ASSERT (kmax <= M);
+  ASSERT (k >= M);
+
+  i = n - k + 1;
+
+  np[0] = 1; nn = 1;
+
+  numfac = 1;
+  j = ODD_FACTORIAL_TABLE_LIMIT + 1;
+  jjj = ODD_FACTORIAL_TABLE_MAX;
+  ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX);
+
+  /* Outer loop: gather a batch of divisor factors (odd parts) into kp,
+     then exactly the same count of dividend factors into np -- which
+     guarantees divisibility -- and do one exact (Hensel) division.  */
+  while (1)
+    {
+      kp[0] = jjj;				/* store new factors */
+      kn = 1;
+      t = k - j + 1;
+      kmax = MIN (kmax, t);
+
+      while (kmax != 0 && kn < SOME_THRESHOLD)
+	{
+	  jjj = mulfunc[kmax - 1] (j);
+	  j += kmax;				/* number of factors used */
+	  count_trailing_zeros (cnt, jjj);	/* count low zeros */
+	  jjj >>= cnt;				/* remove remaining low zeros */
+	  cy = mpn_mul_1 (kp, kp, kn, jjj);	/* accumulate new factors */
+	  kp[kn] = cy;
+	  kn += cy != 0;
+	  t = k - j + 1;
+	  kmax = MIN (kmax, t);
+	}
+      numfac = j - numfac;
+
+      /* Accumulate exactly numfac dividend factors to match.  */
+      while (numfac != 0)
+	{
+	  nmaxnow = MIN (nmax, numfac);
+	  iii = mulfunc[nmaxnow - 1] (i);
+	  i += nmaxnow;				/* number of factors used */
+	  count_trailing_zeros (cnt, iii);	/* count low zeros */
+	  iii >>= cnt;				/* remove remaining low zeros */
+	  cy = mpn_mul_1 (np, np, nn, iii);	/* accumulate new factors */
+	  np[nn] = cy;
+	  nn += cy != 0;
+	  numfac -= nmaxnow;
+	}
+
+      ASSERT (nn < alloc);
+
+      /* Exact division np /= kp by Hensel norm.  The quotient has nn-kn
+	 limbs, plus one when the top dividend limb is not below the top
+	 divisor limb.  The negated limb inverse is passed down and the
+	 quotient negated back afterwards.  */
+      binvert_limb (dinv, kp[0]);
+      nn += (np[nn - 1] >= kp[kn - 1]);
+      nn -= kn;
+      mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
+      mpn_neg (np, np, nn);
+
+      if (kmax == 0)
+	break;
+      numfac = j;
+
+      jjj = mulfunc[kmax - 1] (j);
+      j += kmax;				/* number of factors used */
+      count_trailing_zeros (cnt, jjj);		/* count low zeros */
+      jjj >>= cnt;				/* remove remaining low zeros */
+    }
+
+  /* Put back the right number of factors of 2.  The 2-adic valuation of
+     bin(n,k) is popcount(k) + popcount(n-k) - popcount(n) (Kummer).  */
+  popc_limb (cnt, n - k);
+  popc_limb (j, k);
+  cnt += j;
+  popc_limb (j, n);
+  cnt -= j;
+  if (cnt != 0)
+    {
+      ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */
+      cy = mpn_lshift (np, np, nn, cnt);
+      np[nn] = cy;
+      nn += cy != 0;
+    }
+
+  nn -= np[nn - 1] == 0;	/* normalisation */
+
+  /* Copy the result into r; kp is reused here as a plain pointer.  */
+  kp = MPZ_NEWALLOC (r, nn);
+  SIZ(r) = nn;
+  MPN_COPY (kp, np, nn);
+  TMP_FREE;
+}
+
+/* bin(n,k) for k small enough to be covered by the factorial tables:
+   accumulate the k dividend factors (stripping 2s on the fly), then do a
+   single-limb exact division by the odd part of k!, shifting out the net
+   2-exponent at the same time.  */
+static void
+mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  unsigned nmax, numfac;
+  mp_ptr rp;
+  mp_size_t rn, alloc;
+  mp_limb_t i, iii, cy;
+  unsigned i2cnt, cnt;
+
+  MAXFACS (nmax, n);
+  nmax = MIN (nmax, M);
+
+  i = n - k + 1;
+
+  i2cnt = __gmp_fac2cnt_table[k / 2 - 1];		/* low zeros count */
+  if (nmax >= k)
+    {
+      /* All k factors fit in one mulN call; the result is one limb.  */
+      MPZ_NEWALLOC (r, 1) [0] = mulfunc[k - 1] (i) * facinv[k - 2] >>
+	(i2cnt - tcnttab[k - 1]);
+      SIZ(r) = 1;
+      return;
+    }
+
+  count_leading_zeros (cnt, (mp_limb_t) n);
+  cnt = GMP_LIMB_BITS - cnt;
+  alloc = cnt * k / GMP_NUMB_BITS + 3;	/* FIXME: ensure rounding is enough. */
+  rp = MPZ_NEWALLOC (r, alloc);
+
+  rp[0] = mulfunc[nmax - 1] (i);
+  rn = 1;
+  i += nmax;				/* number of factors used */
+  i2cnt -= tcnttab[nmax - 1];		/* low zeros count */
+  numfac = k - nmax;
+  do
+    {
+      nmax = MIN (nmax, numfac);
+      iii = mulfunc[nmax - 1] (i);
+      i += nmax;			/* number of factors used */
+      i2cnt -= tcnttab[nmax - 1];	/* update low zeros count */
+      cy = mpn_mul_1 (rp, rp, rn, iii);	/* accumulate new factors */
+      rp[rn] = cy;
+      rn += cy != 0;
+      numfac -= nmax;
+    } while (numfac != 0);
+
+  ASSERT (rn < alloc);
+
+  /* Exact one-limb bdiv by the odd part of k! (facinv holds its inverse
+     mod B), with the remaining 2-exponent i2cnt shifted out too.  */
+  mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], i2cnt);
+  /* A two-fold, branch-free normalisation is possible :*/
+  /* rn -= rp[rn - 1] == 0; */
+  /* rn -= rp[rn - 1] == 0; */
+  MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+  SIZ(r) = rn;
+}
+
+/* Algorithm:
+
+   Plainly and simply multiply things together.
+
+   We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such
+   that k!/2^t is odd).
+
+*/
+
+/* Single-limb binomial(n,k): multiply the odd part of n! by the
+   tabulated modular inverses of the odd parts of k! and (n-k)!, then
+   shift in the net power of 2 taken from __gmp_fac2cnt_table.  Only
+   valid while the result fits in one limb (n within the extended
+   factorial tables).  */
+static mp_limb_t
+bc_bin_uiui (unsigned int n, unsigned int k)
+{
+  return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2])
+    << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1]))
+    & GMP_NUMB_MASK;
+}
+
+/* Algorithm:
+
+   Recursively exploit the relation
+   bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) .
+
+   Values for binomial(k,k>>1) that fit in a limb are precomputed
+   (with inverses).
+*/
+
+/* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] =
+   binomial(i*2,i)/2^t (where t is chosen so that it is odd). */
+static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE };
+
+/* bin2kkinv[i] = bin2kk[i]^-1 mod B */
+static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE };
+
+/* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */
+static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE };
+
+/* Divide-and-conquer binomial per the relation documented above:
+   bin(n,k) = bin(n,k/2) * bin(n-k/2, k-k/2) / bin(k,k/2), with the
+   final division done as an exact bdiv by the tabulated odd central
+   binomial (bin2kk) and its inverse.  */
+static void
+mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  mp_ptr rp;
+  mp_size_t rn;
+  unsigned long int hk;
+
+  hk = k >> 1;
+
+  /* r = bin(n, k/2), by table lookup or recursion.  */
+  if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT)
+    mpz_smallk_bin_uiui (r, n, hk);
+  else
+    mpz_smallkdc_bin_uiui (r, n, hk);
+  k -= hk;
+  n -= hk;
+  if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {
+    /* Second factor fits in one limb: multiply it in directly.  */
+    mp_limb_t cy;
+    rn = SIZ (r);
+    rp = MPZ_REALLOC (r, rn + 1);
+    cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k));
+    rp [rn] = cy;
+    rn += cy != 0;
+  } else {
+    /* Second factor needs a multi-limb temporary; back it with a
+       stack buffer instead of heap allocation.  */
+    mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3];
+    mpz_t t;
+
+    ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3;
+    PTR (t) = buffer;
+    if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT)
+      mpz_smallk_bin_uiui (t, n, k);
+    else
+      mpz_smallkdc_bin_uiui (t, n, k);
+    mpz_mul (r, r, t);
+    rp = PTR (r);
+    rn = SIZ (r);
+  }
+
+  /* Exact division by bin(k, k/2) (its odd part, via the inverse
+     table), adjusting the 2-exponent when k was odd (k != hk).  */
+  mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET],
+		    bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET],
+		    fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk));
+  /* A two-fold, branch-free normalisation is possible: */
+  /* rn -= rp[rn - 1] == 0; */
+  /* rn -= rp[rn - 1] == 0; */
+  MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+  SIZ(r) = rn;
+}
+
+/* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K).
+ *
+ * Contributed to the GNU project by Marco Bodrato.
+ *
+ * Implementation of the algorithm by P. Goetgheluck, "Computing
+ * Binomial Coefficients", The American Mathematical Monthly, Vol. 94,
+ * No. 4 (April 1987), pp. 360-365.
+ *
+ * Acknowledgment: Peter Luschny did spot the slowness of the previous
+ * code and suggested the reference.
+ */
+
+/* TODO: Remove duplicated constants / macros / static functions...
+ */
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+/* Flush the partial product PR into VEC (advancing index I) when it has
+   exceeded MAX_PR, then restart the product at 1.  */
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)			\
+  if ((PR) > (MAX_PR)) {					\
+    (VEC)[(I)++] = (PR);					\
+    (PR) = 1;							\
+  }
+
+/* Multiply factor P into the partial product PR, spilling PR into VEC
+   first when it has grown past MAX_PR (so PR never overflows a limb).  */
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
+  do {								\
+    if ((PR) > (MAX_PR)) {					\
+      (VEC)[(I)++] = (PR);					\
+      (PR) = (P);						\
+    } else							\
+      (PR) *= (P);						\
+  } while (0)
+
+/* Resume scanning the current sieve position up to a new bound (end);
+   the loop body receives each prime as `prime'.  Must be closed with
+   LOOP_ON_SIEVE_STOP or LOOP_ON_SIEVE_END.  */
+#define LOOP_ON_SIEVE_CONTINUE(prime,end)			\
+    __max_i = (end);						\
+								\
+    do {							\
+      ++__i;							\
+      if ((*__sieve & __mask) == 0)				\
+	{							\
+	  mp_limb_t prime;					\
+	  prime = id_to_n(__i)
+
+/* Iterate `prime' over sieve entries from bit index start to end; off
+   is the sieve's base offset.  Sets up the limb pointer and bit mask,
+   then delegates to LOOP_ON_SIEVE_CONTINUE.  */
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
+  do {								\
+    mp_limb_t __mask, *__sieve, __max_i, __i;			\
+								\
+    __i = (start)-(off);					\
+    __sieve = (sieve) + __i / GMP_LIMB_BITS;			\
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
+    __i += (off);						\
+								\
+    LOOP_ON_SIEVE_CONTINUE(prime,end)
+
+/* Close the loop body: rotate the mask one bit left, step to the next
+   sieve limb when the mask wraps around.  */
+#define LOOP_ON_SIEVE_STOP					\
+	}							\
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
+      __sieve += __mask & 1;					\
+    }  while (__i <= __max_i)
+
+/* Close both the loop and the enclosing do-block from ..._BEGIN.  */
+#define LOOP_ON_SIEVE_END					\
+    LOOP_ON_SIEVE_STOP;						\
+  } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if WANT_ASSERT
+/* Map a 0-based sieve bit index back to the number it represents;
+   assert-only companion of n_to_bit.  */
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
+/* Map a 1-based sieve id to its number: 1->5, 2->7, 3->11, 4->13, ...
+   (the sieve represents only candidates coprime to 6).  */
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+/* Inverse mapping: 0-based sieve bit index for n (meaningful for
+   n >= 5 not divisible by 2 or 3).  */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+/* Number of limbs needed for a sieve covering candidates up to n.  */
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+
+/*********************************************************/
+/* Section binomial: fast binomial implementation        */
+/*********************************************************/
+
+/* Multiply prime P into the product PR once for every borrow that
+   occurs when subtracting K from N in base P -- i.e. P's exponent in
+   binomial(N,K) (Kummer's theorem).  Spills PR into VEC via
+   FACTOR_LIST_APPEND before starting.  */
+#define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I)	\
+  do {							\
+    mp_limb_t __a, __b, __prime, __ma,__mb;		\
+    __prime = (P);					\
+    __a = (N); __b = (K); __mb = 0;			\
+    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);		\
+    do {						\
+      __mb += __b % __prime; __b /= __prime;		\
+      __ma = __a % __prime; __a /= __prime;		\
+      if (__ma < __mb) {				\
+        __mb = 1; (PR) *= __prime;			\
+      } else  __mb = 0;					\
+    } while (__a >= __prime);				\
+  } while (0)
+
+/* Short-cut version for primes P with P*P > N: the exponent of P in
+   binomial(N,K) is at most 1, decided by a single digit comparison.  */
+#define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I)	\
+  do {							\
+    mp_limb_t __prime;					\
+    __prime = (P);					\
+    if (((N) % __prime) < ((K) % __prime)) {		\
+      FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I);	\
+    }							\
+  } while (0)
+
+/* Returns an approximation of the square root of x.
+ * It gives:
+ *   limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
+ * or
+ *   x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
+ */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+  int s;
+
+  ASSERT (x > 2);
+  /* s = ceil(bitlength(x) / 2); the result averages the bracketing
+     powers of two: 2^(s-1) + x / 2^(s+1).  */
+  count_leading_zeros (s, x);
+  s = (GMP_LIMB_BITS - s) >> 1;
+  return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s));
+}
+
+/* Compute binomial(n,k) by assembling its prime factorization directly
+   (Goetgheluck's method, see reference above).  For each prime p, the
+   exponent in bin(n,k) is the number of carries when adding k and n-k
+   in base p (Kummer's theorem); partial products of the collected
+   factors are stored one limb at a time and multiplied together at the
+   end with mpz_prodlimbs.  */
+static void
+mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  mp_limb_t *sieve, *factors, count;
+  mp_limb_t prod, max_prod;
+  mp_size_t j;
+  TMP_DECL;
+
+  ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13);
+  ASSERT (n >= 25);
+
+  TMP_MARK;
+  sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
+
+  count = gmp_primesieve (sieve, n) + 1;
+  factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1);
+
+  /* Any single factor is <= n, so prod stays below a limb while
+     prod <= max_prod.  */
+  max_prod = GMP_NUMB_MAX / n;
+
+  /* Handle primes = 2, 3 separately. */
+  /* Exponent of 2 = popcount(n-k) + popcount(k) - popcount(n)
+     (base-2 carry count).  */
+  popc_limb (count, n - k);
+  popc_limb (j, k);
+  count += j;
+  popc_limb (j, n);
+  count -= j;
+  prod = CNST_LIMB(1) << count;
+
+  j = 0;
+  COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j);
+
+  /* Accumulate prime factors from 5 to n/2 */
+    {
+      mp_limb_t s;
+
+      /* Primes up to sqrt(n) may divide bin(n,k) to a power > 1, so
+	 they need the full carry-counting macro; larger primes divide
+	 at most once and use the cheaper single-digit test.  */
+      s = limb_apprsqrt(n);
+      s = n_to_bit (s);
+      ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
+      ASSERT (s <= n_to_bit (n >> 1));
+      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
+      COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
+      LOOP_ON_SIEVE_STOP;
+
+      /* NOTE(review): the overflow bound is loosened for the
+	 single-shot phase -- presumably safe because each prime here
+	 is multiplied in at most once; confirm against upstream.  */
+      ASSERT (max_prod <= GMP_NUMB_MAX / 2);
+      max_prod <<= 1;
+
+      LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n >> 1));
+      SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
+      LOOP_ON_SIEVE_END;
+
+      max_prod >>= 1;
+    }
+
+  /* Store primes from (n-k)+1 to n */
+  ASSERT (n_to_bit (n - k) < n_to_bit (n));
+
+  LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve);
+  FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+  LOOP_ON_SIEVE_END;
+
+  /* Multiply all stored partial products together.  */
+  if (LIKELY (j != 0))
+    {
+      factors[j++] = prod;
+      mpz_prodlimbs (r, factors, j);
+    }
+  else
+    {
+      MPZ_NEWALLOC (r, 1)[0] = prod;
+      SIZ (r) = 1;
+    }
+  TMP_FREE;
+}
+
+#undef COUNT_A_PRIME
+#undef SH_COUNT_A_PRIME
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
+
+/*********************************************************/
+/* End of implementation of Goetgheluck's algorithm      */
+/*********************************************************/
+
+/* Set r = binomial(n, k), dispatching on the size of the operands:
+   trivial cases first, then single-limb table lookup, small-k product,
+   divide-and-conquer, Goetgheluck prime factorization, and finally the
+   generic bdiv-based method.  */
+void
+mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  if (UNLIKELY (n < k)) {
+    SIZ (r) = 0;		/* binomial(n,k) = 0 for n < k */
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  } else if (UNLIKELY (n > GMP_NUMB_MAX)) {
+    /* n does not fit in a limb: fall back to the mpz interface.  */
+    mpz_t tmp;
+
+    mpz_init_set_ui (tmp, n);
+    mpz_bin_ui (r, tmp, k);
+    mpz_clear (tmp);
+#endif
+  } else {
+    ASSERT (n <= GMP_NUMB_MAX);
+    /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
+    k = MIN (k, n - k);
+    if (k < 2) {
+      MPZ_NEWALLOC (r, 1)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */
+      SIZ(r) = 1;
+    } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */
+      /* Result fits in one limb and is covered by the tables.  */
+      MPZ_NEWALLOC (r, 1)[0] = bc_bin_uiui (n, k);
+      SIZ(r) = 1;
+    } else if (k <= ODD_FACTORIAL_TABLE_LIMIT)
+      mpz_smallk_bin_uiui (r, n, k);
+    else if (BIN_UIUI_ENABLE_SMALLDC &&
+	     k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2)
+      mpz_smallkdc_bin_uiui (r, n, k);
+    else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) &&
+	     k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */
+      mpz_goetgheluck_bin_uiui (r, n, k);
+    else
+      mpz_bdiv_bin_uiui (r, n, k);
+  }
+}
diff --git a/mpz/cdiv_q.c b/mpz/cdiv_q.c
new file mode 100644
index 0000000..f19eb74
--- /dev/null
+++ b/mpz/cdiv_q.c
@@ -0,0 +1,52 @@
+/* mpz_cdiv_q -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.
+
+Copyright 1994-1996, 2000, 2001, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* quot = ceil(dividend / divisor): compute the truncated quotient and
+   bump it by one when the exact quotient is positive (operand signs
+   agree) and the division was inexact.  */
+void
+mpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t dividend_size = SIZ (dividend);
+  mp_size_t divisor_size = SIZ (divisor);
+  mpz_t rem;			/* scratch remainder, discarded on return */
+  TMP_DECL;
+
+  TMP_MARK;
+
+  MPZ_TMP_INIT (rem, ABS (divisor_size));
+
+  mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+  /* XOR of the signed sizes is >= 0 iff the operand signs agree.  */
+  if ((divisor_size ^ dividend_size) >= 0 && SIZ (rem) != 0)
+    mpz_add_ui (quot, quot, 1L);
+
+  TMP_FREE;
+}
diff --git a/mpz/cdiv_q_ui.c b/mpz/cdiv_q_ui.c
new file mode 100644
index 0000000..269d9a3
--- /dev/null
+++ b/mpz/cdiv_q_ui.c
@@ -0,0 +1,102 @@
+/* mpz_cdiv_q_ui -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.  In order to make it
+   always fit into the return type, the negative of the true remainder is
+   returned.
+
+Copyright 1994, 1996, 1999, 2001, 2002, 2004, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* quot = ceil(dividend / divisor) for an unsigned limb-or-double-limb
+   divisor; returns the negated (always nonnegative) remainder so it
+   fits the unsigned return type.  */
+unsigned long int
+mpz_cdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  qp = MPZ_REALLOC (quot, nn);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* Divisor spans two limbs: use the general mpn division.  */
+      mp_limb_t dp[2], rp[2];
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  qp[0] = 0;
+	  rl = np[0];
+	  qn = 1;		/* a white lie, fixed below */
+	}
+      else
+	{
+	  dp[0] = divisor & GMP_NUMB_MASK;
+	  dp[1] = divisor >> GMP_NUMB_BITS;
+	  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+	  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+	  qn = nn - 2 + 1;
+	}
+
+      /* Positive dividend with inexact division: round the quotient up
+	 and return the complementary remainder.  */
+      if (rl != 0 && ns >= 0)
+	{
+	  mpn_incr_u (qp, (mp_limb_t) 1);
+	  rl = divisor - rl;
+	}
+
+      /* Strip up to two possible high zero limbs.  */
+      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+
+      if (rl != 0 && ns >= 0)
+	{
+	  mpn_incr_u (qp, (mp_limb_t) 1);
+	  rl = divisor - rl;
+	}
+
+      qn = nn - (qp[nn - 1] == 0);
+    }
+
+  /* Quotient keeps the dividend's sign (divisor is positive).  */
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/cdiv_qr.c b/mpz/cdiv_qr.c
new file mode 100644
index 0000000..bc9b892
--- /dev/null
+++ b/mpz/cdiv_qr.c
@@ -0,0 +1,64 @@
+/* mpz_cdiv_qr -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.
+
+Copyright 1994-1996, 2000, 2001, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* quot = ceil(dividend / divisor), rem = dividend - quot * divisor.
+   The remainder takes the opposite sign of the divisor.  Computed from
+   the truncating division, adjusting when the exact quotient is
+   positive and inexact.  */
+void
+mpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t divisor_size = SIZ (divisor);
+  mp_size_t xsize;
+  mpz_t temp_divisor;		/* N.B.: lives until function returns! */
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* We need the original value of the divisor after the quotient and
+     remainder have been preliminarily calculated.  We have to copy it to
+     temporary space if it's the same variable as either QUOT or REM.  */
+  if (quot == divisor || rem == divisor)
+    {
+      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+      mpz_set (temp_divisor, divisor);
+      divisor = temp_divisor;
+    }
+
+  /* XOR of the signed sizes is >= 0 iff the operand signs agree, i.e.
+     the exact quotient is nonnegative.  Saved before tdiv_qr since
+     quot/rem may alias the operands.  */
+  xsize = SIZ (dividend) ^ divisor_size;
+  mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+  /* Round towards +infinity: bump the quotient and compensate the
+     remainder when the division was inexact.  */
+  if (xsize >= 0 && SIZ (rem) != 0)
+    {
+      mpz_add_ui (quot, quot, 1L);
+      mpz_sub (rem, rem, divisor);
+    }
+
+  TMP_FREE;
+}
diff --git a/mpz/cdiv_qr_ui.c b/mpz/cdiv_qr_ui.c
new file mode 100644
index 0000000..0c11fb6
--- /dev/null
+++ b/mpz/cdiv_qr_ui.c
@@ -0,0 +1,118 @@
+/* mpz_cdiv_qr_ui -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.  In order to make it
+   always fit into the return type, the negative of the true remainder is
+   returned.
+
+Copyright 1994-1996, 1999, 2001, 2002, 2004, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* quot = ceil(dividend / divisor), rem = dividend - quot * divisor for
+   an unsigned divisor; the stored remainder is <= 0 (opposite sign of
+   the positive divisor) and its negation is returned so it fits the
+   unsigned return type.  */
+unsigned long int
+mpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  qp = MPZ_REALLOC (quot, nn);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* Divisor spans two limbs: use the general mpn division.  */
+      mp_limb_t dp[2];
+      mp_ptr rp;
+      mp_size_t rn;
+
+      rp = MPZ_REALLOC (rem, 2);
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  qp[0] = 0;
+	  qn = 1;		/* a white lie, fixed below */
+	  rl = np[0];
+	  rp[0] = rl;
+	}
+      else
+	{
+	  dp[0] = divisor & GMP_NUMB_MASK;
+	  dp[1] = divisor >> GMP_NUMB_BITS;
+	  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+	  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+	  qn = nn - 2 + 1;
+	}
+
+      /* Positive dividend, inexact division: round the quotient up and
+	 switch to the complementary remainder.  */
+      if (rl != 0 && ns >= 0)
+	{
+	  mpn_incr_u (qp, (mp_limb_t) 1);
+	  rl = divisor - rl;
+	  rp[0] = rl & GMP_NUMB_MASK;
+	  rp[1] = rl >> GMP_NUMB_BITS;
+	}
+
+      /* Strip up to two possible high zero limbs of the quotient, then
+	 normalise the (nonpositive) remainder.  */
+      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);
+      SIZ(rem) = -rn;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+	SIZ(rem) = 0;
+      else
+	{
+	  if (ns >= 0)
+	    {
+	      mpn_incr_u (qp, (mp_limb_t) 1);
+	      rl = divisor - rl;
+	    }
+
+	  /* rl may have become 0 when divisor == GMP_NUMB_MAX + 1
+	     cannot happen here, but keep the guarded store.  */
+	  MPZ_NEWALLOC (rem, 1)[0] = rl;
+	  SIZ(rem) = -(rl != 0);
+	}
+      qn = nn - (qp[nn - 1] == 0);
+    }
+
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/cdiv_r.c b/mpz/cdiv_r.c
new file mode 100644
index 0000000..83c624a
--- /dev/null
+++ b/mpz/cdiv_r.c
@@ -0,0 +1,60 @@
+/* mpz_cdiv_r -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.
+
+Copyright 1994-1996, 2001, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* rem = dividend - ceil(dividend / divisor) * divisor: the truncating
+   remainder, shifted by one divisor when the exact quotient is
+   positive and inexact, so the result takes the opposite sign of the
+   divisor.  */
+void
+mpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t divisor_size = SIZ (divisor);
+  mpz_t temp_divisor;		/* N.B.: lives until function returns! */
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* We need the original value of the divisor after the remainder has been
+     preliminary calculated.  We have to copy it to temporary space if it's
+     the same variable as REM.  */
+  if (rem == divisor)
+    {
+
+      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+      mpz_set (temp_divisor, divisor);
+      divisor = temp_divisor;
+    }
+
+  mpz_tdiv_r (rem, dividend, divisor);
+
+  /* Operand signs agree (XOR of sizes >= 0) and division inexact:
+     adjust to the ceiling-convention remainder.  */
+  if ((divisor_size ^ SIZ (dividend)) >= 0 && SIZ (rem) != 0)
+    mpz_sub (rem, rem, divisor);
+
+  TMP_FREE;
+}
diff --git a/mpz/cdiv_r_ui.c b/mpz/cdiv_r_ui.c
new file mode 100644
index 0000000..84d51db
--- /dev/null
+++ b/mpz/cdiv_r_ui.c
@@ -0,0 +1,109 @@
+/* mpz_cdiv_r_ui -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.  In order to make it
+   always fit into the return type, the negative of the true remainder is
+   returned.
+
+Copyright 1994-1996, 2001, 2002, 2004, 2005, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* rem = dividend - ceil(dividend / divisor) * divisor for an unsigned
+   divisor; stores a nonpositive remainder and returns its negation so
+   it fits the unsigned return type.  */
+unsigned long int
+mpz_cdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* Divisor spans two limbs: divide via mpn_tdiv_qr, discarding
+	 the quotient (scratch qp).  */
+      mp_limb_t dp[2];
+      mp_ptr rp, qp;
+      mp_size_t rn;
+      TMP_DECL;
+
+      rp = MPZ_REALLOC (rem, 2);
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  rl = np[0];
+	  rp[0] = rl;
+	}
+      else
+	{
+	  TMP_MARK;
+	  dp[0] = divisor & GMP_NUMB_MASK;
+	  dp[1] = divisor >> GMP_NUMB_BITS;
+	  qp = TMP_ALLOC_LIMBS (nn - 2 + 1);
+	  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+	  TMP_FREE;
+	  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+	}
+
+      /* Positive dividend, inexact: complementary remainder.  */
+      if (rl != 0 && ns >= 0)
+	{
+	  rl = divisor - rl;
+	  rp[0] = rl & GMP_NUMB_MASK;
+	  rp[1] = rl >> GMP_NUMB_BITS;
+	}
+
+      /* Normalise the stored (nonpositive) remainder.  */
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);
+      SIZ(rem) = -rn;
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+	SIZ(rem) = 0;
+      else
+	{
+	  if (ns >= 0)
+	    rl = divisor - rl;
+
+	  MPZ_NEWALLOC (rem, 1)[0] = rl;
+	  SIZ(rem) = -1;
+	}
+    }
+
+  return rl;
+}
diff --git a/mpz/cdiv_ui.c b/mpz/cdiv_ui.c
new file mode 100644
index 0000000..e1a1c49
--- /dev/null
+++ b/mpz/cdiv_ui.c
@@ -0,0 +1,102 @@
+/* mpz_cdiv_ui -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.  In order to make it
+   always fit into the return type, the negative of the true remainder is
+   returned.
+
+Copyright 1994-1996, 2001, 2002, 2004, 2005, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Return the negated ceiling-convention remainder of dividend / divisor
+   (always nonnegative, so it fits the unsigned return type); nothing is
+   stored.  For a positive dividend with an inexact division this is
+   divisor - (dividend mod divisor), otherwise the plain truncating
+   remainder's magnitude.  */
+unsigned long int
+mpz_cdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* Divisor spans two limbs: divide via mpn_tdiv_qr, discarding
+	 the quotient (scratch qp).  */
+      mp_limb_t dp[2], rp[2];
+      mp_ptr qp;
+      mp_size_t rn;
+      TMP_DECL;
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  rl = np[0];
+	  rp[0] = rl;
+	}
+      else
+	{
+	  TMP_MARK;
+	  dp[0] = divisor & GMP_NUMB_MASK;
+	  dp[1] = divisor >> GMP_NUMB_BITS;
+	  qp = TMP_ALLOC_LIMBS (nn - 2 + 1);
+	  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+	  TMP_FREE;
+	  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+	}
+
+      if (rl != 0 && ns >= 0)
+	{
+	  rl = divisor - rl;
+	  rp[0] = rl & GMP_NUMB_MASK;
+	  rp[1] = rl >> GMP_NUMB_BITS;
+	}
+
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+      /* Positive dividend with a nonzero remainder: return the
+	 complementary remainder divisor - rl.  */
+      if (rl != 0 && ns >= 0)
+	rl = divisor - rl;
+    }
+
+  return rl;
+}
diff --git a/mpz/cfdiv_q_2exp.c b/mpz/cfdiv_q_2exp.c
new file mode 100644
index 0000000..413ea36
--- /dev/null
+++ b/mpz/cfdiv_q_2exp.c
@@ -0,0 +1,111 @@
+/* mpz_cdiv_q_2exp, mpz_fdiv_q_2exp -- quotient from mpz divided by 2^n.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2001, 2002, 2004, 2012,
+2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* dir==1 for ceil, dir==-1 for floor */
+
+static void __gmpz_cfdiv_q_2exp (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int)) REGPARM_ATTR (1);
+#define cfdiv_q_2exp(w,u,cnt,dir)  __gmpz_cfdiv_q_2exp (REGPARM_3_1 (w,u,cnt,dir))
+
+/* Shared worker for mpz_{c,f}div_q_2exp: w = u / 2^cnt rounded towards
+   +infinity (dir == 1) or -infinity (dir == -1).  Rounding is needed
+   exactly when u's sign matches dir and any discarded bit is nonzero.  */
+REGPARM_ATTR (1) static void
+cfdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
+{
+  mp_size_t  wsize, usize, abs_usize, limb_cnt, i;
+  mp_srcptr  up;
+  mp_ptr     wp;
+  mp_limb_t  round, rmask;
+
+  usize = SIZ (u);
+  abs_usize = ABS (usize);
+  limb_cnt = cnt / GMP_NUMB_BITS;
+  wsize = abs_usize - limb_cnt;
+  if (wsize <= 0)
+    {
+      /* u < 2**cnt, so result 1, 0 or -1 according to rounding */
+      MPZ_NEWALLOC (w, 1)[0] = 1;
+      SIZ(w) = (usize == 0 || (usize ^ dir) < 0 ? 0 : dir);
+      return;
+    }
+
+  /* +1 limb to allow for mpn_add_1 below */
+  wp = MPZ_REALLOC (w, wsize+1);
+
+  /* Check for rounding if direction matches u sign.
+     Set round if we're skipping non-zero limbs.  */
+  up = PTR(u);
+  round = 0;
+  rmask = ((usize ^ dir) >= 0 ? MP_LIMB_T_MAX : 0);
+  if (rmask != 0)
+    for (i = 0; i < limb_cnt && round == 0; i++)
+      round = up[i];
+
+  cnt %= GMP_NUMB_BITS;
+  if (cnt != 0)
+    {
+      /* Partial-limb shift; mpn_rshift returns the bits shifted out,
+	 which also feed the rounding decision.  */
+      round |= rmask & mpn_rshift (wp, up + limb_cnt, wsize, cnt);
+      wsize -= (wp[wsize - 1] == 0);
+    }
+  else
+    MPN_COPY_INCR (wp, up + limb_cnt, wsize);
+
+  if (round != 0)
+    {
+      /* Inexact and rounding away from zero: increment |w|.  */
+      if (wsize != 0)
+	{
+	  mp_limb_t cy;
+	  cy = mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
+	  wp[wsize] = cy;
+	  wsize += cy;
+	}
+      else
+	{
+	  /* We shifted something to zero.  */
+	  wp[0] = 1;
+	  wsize = 1;
+	}
+    }
+  SIZ(w) = (usize >= 0 ? wsize : -wsize);
+}
+
+
+/* w = ceil(u / 2^cnt).  */
+void
+mpz_cdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  cfdiv_q_2exp (w, u, cnt, 1);
+}
+
+/* w = floor(u / 2^cnt).  */
+void
+mpz_fdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  cfdiv_q_2exp (w, u, cnt, -1);
+}
diff --git a/mpz/cfdiv_r_2exp.c b/mpz/cfdiv_r_2exp.c
new file mode 100644
index 0000000..fedb97d
--- /dev/null
+++ b/mpz/cfdiv_r_2exp.c
@@ -0,0 +1,159 @@
+/* mpz_cdiv_r_2exp, mpz_fdiv_r_2exp -- remainder from mpz divided by 2^n.
+
+Copyright 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Bit mask of "n" least significant bits of a limb. */
+#define LOW_MASK(n)   ((CNST_LIMB(1) << (n)) - 1)
+
+
+/* dir==1 for ceil, dir==-1 for floor */
+
+static void __gmpz_cfdiv_r_2exp (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int)) REGPARM_ATTR (1);
+#define cfdiv_r_2exp(w,u,cnt,dir)  __gmpz_cfdiv_r_2exp (REGPARM_3_1 (w, u, cnt, dir))
+
+REGPARM_ATTR (1) static void
+cfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
+{
+  mp_size_t  usize, abs_usize, limb_cnt, i;
+  mp_srcptr  up;
+  mp_ptr     wp;
+  mp_limb_t  high;
+
+  usize = SIZ(u);
+  if (usize == 0)
+    {
+      SIZ(w) = 0;	/* remainder of 0 is 0, whatever the rounding */
+      return;
+    }
+
+  limb_cnt = cnt / GMP_NUMB_BITS;	/* whole low limbs kept in the remainder */
+  cnt %= GMP_NUMB_BITS;			/* bits kept in the top remainder limb */
+  abs_usize = ABS (usize);
+
+  /* MPZ_REALLOC(w) below is only when w!=u, so we can fetch PTR(u) here
+     nice and early */
+  up = PTR(u);
+
+  if ((usize ^ dir) < 0)
+    {
+      /* Round towards zero, means just truncate */
+
+      if (w == u)
+	{
+	  /* if already smaller than limb_cnt then do nothing */
+	  if (abs_usize <= limb_cnt)
+	    return;
+	  wp = (mp_ptr) up;	/* truncate u in place */
+	}
+      else
+	{
+	  i = MIN (abs_usize, limb_cnt+1);
+	  wp = MPZ_NEWALLOC (w, i);
+	  MPN_COPY (wp, up, i);
+
+	  /* if smaller than limb_cnt then only the copy is needed */
+	  if (abs_usize <= limb_cnt)
+	    {
+	      SIZ(w) = usize;
+	      return;
+	    }
+	}
+    }
+  else
+    {
+      /* Round away from zero, means twos complement if non-zero */
+
+      /* if u!=0 and smaller than divisor, then must negate */
+      if (abs_usize <= limb_cnt)
+	goto negate;
+
+      /* if non-zero low limb, then must negate */
+      for (i = 0; i < limb_cnt; i++)
+	if (up[i] != 0)
+	  goto negate;
+
+      /* if non-zero partial limb, then must negate */
+      if ((up[limb_cnt] & LOW_MASK (cnt)) != 0)
+	goto negate;
+
+      /* otherwise low bits of u are zero, so that's the result */
+      SIZ(w) = 0;
+      return;
+
+    negate:
+      /* twos complement negation to get 2**cnt-u */
+
+      wp = MPZ_REALLOC (w, limb_cnt+1);
+      up = PTR(u);	/* re-fetch, the realloc may have moved the data when w==u */
+
+      /* Ones complement */
+      i = MIN (abs_usize, limb_cnt+1);
+      ASSERT_CARRY (mpn_neg (wp, up, i));
+      for ( ; i <= limb_cnt; i++)
+	wp[i] = GMP_NUMB_MAX;	/* with the borrow out, higher limbs of the negation are all ones */
+
+      usize = -usize;	/* negation flips the sign of the remainder */
+    }
+
+  /* Mask the high limb */
+  high = wp[limb_cnt];
+  high &= LOW_MASK (cnt);
+  wp[limb_cnt] = high;
+
+  /* Strip any high zero limbs */
+  while (high == 0)
+    {
+      limb_cnt--;
+      if (limb_cnt < 0)
+	{
+	  SIZ(w) = 0;
+	  return;
+	}
+      high = wp[limb_cnt];
+    }
+
+  limb_cnt++;	/* convert top-limb index back into a size */
+  SIZ(w) = (usize >= 0 ? limb_cnt : -limb_cnt);
+}
+
+
+void
+mpz_cdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  cfdiv_r_2exp (w, u, cnt, 1);	/* dir=1: remainder for a ceiling division, result <= 0 */
+}
+
+void
+mpz_fdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  cfdiv_r_2exp (w, u, cnt, -1);	/* dir=-1: remainder for a floor division, result >= 0 */
+}
diff --git a/mpz/clear.c b/mpz/clear.c
new file mode 100644
index 0000000..f8c38a2
--- /dev/null
+++ b/mpz/clear.c
@@ -0,0 +1,40 @@
+/* mpz_clear -- de-allocate the space occupied by the dynamic digit space of
+   an integer.
+
+Copyright 1991, 1993-1995, 2000, 2001, 2012, 2014, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_clear (mpz_ptr x)
+{
+  if (ALLOC (x))	/* nothing to free when no limb block was ever allocated */
+    __GMP_FREE_FUNC_LIMBS (PTR (x), ALLOC(x));
+}
diff --git a/mpz/clears.c b/mpz/clears.c
new file mode 100644
index 0000000..7ac257a
--- /dev/null
+++ b/mpz/clears.c
@@ -0,0 +1,50 @@
+/* mpz_clears() -- Clear multiple mpz_t variables.
+
+Copyright 2009, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include "gmp-impl.h"
+
+void
+mpz_clears (mpz_ptr x, ...)
+{
+  va_list  ap;
+
+  va_start (ap, x);
+
+  do
+    {
+      if (ALLOC (x))	/* skip variables that never allocated limbs */
+	__GMP_FREE_FUNC_LIMBS (PTR (x), ALLOC (x));
+      x = va_arg (ap, mpz_ptr);
+    }
+  while (x != NULL);	/* argument list must be NULL-terminated */
+
+  va_end (ap);
+}
diff --git a/mpz/clrbit.c b/mpz/clrbit.c
new file mode 100644
index 0000000..8b3c854
--- /dev/null
+++ b/mpz/clrbit.c
@@ -0,0 +1,115 @@
+/* mpz_clrbit -- clear a specified bit.
+
+Copyright 1991, 1993-1995, 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_clrbit (mpz_ptr d, mp_bitcnt_t bit_idx)
+{
+  mp_size_t dsize = SIZ (d);
+  mp_ptr dp = PTR (d);
+  mp_size_t limb_idx;
+  mp_limb_t mask;
+
+  limb_idx = bit_idx / GMP_NUMB_BITS;
+  mask = CNST_LIMB(1) << (bit_idx % GMP_NUMB_BITS);
+  if (dsize >= 0)
+    {
+      if (limb_idx < dsize)
+	{
+	  mp_limb_t  dlimb;
+	  dlimb = dp[limb_idx] & ~mask;
+	  dp[limb_idx] = dlimb;
+
+	  if (UNLIKELY ((dlimb == 0) + limb_idx == dsize)) /* dsize == limb_idx + 1 */
+	    {
+	      /* high limb became zero, must normalize */
+	      MPN_NORMALIZE (dp, limb_idx);
+	      SIZ (d) = limb_idx;
+	    }
+	}
+      else
+	;	/* bit lies beyond the high limb, hence is already 0 */
+    }
+  else
+    {
+      /* Simulate two's complement arithmetic, i.e. simulate
+	 1. Set OP = ~(OP - 1) [with infinitely many leading ones].
+	 2. clear the bit.
+	 3. Set OP = ~OP + 1.  */
+
+      dsize = -dsize;
+
+      if (limb_idx < dsize)
+	{
+	  mp_size_t zero_bound;
+
+	  /* No index upper bound on this loop, we're sure there's a non-zero limb
+	     sooner or later.  */
+	  zero_bound = 0;
+	  while (dp[zero_bound] == 0)
+	    zero_bound++;
+
+	  if (limb_idx > zero_bound)
+	    {
+	      dp[limb_idx] |= mask;	/* limbs above zero_bound are inverted in twos complement */
+	    }
+	  else if (limb_idx == zero_bound)
+	    {
+	      mp_limb_t  dlimb;
+	      dlimb = (((dp[limb_idx] - 1) | mask) + 1) & GMP_NUMB_MASK;	/* steps 1-3 on one limb */
+	      dp[limb_idx] = dlimb;
+
+	      if (dlimb == 0)
+		{
+		  /* Increment at limb_idx + 1.  Extend the number with a zero limb
+		     for simplicity.  */
+		  dp = MPZ_REALLOC (d, dsize + 1);
+		  dp[dsize] = 0;
+		  MPN_INCR_U (dp + limb_idx + 1, dsize - limb_idx, 1);
+		  dsize += dp[dsize];	/* grew by one limb iff the carry rippled to the top */
+
+		  SIZ (d) = -dsize;
+		}
+	    }
+	  else
+	    ;	/* below zero_bound the twos complement limbs are zero: bit already 0 */
+	}
+      else
+	{
+	  /* Ugh.  The bit should be cleared outside of the end of the
+	     number.  We have to increase the size of the number.  */
+	  dp = MPZ_REALLOC (d, limb_idx + 1);
+	  SIZ (d) = -(limb_idx + 1);
+	  MPN_ZERO (dp + dsize, limb_idx - dsize);	/* zero-fill the gap up to the new bit */
+	  dp[limb_idx] = mask;
+	}
+    }
+}
diff --git a/mpz/cmp.c b/mpz/cmp.c
new file mode 100644
index 0000000..ba4c023
--- /dev/null
+++ b/mpz/cmp.c
@@ -0,0 +1,53 @@
+/* mpz_cmp(u,v) -- Compare U, V.  Return positive, zero, or negative
+   based on if U > V, U == V, or U < V.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2011, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+int
+mpz_cmp (mpz_srcptr u, mpz_srcptr v) __GMP_NOTHROW
+{
+  mp_size_t  usize, vsize, asize;
+  mp_srcptr  up, vp;
+  int        cmp;
+
+  usize = SIZ(u);
+  vsize = SIZ(v);
+  /* Cannot use usize - vsize, may overflow an "int" */
+  if (usize != vsize)
+    return (usize > vsize) ? 1 : -1;	/* signed sizes alone decide when they differ */
+
+  asize = ABS (usize);
+  up = PTR(u);
+  vp = PTR(v);
+  MPN_CMP (cmp, up, vp, asize);	/* equal sizes: compare magnitudes limb by limb */
+  return (usize >= 0 ? cmp : -cmp);	/* for negatives the larger magnitude is the smaller value */
+}
diff --git a/mpz/cmp_d.c b/mpz/cmp_d.c
new file mode 100644
index 0000000..b5b5e8b
--- /dev/null
+++ b/mpz/cmp_d.c
@@ -0,0 +1,144 @@
+/* mpz_cmp_d -- compare absolute values of mpz and double.
+
+Copyright 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp-impl.h"
+
+
+#define RETURN_CMP(zl, dl)                      \
+  do {                                          \
+    zlimb = (zl);                               \
+    dlimb = (dl);                               \
+    if (zlimb != dlimb)                         \
+      return (zlimb >= dlimb ? ret : -ret);     \
+  } while (0)
+
+#define RETURN_NONZERO(ptr, size, val)          \
+  do {                                          \
+    mp_size_t __i;                              \
+    for (__i = (size)-1; __i >= 0; __i--)       \
+      if ((ptr)[__i] != 0)                      \
+        return val;                             \
+    return 0;                                   \
+  } while (0)
+
+
+int
+mpz_cmp_d (mpz_srcptr z, double d)
+{
+  mp_limb_t  darray[LIMBS_PER_DOUBLE], zlimb, dlimb;
+  mp_srcptr  zp;
+  mp_size_t  zsize;
+  int        dexp, ret;
+
+  /* d=NaN is an invalid operation, there's no sensible return value.
+     d=Inf or -Inf is always bigger than z.  */
+  DOUBLE_NAN_INF_ACTION (d, __gmp_invalid_operation (), goto z_zero);
+
+  /* 1. Either operand zero. */
+  zsize = SIZ(z);
+  if (d == 0.0)
+    return zsize;	/* sign of z is the comparison against 0 */
+  if (zsize == 0)
+    {
+    z_zero:
+      return (d < 0.0 ? 1 : -1);
+    }
+
+  /* 2. Opposite signs. */
+  if (zsize >= 0)
+    {
+      if (d < 0.0)
+	return 1;    /* >=0 cmp <0 */
+      ret = 1;
+    }
+  else
+    {
+      if (d >= 0.0)
+	return -1;   /* <0 cmp >=0 */
+      ret = -1;	/* both negative: magnitude comparison is inverted */
+      d = -d;
+      zsize = -zsize;
+    }
+
+  /* 3. Small d, knowing abs(z) >= 1. */
+  if (d < 1.0)
+    return ret;
+
+  dexp = __gmp_extract_double (darray, d);	/* darray = mantissa limbs, dexp = limb position of the top */
+  ASSERT (dexp >= 1);
+
+  /* 4. Check for different high limb positions. */
+  if (zsize != dexp)
+    return (zsize >= dexp ? ret : -ret);
+
+  /* 5. Limb data. */
+  zp = PTR(z);
+
+#if LIMBS_PER_DOUBLE == 2
+  RETURN_CMP (zp[zsize-1], darray[1]);
+  if (zsize == 1)
+    return (darray[0] != 0 ? -ret : 0);	/* d has bits below z's single limb */
+
+  RETURN_CMP (zp[zsize-2], darray[0]);
+  RETURN_NONZERO (zp, zsize-2, ret);	/* any further z limbs make |z| bigger */
+#endif
+
+#if LIMBS_PER_DOUBLE == 3
+  RETURN_CMP (zp[zsize-1], darray[2]);
+  if (zsize == 1)
+    return ((darray[0] | darray[1]) != 0 ? -ret : 0);
+
+  RETURN_CMP (zp[zsize-2], darray[1]);
+  if (zsize == 2)
+    return (darray[0] != 0 ? -ret : 0);
+
+  RETURN_CMP (zp[zsize-3], darray[0]);
+  RETURN_NONZERO (zp, zsize-3, ret);
+#endif
+
+#if LIMBS_PER_DOUBLE >= 4
+  {
+    int i;
+    for (i = 1; i <= LIMBS_PER_DOUBLE; i++)
+      {
+	RETURN_CMP (zp[zsize-i], darray[LIMBS_PER_DOUBLE-i]);
+	if (i >= zsize)
+	  RETURN_NONZERO (darray, LIMBS_PER_DOUBLE-i, -ret);	/* z exhausted: remaining d bits decide */
+      }
+    RETURN_NONZERO (zp, zsize-LIMBS_PER_DOUBLE, ret);
+  }
+#endif
+}
diff --git a/mpz/cmp_si.c b/mpz/cmp_si.c
new file mode 100644
index 0000000..23296c8
--- /dev/null
+++ b/mpz/cmp_si.c
@@ -0,0 +1,69 @@
+/* mpz_cmp_si(u,v) -- Compare an integer U with a single-word int V.
+   Return positive, zero, or negative based on if U > V, U == V, or U < V.
+
+Copyright 1991, 1993-1996, 2000-2002, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+int
+_mpz_cmp_si (mpz_srcptr u, signed long int v_digit) __GMP_NOTHROW
+{
+#if GMP_NAIL_BITS != 0
+  /* FIXME.  This isn't very pretty.  */
+  mpz_t tmp;
+  mp_limb_t tt[2];
+  PTR(tmp) = tt;
+  ALLOC(tmp) = 2;
+  mpz_set_si (tmp, v_digit);	/* stack-backed temporary, no heap allocation */
+  return mpz_cmp (u, tmp);
+#else
+
+  mp_size_t vsize, usize;
+
+  usize = SIZ (u);
+  vsize = (v_digit > 0) - (v_digit < 0);	/* sign of v: -1, 0 or 1 */
+
+  if ((usize == 0) | (usize != vsize))
+    return usize - vsize;	/* differing signs, or u not a single limb: sizes decide */
+  else {
+    mp_limb_t u_digit, absv_digit;
+
+    u_digit = PTR (u)[0];
+    absv_digit = ABS_CAST (unsigned long, v_digit);	/* safe |v|, avoids LONG_MIN overflow */
+
+    if (u_digit == absv_digit)
+      return 0;
+
+    if (u_digit > absv_digit)
+      return usize;	/* usize is +-1 here, giving the correctly signed result */
+    else
+      return -usize;
+  }
+#endif
+}
diff --git a/mpz/cmp_ui.c b/mpz/cmp_ui.c
new file mode 100644
index 0000000..4b40ab7
--- /dev/null
+++ b/mpz/cmp_ui.c
@@ -0,0 +1,77 @@
+/* mpz_cmp_ui.c -- Compare an mpz_t a with an unsigned long b.  Return
+  positive, zero, or negative based on if a > b, a == b, or a < b.
+
+Copyright 1991, 1993-1996, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+int
+_mpz_cmp_ui (mpz_srcptr u, unsigned long int v_digit) __GMP_NOTHROW
+{
+  mp_ptr up;
+  mp_size_t un;
+  mp_limb_t ul;
+
+  up = PTR(u);
+  un = SIZ(u);
+
+  if (un == 0)
+    return -(v_digit != 0);	/* 0 cmp v: 0 if v==0, else -1 */
+
+  if (un == 1)
+    {
+      ul = up[0];
+      if (ul > v_digit)
+	return 1;
+      if (ul < v_digit)
+	return -1;
+      return 0;
+    }
+
+#if GMP_NAIL_BITS != 0
+  if (v_digit > GMP_NUMB_MAX)
+    {
+      if (un == 2)
+	{
+	  ul = up[0] + (up[1] << GMP_NUMB_BITS);	/* reassemble two nailed limbs */
+
+	  if ((up[1] >> GMP_NAIL_BITS) != 0)	/* bits truncated from ul by the shift: u is bigger */
+	    return 1;
+
+	  if (ul > v_digit)
+	    return 1;
+	  if (ul < v_digit)
+	    return -1;
+	  return 0;
+	}
+    }
+#endif
+
+  return un > 0 ? 1 : -1;	/* |u| needs several limbs: sign of u decides */
+}
diff --git a/mpz/cmpabs.c b/mpz/cmpabs.c
new file mode 100644
index 0000000..7e9f160
--- /dev/null
+++ b/mpz/cmpabs.c
@@ -0,0 +1,53 @@
+/* mpz_cmpabs(u,v) -- Compare |U|, |V|.  Return positive, zero, or negative
+   based on if |U| > |V|, |U| == |V|, or |U| < |V|.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+int
+mpz_cmpabs (mpz_srcptr u, mpz_srcptr v) __GMP_NOTHROW
+{
+  mp_size_t  usize, vsize;
+  mp_srcptr  up, vp;
+  int        cmp;
+
+  usize = ABSIZ (u);
+  vsize = ABSIZ (v);
+  /* Cannot use usize - vsize, may overflow an "int" */
+  if (usize != vsize)
+    return (usize > vsize) ? 1 : -1;	/* more limbs means larger magnitude */
+
+  up = PTR(u);
+  vp = PTR(v);
+  MPN_CMP (cmp, up, vp, usize);	/* equal sizes: limbwise magnitude comparison */
+  return cmp;
+}
diff --git a/mpz/cmpabs_d.c b/mpz/cmpabs_d.c
new file mode 100644
index 0000000..d2431cc
--- /dev/null
+++ b/mpz/cmpabs_d.c
@@ -0,0 +1,129 @@
+/* mpz_cmpabs_d -- compare absolute values of mpz and double.
+
+Copyright 2001-2003, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp-impl.h"
+
+
+#define RETURN_CMP(zl, dl)              \
+  do {                                  \
+    zlimb = (zl);                       \
+    dlimb = (dl);                       \
+    if (zlimb != dlimb)                 \
+      return (zlimb >= dlimb ? 1 : -1); \
+  } while (0)
+
+#define RETURN_NONZERO(ptr, size, val)          \
+  do {                                          \
+    mp_size_t __i;                              \
+    for (__i = (size)-1; __i >= 0; __i--)       \
+      if ((ptr)[__i] != 0)                      \
+        return val;                             \
+    return 0;                                   \
+  } while (0)
+
+
+int
+mpz_cmpabs_d (mpz_srcptr z, double d)
+{
+  mp_limb_t  darray[LIMBS_PER_DOUBLE], zlimb, dlimb;
+  mp_srcptr  zp;
+  mp_size_t  zsize;
+  int        dexp;
+
+  /* d=NaN is an invalid operation, there's no sensible return value.
+     d=Inf or -Inf is always bigger than z.  */
+  DOUBLE_NAN_INF_ACTION (d, __gmp_invalid_operation (), return -1);
+
+  /* 1. Check for either operand zero. */
+  zsize = SIZ(z);
+  if (d == 0.0)
+    return (zsize != 0);	/* |z| > 0 iff z nonzero */
+  if (zsize == 0)
+    return -1; /* d != 0 */
+
+  /* 2. Ignore signs. */
+  zsize = ABS(zsize);
+  d = ABS(d);
+
+  /* 3. Small d, knowing abs(z) >= 1. */
+  if (d < 1.0)
+    return 1;
+
+  dexp = __gmp_extract_double (darray, d);	/* darray = mantissa limbs, dexp = limb position of the top */
+  ASSERT (dexp >= 1);
+
+  /* 4. Check for different high limb positions. */
+  if (zsize != dexp)
+    return (zsize >= dexp ? 1 : -1);
+
+  /* 5. Limb data. */
+  zp = PTR(z);
+
+#if LIMBS_PER_DOUBLE == 2
+  RETURN_CMP (zp[zsize-1], darray[1]);
+  if (zsize == 1)
+    return (darray[0] != 0 ? -1 : 0);	/* d has bits below z's single limb */
+
+  RETURN_CMP (zp[zsize-2], darray[0]);
+  RETURN_NONZERO (zp, zsize-2, 1);	/* any further z limbs make |z| bigger */
+#endif
+
+#if LIMBS_PER_DOUBLE == 3
+  RETURN_CMP (zp[zsize-1], darray[2]);
+  if (zsize == 1)
+    return ((darray[0] | darray[1]) != 0 ? -1 : 0);
+
+  RETURN_CMP (zp[zsize-2], darray[1]);
+  if (zsize == 2)
+    return (darray[0] != 0 ? -1 : 0);
+
+  RETURN_CMP (zp[zsize-3], darray[0]);
+  RETURN_NONZERO (zp, zsize-3, 1);
+#endif
+
+#if LIMBS_PER_DOUBLE >= 4
+  {
+    int i;
+    for (i = 1; i <= LIMBS_PER_DOUBLE; i++)
+      {
+	RETURN_CMP (zp[zsize-i], darray[LIMBS_PER_DOUBLE-i]);
+	if (i >= zsize)
+	  RETURN_NONZERO (darray, LIMBS_PER_DOUBLE-i, -1);	/* z exhausted: remaining d bits decide */
+      }
+    RETURN_NONZERO (zp, zsize-LIMBS_PER_DOUBLE, 1);
+  }
+#endif
+}
diff --git a/mpz/cmpabs_ui.c b/mpz/cmpabs_ui.c
new file mode 100644
index 0000000..6deffb3
--- /dev/null
+++ b/mpz/cmpabs_ui.c
@@ -0,0 +1,76 @@
+/* mpz_cmpabs_ui.c -- Compare |a| for an mpz_t a with an unsigned long b.
+  Return positive, zero, or negative based on if |a| > b, |a| == b, or |a| < b.
+
+Copyright 1991, 1993-1995, 1997, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+int
+mpz_cmpabs_ui (mpz_srcptr u, unsigned long int v_digit) __GMP_NOTHROW
+{
+  mp_ptr up;
+  mp_size_t un;
+  mp_limb_t ul;
+
+  up = PTR(u);
+  un = SIZ(u);
+
+  if (un == 0)
+    return -(v_digit != 0);	/* 0 cmp v: 0 if v==0, else -1 */
+
+  un = ABS (un);	/* sign of u is ignored from here on */
+
+  if (un == 1)
+    {
+      ul = up[0];
+      if (ul > v_digit)
+	return 1;
+      if (ul < v_digit)
+	return -1;
+      return 0;
+    }
+
+#if GMP_NAIL_BITS != 0
+  if (v_digit > GMP_NUMB_MAX)
+    {
+      if (un == 2)
+	{
+	  ul = up[0] + (up[1] << GMP_NUMB_BITS);	/* reassemble two nailed limbs */
+
+	  if (ul > v_digit)
+	    return 1;
+	  if (ul < v_digit)
+	    return -1;
+	  return 0;
+	}
+    }
+#endif
+
+  return 1;	/* |u| needs several limbs, so |u| > v */
+}
diff --git a/mpz/com.c b/mpz/com.c
new file mode 100644
index 0000000..c5d22b0
--- /dev/null
+++ b/mpz/com.c
@@ -0,0 +1,87 @@
+/* mpz_com(mpz_ptr dst, mpz_ptr src) -- Assign the bit-complemented value of
+   SRC to DST.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2003, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_com (mpz_ptr dst, mpz_srcptr src)
+{
+  mp_size_t size = SIZ (src);
+  mp_srcptr src_ptr;
+  mp_ptr dst_ptr;
+
+  if (size >= 0)
+    {
+      /* As with infinite precision: one's complement, two's complement.
+	 But this can be simplified using the identity -x = ~x + 1.
+	 So we're going to compute (~~x) + 1 = x + 1!  */
+
+      if (UNLIKELY (size == 0))
+	{
+	  /* special case, as mpn_add_1 wants size!=0 */
+	  MPZ_NEWALLOC (dst, 1)[0] = 1;
+	  SIZ (dst) = -1;	/* ~0 = -1 */
+	}
+      else
+	{
+	  mp_limb_t cy;
+
+	  dst_ptr = MPZ_REALLOC (dst, size + 1);	/* +1 limb for a possible carry out */
+
+	  src_ptr = PTR (src);	/* fetch after the realloc, in case dst == src */
+
+	  cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+	  dst_ptr[size] = cy;
+	  size += cy;
+
+	  /* Store a negative size, to indicate ones-extension.  */
+	  SIZ (dst) = -size;
+      }
+    }
+  else
+    {
+      /* As with infinite precision: two's complement, then one's complement.
+	 But that can be simplified using the identity -x = ~(x - 1).
+	 So we're going to compute ~~(x - 1) = x - 1!  */
+      size = -size;
+
+      dst_ptr = MPZ_REALLOC (dst, size);
+
+      src_ptr = PTR (src);	/* fetch after the realloc, in case dst == src */
+
+      mpn_sub_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+      size -= dst_ptr[size - 1] == 0;	/* subtracting 1 can zero at most the top limb */
+
+      /* Store a positive size, to indicate zero-extension.  */
+      SIZ (dst) = size;
+    }
+}
diff --git a/mpz/combit.c b/mpz/combit.c
new file mode 100644
index 0000000..0a29875
--- /dev/null
+++ b/mpz/combit.c
@@ -0,0 +1,103 @@
+/* mpz_combit -- complement a specified bit.
+
+Copyright 2002, 2003, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_combit (mpz_ptr d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dsize = SIZ(d);
+  mp_ptr dp = PTR(d);
+
+  mp_size_t limb_index = bit_index / GMP_NUMB_BITS;
+  mp_limb_t bit = (CNST_LIMB (1) << (bit_index % GMP_NUMB_BITS));
+
+  /* Check for the most common case: Positive input, no realloc or
+     normalization needed. */
+  if (limb_index + 1 < dsize)
+    dp[limb_index] ^= bit;
+
+  /* Check for the hairy case. d < 0, and we have all zero bits to the
+     right of the bit to toggle. */
+  else if (limb_index < -dsize
+	   && (limb_index == 0 || mpn_zero_p (dp, limb_index))
+	   && (dp[limb_index] & (bit - 1)) == 0)
+    {
+      ASSERT (dsize < 0);
+      dsize = -dsize;
+
+      if (dp[limb_index] & bit)
+	{
+	  /* We toggle the least significant one bit. Corresponds to
+	     an add, with potential carry propagation, on the absolute
+	     value. */
+	  dp = MPZ_REALLOC (d, 1 + dsize);
+	  dp[dsize] = 0;	/* extension limb absorbs a carry out of the top */
+	  MPN_INCR_U (dp + limb_index, 1 + dsize - limb_index, bit);
+	  SIZ(d) = - dsize - dp[dsize];	/* grew by one limb iff the carry reached the top */
+	}
+      else
+	{
+	  /* We toggle a zero bit, subtract from the absolute value. */
+	  MPN_DECR_U (dp + limb_index, dsize - limb_index, bit);
+	  /* The absolute value shrank by at most one bit. */
+	  dsize -= dp[dsize - 1] == 0;
+	  ASSERT (dsize > 0 && dp[dsize - 1] != 0);
+	  SIZ (d) = -dsize;
+	}
+    }
+  else
+    {
+      /* Simple case: Toggle the bit in the absolute value. */
+      dsize = ABS(dsize);
+      if (limb_index < dsize)
+	{
+	  mp_limb_t	 dlimb;
+	  dlimb = dp[limb_index] ^ bit;
+	  dp[limb_index] = dlimb;
+
+	  /* Can happen only when limb_index = dsize - 1. Avoid SIZ(d)
+	     bookkeeping in the common case. */
+	  if (UNLIKELY ((dlimb == 0) + limb_index == dsize)) /* dsize == limb_index + 1 */
+	    {
+	      /* high limb became zero, must normalize */
+	      MPN_NORMALIZE (dp, limb_index);
+	      SIZ (d) = SIZ (d) >= 0 ? limb_index : -limb_index;
+	    }
+	}
+      else
+	{
+	  dp = MPZ_REALLOC (d, limb_index + 1);
+	  MPN_ZERO(dp + dsize, limb_index - dsize);	/* zero-fill the gap up to the new bit */
+	  dp[limb_index++] = bit;
+	  SIZ(d) = SIZ(d) >= 0 ? limb_index : -limb_index;
+	}
+    }
+}
diff --git a/mpz/cong.c b/mpz/cong.c
new file mode 100644
index 0000000..5d2d835
--- /dev/null
+++ b/mpz/cong.c
@@ -0,0 +1,182 @@
+/* mpz_congruent_p -- test congruence of two mpz's.
+
+Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* For big divisors this code is only very slightly better than the user
+   doing a combination of mpz_sub and mpz_tdiv_r, but it's quite convenient,
+   and perhaps in the future can be improved, in similar ways to
+   mpn_divisible_p perhaps.
+
+   The csize==1 / dsize==1 special case makes mpz_congruent_p as good as
+   mpz_congruent_ui_p on relevant operands, though such a combination
+   probably doesn't occur often.
+
+   Alternatives:
+
+   If c<d then it'd work to just form a%d and compare a and c (either as
+   a==c or a+c==d depending on the signs), but the saving from avoiding the
+   abs(a-c) calculation would be small compared to the division.
+
+   Similarly if both a<d and c<d then it would work to just compare a and c
+   (a==c or a+c==d), but this isn't considered a particularly important case
+   and so isn't done for the moment.
+
+   Low zero limbs on d could be stripped and the corresponding limbs of a
+   and c tested and skipped, but doing so would introduce a borrow when a
+   and c differ in sign and have non-zero skipped limbs.  It doesn't seem
+   worth the complications to do this, since low zero limbs on d should
+   occur only rarely.  */
+
/* Return non-zero iff a and c are congruent modulo d, i.e. d divides a-c.
   d == 0 is treated as plain equality a == c.  The sign of d is ignored.  */
int
mpz_congruent_p (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d)
{
  mp_size_t  asize, csize, dsize, sign;
  mp_srcptr  ap, cp, dp;
  mp_ptr     xp;
  mp_limb_t  alow, clow, dlow, dmask, r;
  int        result;
  TMP_DECL;

  dsize = SIZ(d);
  if (UNLIKELY (dsize == 0))
    return (mpz_cmp (a, c) == 0);   /* mod 0 means exact equality */

  dsize = ABS(dsize);		/* sign of d is irrelevant to divisibility */
  dp = PTR(d);

  /* Arrange |a| >= |c| so a is always the larger operand below.  */
  if (ABSIZ(a) < ABSIZ(c))
    MPZ_SRCPTR_SWAP (a, c);

  asize = SIZ(a);
  csize = SIZ(c);
  sign = (asize ^ csize);	/* negative iff a and c have opposite signs */

  asize = ABS(asize);
  ap = PTR(a);

  /* c == 0 reduces the congruence to plain divisibility d | a.  */
  if (csize == 0)
    return mpn_divisible_p (ap, asize, dp, dsize);

  csize = ABS(csize);
  cp = PTR(c);

  alow = ap[0];
  clow = cp[0];
  dlow = dp[0];

  /* Check a==c mod low zero bits of dlow.  This might catch a few cases of
     a!=c quickly, and it helps the csize==1 special cases below.  */
  dmask = LOW_ZEROS_MASK (dlow) & GMP_NUMB_MASK;
  alow = (sign >= 0 ? alow : -alow);	/* fold the relative sign into alow */
  if (((alow-clow) & dmask) != 0)
    return 0;

  if (csize == 1)
    {
      if (dsize == 1)
	{
	cong_1:
	  /* Opposite signs: reduce -clow mod dlow so both residues are
	     taken with the same sign convention.  */
	  if (sign < 0)
	    NEG_MOD (clow, clow, dlow);

	  /* For big a a straight remainder beats the modexact scheme.  */
	  if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
	    {
	      r = mpn_mod_1 (ap, asize, dlow);
	      if (clow < dlow)
		return r == clow;
	      else
		return r == (clow % dlow);
	    }

	  if ((dlow & 1) == 0)
	    {
	      /* Strip low zero bits to get odd d required by modexact.  If
		 d==e*2^n then a==c mod d if and only if both a==c mod e and
		 a==c mod 2^n, the latter having been done above.  */
	      unsigned	twos;
	      count_trailing_zeros (twos, dlow);
	      dlow >>= twos;
	    }

	  /* Accept both 0 and dlow as a "zero" return from modexact.  */
	  r = mpn_modexact_1c_odd (ap, asize, dlow, clow);
	  return r == 0 || r == dlow;
	}

      /* dlow==0 is avoided since we don't want to bother handling extra low
	 zero bits if dsecond is even (would involve borrow if a,c differ in
	 sign and alow,clow!=0).  */
      if (dsize == 2 && dlow != 0)
	{
	  mp_limb_t  dsecond = dp[1];

	  /* If d's high limb fits within the low zero bits of dlow, then
	     d shifted right by those zeros fits in a single limb.  */
	  if (dsecond <= dmask)
	    {
	      unsigned	 twos;
	      count_trailing_zeros (twos, dlow);
	      dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
	      ASSERT_LIMB (dlow);

	      /* dlow will be odd here, so the test for it even under cong_1
		 is unnecessary, but the rest of that code is wanted. */
	      goto cong_1;
	    }
	}
    }

  /* General case: form |a-c| and test divisibility by d.  */
  TMP_MARK;
  xp = TMP_ALLOC_LIMBS (asize+1);

  /* calculate abs(a-c) */
  if (sign >= 0)
    {
      /* same signs, subtract; pick the order that makes the result
	 non-negative */
      if (asize > csize || mpn_cmp (ap, cp, asize) >= 0)
	ASSERT_NOCARRY (mpn_sub (xp, ap, asize, cp, csize));
      else
	ASSERT_NOCARRY (mpn_sub_n (xp, cp, ap, asize));
      MPN_NORMALIZE (xp, asize);
    }
  else
    {
      /* different signs, add; |a-c| = |a|+|c| in that case */
      mp_limb_t  carry;
      carry = mpn_add (xp, ap, asize, cp, csize);
      xp[asize] = carry;
      asize += (carry != 0);
    }

  result = mpn_divisible_p (xp, asize, dp, dsize);

  TMP_FREE;
  return result;
}
diff --git a/mpz/cong_2exp.c b/mpz/cong_2exp.c
new file mode 100644
index 0000000..9de9645
--- /dev/null
+++ b/mpz/cong_2exp.c
@@ -0,0 +1,149 @@
+/* mpz_congruent_2exp_p -- test congruence of mpz mod 2^n.
+
+Copyright 2001, 2002, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
/* Return non-zero iff a and c are congruent modulo 2^d, i.e. their low d
   bits agree (in two's-complement terms when the signs differ).  */
int
mpz_congruent_2exp_p (mpz_srcptr a, mpz_srcptr c, mp_bitcnt_t d) __GMP_NOTHROW
{
  mp_size_t      i, dlimbs;
  unsigned       dbits;
  mp_ptr         ap, cp;
  mp_limb_t      dmask, alimb, climb, sum;
  mp_size_t      as, cs, asize, csize;

  as = SIZ(a);
  asize = ABS(as);

  cs = SIZ(c);
  csize = ABS(cs);

  /* Arrange |a| >= |c| (in limb count).  */
  if (asize < csize)
    {
      MPZ_SRCPTR_SWAP (a, c);
      MP_SIZE_T_SWAP (asize, csize);
    }

  /* Split d into whole limbs plus a partial-limb bit mask.  */
  dlimbs = d / GMP_NUMB_BITS;
  dbits = d % GMP_NUMB_BITS;
  dmask = (CNST_LIMB(1) << dbits) - 1;

  ap = PTR(a);
  cp = PTR(c);

  if (csize == 0)
    goto a_zeros;

  if ((cs ^ as) >= 0)
    {
      /* same signs, direct comparison */

      /* a==c for limbs in common */
      if (mpn_cmp (ap, cp, MIN (csize, dlimbs)) != 0)
	return 0;

      /* if that's all of dlimbs, then a==c for remaining bits */
      if (csize > dlimbs)
	return ((ap[dlimbs]-cp[dlimbs]) & dmask) == 0;

    a_zeros:
      /* a remains, need all zero bits */

      /* if d covers all of a and c, then must be exactly equal */
      if (asize <= dlimbs)
	return asize == csize;

      /* whole limbs zero */
      for (i = csize; i < dlimbs; i++)
	if (ap[i] != 0)
	  return 0;

      /* partial limb zero */
      return (ap[dlimbs] & dmask) == 0;
    }
  else
    {
      /* different signs, negated comparison: need a+c == 0 mod 2^d,
	 checked limb by limb without forming the sum explicitly */

      /* common low zero limbs, stopping at first non-zeros, which must
	 match twos complement */
      i = 0;
      do
	{
	  ASSERT (i < csize);  /* always have a non-zero limb on c */
	  alimb = ap[i];
	  climb = cp[i];
	  sum = (alimb + climb) & GMP_NUMB_MASK;

	  if (i >= dlimbs)
	    return (sum & dmask) == 0;
	  ++i;

	  /* require both zero, or first non-zeros as twos-complements */
	  if (sum != 0)
	    return 0;
	} while (alimb == 0);

      /* further limbs matching as ones-complement; the carry generated by
	 the twos-complement limb above propagates as an implicit +1 */
      for (; i < csize; ++i)
	{
	  alimb = ap[i];
	  climb = cp[i];
	  sum = alimb ^ climb ^ GMP_NUMB_MASK;

	  if (i >= dlimbs)
	    return (sum & dmask) == 0;

	  if (sum != 0)
	    return 0;
	}

      /* no more c, so require all 1 bits in a */

      if (asize < dlimbs)
	return 0;   /* not enough a */

      /* whole limbs */
      for ( ; i < dlimbs; i++)
	if (ap[i] != GMP_NUMB_MAX)
	  return 0;

      /* if only whole limbs, no further fetches from a */
      if (dbits == 0)
	return 1;

      /* need enough a */
      if (asize == dlimbs)
	return 0;

      /* partial limb: carry into a[dlimbs] must clear the low dbits */
      return ((ap[dlimbs]+1) & dmask) == 0;
    }
}
diff --git a/mpz/cong_ui.c b/mpz/cong_ui.c
new file mode 100644
index 0000000..6f86c23
--- /dev/null
+++ b/mpz/cong_ui.c
@@ -0,0 +1,115 @@
+/* mpz_congruent_ui_p -- test congruence of mpz and ulong.
+
+Copyright 2000-2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* There's some explicit checks for c<d since it seems reasonably likely an
+   application might use that in a test.
+
+   Hopefully the compiler can generate something good for r==(c%d), though
+   if modexact is being used exclusively then that's not reached.  */
+
+int
+mpz_congruent_ui_p (mpz_srcptr a, unsigned long cu, unsigned long du)
+{
+  mp_srcptr  ap;
+  mp_size_t  asize;
+  mp_limb_t  c, d, r;
+
+  if (UNLIKELY (du == 0))
+    return (mpz_cmp_ui (a, cu) == 0);
+
+  asize = SIZ(a);
+  if (asize == 0)
+    {
+      if (cu < du)
+	return cu == 0;
+      else
+	return (cu % du) == 0;
+    }
+
+  /* For nails don't try to be clever if c or d is bigger than a limb, just
+     fake up some mpz_t's and go to the main mpz_congruent_p.  */
+  if (du > GMP_NUMB_MAX || cu > GMP_NUMB_MAX)
+    {
+      mp_limb_t  climbs[2], dlimbs[2];
+      mpz_t      cz, dz;
+
+      ALLOC(cz) = 2;
+      PTR(cz) = climbs;
+      ALLOC(dz) = 2;
+      PTR(dz) = dlimbs;
+
+      mpz_set_ui (cz, cu);
+      mpz_set_ui (dz, du);
+      return mpz_congruent_p (a, cz, dz);
+    }
+
+  /* NEG_MOD works on limbs, so convert ulong to limb */
+  c = cu;
+  d = du;
+
+  if (asize < 0)
+    {
+      asize = -asize;
+      NEG_MOD (c, c, d);
+    }
+
+  ap = PTR (a);
+
+  if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
+    {
+      r = mpn_mod_1 (ap, asize, d);
+      if (c < d)
+	return r == c;
+      else
+	return r == (c % d);
+    }
+
+  if ((d & 1) == 0)
+    {
+      /* Strip low zero bits to get odd d required by modexact.  If
+	 d==e*2^n then a==c mod d if and only if both a==c mod 2^n
+	 and a==c mod e.  */
+
+      unsigned	twos;
+
+      if ((ap[0]-c) & LOW_ZEROS_MASK (d))
+	return 0;
+
+      count_trailing_zeros (twos, d);
+      d >>= twos;
+    }
+
+  r = mpn_modexact_1c_odd (ap, asize, d, c);
+  return r == 0 || r == d;
+}
diff --git a/mpz/dive_ui.c b/mpz/dive_ui.c
new file mode 100644
index 0000000..c859b96
--- /dev/null
+++ b/mpz/dive_ui.c
@@ -0,0 +1,68 @@
+/* mpz_divexact_ui -- exact division mpz by ulong.
+
+Copyright 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_divexact_ui (mpz_ptr dst, mpz_srcptr src, unsigned long divisor)
+{
+  mp_size_t  size, abs_size;
+  mp_ptr     dst_ptr;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  /* For nails don't try to be clever if d is bigger than a limb, just fake
+     up an mpz_t and go to the main mpz_divexact.  */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t  dlimbs[2];
+      mpz_t      dz;
+      ALLOC(dz) = 2;
+      PTR(dz) = dlimbs;
+      mpz_set_ui (dz, divisor);
+      mpz_divexact (dst, src, dz);
+      return;
+    }
+
+  size = SIZ(src);
+  if (size == 0)
+    {
+      SIZ(dst) = 0;
+      return;
+    }
+  abs_size = ABS (size);
+
+  dst_ptr = MPZ_REALLOC (dst, abs_size);
+
+  MPN_DIVREM_OR_DIVEXACT_1 (dst_ptr, PTR(src), abs_size, (mp_limb_t) divisor);
+  abs_size -= (dst_ptr[abs_size-1] == 0);
+  SIZ(dst) = (size >= 0 ? abs_size : -abs_size);
+}
diff --git a/mpz/divegcd.c b/mpz/divegcd.c
new file mode 100644
index 0000000..dc236c3
--- /dev/null
+++ b/mpz/divegcd.c
@@ -0,0 +1,156 @@
+/* mpz_divexact_gcd -- exact division optimized for GCDs.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2005, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Set q to a/d, expecting d to be from a GCD and therefore usually small.
+
+   The distribution of GCDs of random numbers can be found in Knuth volume 2
+   section 4.5.2 theorem D.
+
+            GCD     chance
+             1       60.8%
+            2^k      20.2%     (1<=k<32)
+           3*2^k      9.0%     (1<=k<32)
+           other     10.1%
+
+   Only the low limb is examined for optimizations, since GCDs bigger than
+   2^32 (or 2^64) will occur very infrequently.
+
+   Future: This could change to an mpn_divexact_gcd, possibly partly
+   inlined, if/when the relevant mpq functions change to an mpn based
+   implementation.  */
+
+
+#if GMP_NUMB_BITS % 2 == 0
+static void
+mpz_divexact_by3 (mpz_ptr q, mpz_srcptr a)
+{
+  mp_size_t  size = SIZ(a);
+  mp_size_t  abs_size = ABS(size);
+  mp_ptr     qp;
+
+  qp = MPZ_REALLOC (q, abs_size);
+
+  mpn_bdiv_dbm1 (qp, PTR(a), abs_size, GMP_NUMB_MASK / 3);
+
+  abs_size -= (qp[abs_size-1] == 0);
+  SIZ(q) = (size>0 ? abs_size : -abs_size);
+}
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0
+static void
+mpz_divexact_by5 (mpz_ptr q, mpz_srcptr a)
+{
+  mp_size_t  size = SIZ(a);
+  mp_size_t  abs_size = ABS(size);
+  mp_ptr     qp;
+
+  qp = MPZ_REALLOC (q, abs_size);
+
+  mpn_bdiv_dbm1 (qp, PTR(a), abs_size, GMP_NUMB_MASK / 5);
+
+  abs_size -= (qp[abs_size-1] == 0);
+  SIZ(q) = (size>0 ? abs_size : -abs_size);
+}
+#endif
+
+static void
+mpz_divexact_limb (mpz_ptr q, mpz_srcptr a, mp_limb_t d)
+{
+  mp_size_t  size = SIZ(a);
+  mp_size_t  abs_size = ABS(size);
+  mp_ptr     qp;
+
+  qp = MPZ_REALLOC (q, abs_size);
+
+  MPN_DIVREM_OR_DIVEXACT_1 (qp, PTR(a), abs_size, d);
+
+  abs_size -= (qp[abs_size-1] == 0);
+  SIZ(q) = (size > 0 ? abs_size : -abs_size);
+}
+
/* q = a/d, division known exact, d > 0.  Optimized for d being a GCD,
   hence usually 1, a power of 2, or 3 or 5 times one (see the frequency
   table in the file comment above).  */
void
mpz_divexact_gcd (mpz_ptr q, mpz_srcptr a, mpz_srcptr d)
{
  ASSERT (mpz_sgn (d) > 0);	/* caller must pass a strictly positive d */

  if (SIZ(a) == 0)
    {
      SIZ(q) = 0;
      return;
    }

  if (SIZ(d) == 1)
    {
      mp_limb_t  dl = PTR(d)[0];
      int        twos;

      if ((dl & 1) == 0)
	{
	  /* Remove the 2^twos factor with a cheap shift.  Note a is
	     re-pointed at q afterwards; the remaining cases all cope with
	     that aliasing.  */
	  count_trailing_zeros (twos, dl);
	  dl >>= twos;
	  mpz_tdiv_q_2exp (q, a, twos);
	  a = q;
	}

      if (dl == 1)
	{
	  /* d was 1 or a power of 2; nothing left to divide out.  */
	  if (q != a)
	    mpz_set (q, a);
	  return;
	}
#if GMP_NUMB_BITS % 2 == 0
      if (dl == 3)
	{
	  mpz_divexact_by3 (q, a);
	  return;
	}
#endif
#if GMP_NUMB_BITS % 4 == 0
      if (dl == 5)
	{
	  mpz_divexact_by5 (q, a);
	  return;
	}
#endif

      /* General odd one-limb divisor.  */
      mpz_divexact_limb (q, a, dl);
      return;
    }

  /* Multi-limb divisor: general exact division.  */
  mpz_divexact (q, a, d);
}
diff --git a/mpz/divexact.c b/mpz/divexact.c
new file mode 100644
index 0000000..8336819
--- /dev/null
+++ b/mpz/divexact.c
@@ -0,0 +1,89 @@
+/* mpz_divexact -- finds quotient when known that quot * den == num && den != 0.
+
+Contributed to the GNU project by Niels Möller.
+
+Copyright 1991, 1993-1998, 2000-2002, 2005-2007, 2009, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
/* quot = num/den, where den is known to divide num exactly and den != 0.
   |num| < |den| (including num == 0) yields quot = 0.  */
void
mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
{
  mp_ptr qp;
  mp_size_t qn;
  mp_srcptr np, dp;
  mp_size_t nn, dn;
  TMP_DECL;

#if WANT_ASSERT
  /* In assertion builds, verify the exact-divisibility precondition.  */
  {
    mpz_t  rem;
    mpz_init (rem);
    mpz_tdiv_r (rem, num, den);
    ASSERT (SIZ(rem) == 0);
    mpz_clear (rem);
  }
#endif

  nn = ABSIZ (num);
  dn = ABSIZ (den);

  if (nn < dn)
    {
      /* This special case avoids segfaults below when the function is
	 incorrectly called with |N| < |D|, N != 0.  It also handles the
	 well-defined case N = 0.  */
      SIZ(quot) = 0;
      return;
    }

  qn = nn - dn + 1;	/* maximum possible quotient limb count */

  TMP_MARK;

  /* If quot aliases an operand, divide into scratch space so the operand's
     limbs aren't clobbered mid-division.  */
  if (quot == num || quot == den)
    qp = TMP_ALLOC_LIMBS (qn);
  else
    qp = MPZ_NEWALLOC (quot, qn);

  np = PTR(num);
  dp = PTR(den);

  mpn_divexact (qp, np, nn, dp, dn);
  MPN_NORMALIZE (qp, qn);

  /* Copy back from scratch in the aliased case.  */
  if (qp != PTR(quot))
    MPN_COPY (MPZ_NEWALLOC (quot, qn), qp, qn);

  /* Quotient is positive iff num and den have the same sign.  */
  SIZ(quot) = (SIZ(num) ^ SIZ(den)) >= 0 ? qn : -qn;

  TMP_FREE;
}
diff --git a/mpz/divis.c b/mpz/divis.c
new file mode 100644
index 0000000..35429a7
--- /dev/null
+++ b/mpz/divis.c
@@ -0,0 +1,43 @@
+/* mpz_divisible_p -- mpz by mpz divisibility test
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+int
+mpz_divisible_p (mpz_srcptr a, mpz_srcptr d)
+{
+  mp_size_t dsize = SIZ(d);
+  mp_size_t asize = SIZ(a);
+
+  if (UNLIKELY (dsize == 0))
+    return (asize == 0);
+
+  return mpn_divisible_p (PTR(a), ABS(asize), PTR(d), ABS(dsize));
+}
diff --git a/mpz/divis_2exp.c b/mpz/divis_2exp.c
new file mode 100644
index 0000000..4ecb0c0
--- /dev/null
+++ b/mpz/divis_2exp.c
@@ -0,0 +1,60 @@
+/* mpz_divisible_2exp_p -- mpz by 2^n divisibility test
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+int
+mpz_divisible_2exp_p (mpz_srcptr a, mp_bitcnt_t d) __GMP_NOTHROW
+{
+  mp_size_t      i, dlimbs;
+  unsigned       dbits;
+  mp_ptr         ap;
+  mp_limb_t      dmask;
+  mp_size_t      asize;
+
+  asize = ABSIZ(a);
+  dlimbs = d / GMP_NUMB_BITS;
+
+  /* if d covers the whole of a, then only a==0 is divisible */
+  if (asize <= dlimbs)
+    return asize == 0;
+
+  /* whole limbs must be zero */
+  ap = PTR(a);
+  for (i = 0; i < dlimbs; i++)
+    if (ap[i] != 0)
+      return 0;
+
+  /* left over bits must be zero */
+  dbits = d % GMP_NUMB_BITS;
+  dmask = (CNST_LIMB(1) << dbits) - 1;
+  return (ap[dlimbs] & dmask) == 0;
+}
diff --git a/mpz/divis_ui.c b/mpz/divis_ui.c
new file mode 100644
index 0000000..b24c7dc
--- /dev/null
+++ b/mpz/divis_ui.c
@@ -0,0 +1,80 @@
+/* mpz_divisible_ui_p -- mpz by ulong divisibility test.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+int
+mpz_divisible_ui_p (mpz_srcptr a, unsigned long d)
+{
+  mp_size_t  asize;
+  mp_ptr     ap;
+  unsigned   twos;
+
+  asize = SIZ(a);
+  if (UNLIKELY (d == 0))
+    return (asize == 0);
+
+  if (asize == 0)  /* 0 divisible by any d */
+    return 1;
+
+  /* For nails don't try to be clever if d is bigger than a limb, just fake
+     up an mpz_t and go to the main mpz_divisible_p.  */
+  if (d > GMP_NUMB_MAX)
+    {
+      mp_limb_t  dlimbs[2];
+      mpz_t      dz;
+      ALLOC(dz) = 2;
+      PTR(dz) = dlimbs;
+      mpz_set_ui (dz, d);
+      return mpz_divisible_p (a, dz);
+    }
+
+  ap = PTR(a);
+  asize = ABS(asize);  /* ignore sign of a */
+
+  if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
+    return mpn_mod_1 (ap, asize, (mp_limb_t) d) == 0;
+
+  if (! (d & 1))
+    {
+      /* Strip low zero bits to get odd d required by modexact.  If d==e*2^n
+	 and a is divisible by 2^n and by e, then it's divisible by d. */
+
+      if ((ap[0] & LOW_ZEROS_MASK (d)) != 0)
+	return 0;
+
+      count_trailing_zeros (twos, (mp_limb_t) d);
+      d >>= twos;
+    }
+
+  return mpn_modexact_1_odd (ap, asize, (mp_limb_t) d) == 0;
+}
diff --git a/mpz/dump.c b/mpz/dump.c
new file mode 100644
index 0000000..6135ddd
--- /dev/null
+++ b/mpz/dump.c
@@ -0,0 +1,48 @@
+/* mpz_dump - Dump an integer to stdout.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO
+   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1999-2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h> /* for strlen */
+#include "gmp-impl.h"
+
+void
+mpz_dump (mpz_srcptr u)
+{
+  char *str;
+
+  str = mpz_get_str (0, 10, u);
+  printf ("%s\n", str);
+  (*__gmp_free_func) (str, strlen (str) + 1);
+}
diff --git a/mpz/export.c b/mpz/export.c
new file mode 100644
index 0000000..2dacd69
--- /dev/null
+++ b/mpz/export.c
@@ -0,0 +1,189 @@
+/* mpz_export -- create word data from mpz.
+
+Copyright 2002, 2003, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if HAVE_LIMB_BIG_ENDIAN
+#define HOST_ENDIAN     1
+#endif
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define HOST_ENDIAN     (-1)
+#endif
+#ifndef HOST_ENDIAN
+static const mp_limb_t  endian_test = (CNST_LIMB(1) << (GMP_LIMB_BITS-7)) - 1;
+#define HOST_ENDIAN     (* (signed char *) &endian_test)
+#endif
+
/* Export |z| into an array of `count' words of `size' bytes each at `data'
   (allocated via __gmp_allocate_func when data is NULL).  `order' picks
   most (1) / least (-1) significant word first, `endian' the byte order
   within a word (1 big, -1 little, 0 host), `nail' the number of unused
   high bits per word.  The word count is stored in *countp and `data' is
   returned.  */
void *
mpz_export (void *data, size_t *countp, int order,
	    size_t size, int endian, size_t nail, mpz_srcptr z)
{
  mp_size_t      zsize;
  mp_srcptr      zp;
  size_t         count, dummy;
  unsigned long  numb;
  unsigned       align;

  ASSERT (order == 1 || order == -1);
  ASSERT (endian == 1 || endian == 0 || endian == -1);
  ASSERT (nail <= 8*size);
  ASSERT (nail <  8*size || SIZ(z) == 0); /* nail < 8*size+(SIZ(z)==0) */

  if (countp == NULL)
    countp = &dummy;		/* caller doesn't want the count */

  zsize = SIZ(z);
  if (zsize == 0)
    {
      *countp = 0;
      return data;
    }

  zsize = ABS (zsize);		/* only the absolute value is exported */
  zp = PTR(z);
  numb = 8*size - nail;		/* significant bits per output word */
  MPN_SIZEINBASE_2EXP (count, zp, zsize, numb);
  *countp = count;

  if (data == NULL)
    data = (*__gmp_allocate_func) (count*size);

  if (endian == 0)
    endian = HOST_ENDIAN;

  align = ((char *) data - (char *) NULL) % sizeof (mp_limb_t);

  /* Fast paths: full aligned limbs with no extra nail bits reduce to a
     straight limb copy, reversal, byte swap, or swapped reversal.  */
  if (nail == GMP_NAIL_BITS)
    {
      if (size == sizeof (mp_limb_t) && align == 0)
	{
	  if (order == -1 && endian == HOST_ENDIAN)
	    {
	      MPN_COPY ((mp_ptr) data, zp, (mp_size_t) count);
	      return data;
	    }
	  if (order == 1 && endian == HOST_ENDIAN)
	    {
	      MPN_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
	      return data;
	    }

	  if (order == -1 && endian == -HOST_ENDIAN)
	    {
	      MPN_BSWAP ((mp_ptr) data, zp, (mp_size_t) count);
	      return data;
	    }
	  if (order == 1 && endian == -HOST_ENDIAN)
	    {
	      MPN_BSWAP_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
	      return data;
	    }
	}
    }

  /* General path: stream bits out of the limbs byte by byte.  */
  {
    mp_limb_t      limb, wbitsmask;
    size_t         i, numb;
    mp_size_t      j, wbytes, woffset;
    unsigned char  *dp;
    int            lbits, wbits;
    mp_srcptr      zend;

    numb = size * 8 - nail;	/* shadows the outer numb, same value */

    /* whole bytes per word */
    wbytes = numb / 8;

    /* possible partial byte */
    wbits = numb % 8;
    wbitsmask = (CNST_LIMB(1) << wbits) - 1;

    /* offset to get to the next word */
    woffset = (endian >= 0 ? size : - (mp_size_t) size)
      + (order < 0 ? size : - (mp_size_t) size);

    /* least significant byte */
    dp = (unsigned char *) data
      + (order >= 0 ? (count-1)*size : 0) + (endian >= 0 ? size-1 : 0);

    /* Emit N bits into *dp from the `limb' buffer (lbits bits available),
       refilling from *zp++ when the buffer runs short.  */
#define EXTRACT(N, MASK)                                \
    do {                                                \
      if (lbits >= (N))                                 \
        {                                               \
          *dp = limb MASK;                              \
          limb >>= (N);                                 \
          lbits -= (N);                                 \
        }                                               \
      else                                              \
        {                                               \
          mp_limb_t  newlimb;                           \
          newlimb = (zp == zend ? 0 : *zp++);           \
          *dp = (limb | (newlimb << lbits)) MASK;       \
          limb = newlimb >> ((N)-lbits);                \
          lbits += GMP_NUMB_BITS - (N);                 \
        }                                               \
    } while (0)

    zend = zp + zsize;
    lbits = 0;
    limb = 0;
    for (i = 0; i < count; i++)
      {
	/* whole data bytes of this word */
	for (j = 0; j < wbytes; j++)
	  {
	    EXTRACT (8, + 0);
	    dp -= endian;
	  }
	/* partial data byte, if the word's bit count isn't a multiple of 8 */
	if (wbits != 0)
	  {
	    EXTRACT (wbits, & wbitsmask);
	    dp -= endian;
	    j++;
	  }
	/* remaining nail bytes of the word are zero-filled */
	for ( ; j < size; j++)
	  {
	    *dp = '\0';
	    dp -= endian;
	  }
	dp += woffset;
      }

    ASSERT (zp == PTR(z) + ABSIZ(z));

    /* low byte of word after most significant */
    ASSERT (dp == (unsigned char *) data
	    + (order < 0 ? count*size : - (mp_size_t) size)
	    + (endian >= 0 ? (mp_size_t) size - 1 : 0));
  }
  return data;
}
diff --git a/mpz/fac_ui.c b/mpz/fac_ui.c
new file mode 100644
index 0000000..a7998b3
--- /dev/null
+++ b/mpz/fac_ui.c
@@ -0,0 +1,122 @@
+/* mpz_fac_ui(RESULT, N) -- Set RESULT to N!.
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 1991, 1993-1995, 2000-2003, 2011, 2012, 2015, 2021 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
+  do {								\
+    if ((PR) > (MAX_PR)) {					\
+      (VEC)[(I)++] = (PR);					\
+      (PR) = (P);						\
+    } else							\
+      (PR) *= (P);						\
+  } while (0)	/* accumulate factor P into running product PR; flush PR to VEC once it exceeds MAX_PR */
+
+#if TUNE_PROGRAM_BUILD
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))	/* worst-case factors whose product fits one limb */
+#else
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_ODD_THRESHOLD)+1))	/* used below to size the factors[] scratch array */
+#endif
+
+/* Computes n!, the factorial of n.
+   WARNING: it assumes that n fits in a limb!
+ */
+void
+mpz_fac_ui (mpz_ptr x, unsigned long n)
+{
+  static const mp_limb_t table[] = { ONE_LIMB_FACTORIAL_TABLE };
+
+  ASSERT (n <= GMP_NUMB_MAX);	/* n must fit in a limb (see header comment) */
+
+  if (n < numberof (table))	/* tiny n: one-limb table lookup */
+    {
+      MPZ_NEWALLOC (x, 1)[0] = table[n];
+      SIZ (x) = 1;
+    }
+  else if (BELOW_THRESHOLD (n, FAC_ODD_THRESHOLD))	/* medium n: pack partial products into limbs, then one big product */
+    {
+      mp_limb_t prod, max_prod;
+      mp_size_t j;
+      mp_ptr    factors;
+      mp_limb_t fac, diff = n - numberof (table);	/* factors above the table range still to multiply in */
+      TMP_SDECL;
+
+      TMP_SMARK;
+      factors = TMP_SALLOC_LIMBS (2 + diff / FACTORS_PER_LIMB);
+
+      factors[0] = table[numberof (table)-1];	/* start from the largest tabulated factorial */
+      j = 1;
+      if ((diff & 1) == 0)
+	{
+	  prod = n;
+	  /* if (diff != 0) */
+	    fac = --n * numberof (table);
+	}
+      else
+	{
+	  prod = n * numberof (table);
+	  fac = prod + --diff;
+	}
+
+#if TUNE_PROGRAM_BUILD
+      max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD_LIMIT * FAC_DSC_THRESHOLD_LIMIT);
+#else
+      max_prod = GMP_NUMB_MAX /
+	(((FAC_ODD_THRESHOLD + numberof (table) + 1) / 2) *
+	 ((FAC_ODD_THRESHOLD + numberof (table)) / 2));	/* bound so prod * fac cannot overflow a limb */
+#endif
+      for (;diff != 0; fac += (diff -= 2))
+	FACTOR_LIST_STORE (fac, prod, max_prod, factors, j);
+
+      factors[j++] = prod;	/* flush the last partial product */
+      mpz_prodlimbs (x, factors, j);
+
+      TMP_SFREE;
+    }
+  else
+    {
+      mp_limb_t count;
+      mpz_oddfac_1 (x, n, 0);
+      if (n <= TABLE_LIMIT_2N_MINUS_POPC_2N)
+	count = __gmp_fac2cnt_table[n / 2 - 1];	/* precomputed n - popcount(n) for small n */
+      else
+	{
+	  popc_limb (count, n);
+	  count = n - count;	/* 2-adic valuation of n! is n - popcount(n) */
+	}
+      mpz_mul_2exp (x, x, count);	/* n! = oddfac(n) * 2^count */
+    }
+}
+
+#undef FACTORS_PER_LIMB
+#undef FACTOR_LIST_STORE
diff --git a/mpz/fdiv_q.c b/mpz/fdiv_q.c
new file mode 100644
index 0000000..f17023e
--- /dev/null
+++ b/mpz/fdiv_q.c
@@ -0,0 +1,52 @@
+/* mpz_fdiv_q -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994-1996, 2000, 2001, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t dividend_size = SIZ (dividend);
+  mp_size_t divisor_size = SIZ (divisor);
+  mpz_t rem;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  MPZ_TMP_INIT (rem, ABS (divisor_size));	/* |remainder| < |divisor|, so this size suffices */
+
+  mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+  if ((divisor_size ^ dividend_size) < 0 && SIZ (rem) != 0)	/* opposite signs and division inexact */
+    mpz_sub_ui (quot, quot, 1L);	/* round truncated quotient down to floor */
+
+  TMP_FREE;
+}
diff --git a/mpz/fdiv_q_ui.c b/mpz/fdiv_q_ui.c
new file mode 100644
index 0000000..539f951
--- /dev/null
+++ b/mpz/fdiv_q_ui.c
@@ -0,0 +1,100 @@
+/* mpz_fdiv_q_ui -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994-1996, 1999, 2001, 2002, 2004, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_fdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)	/* 0 / divisor: quotient and remainder both 0 */
+    {
+      SIZ(quot) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  qp = MPZ_REALLOC (quot, nn);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)	/* divisor needs two limbs (nails build) */
+    {
+      mp_limb_t dp[2], rp[2];
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  qp[0] = 0;
+	  rl = np[0];
+	  qn = 1;		/* a white lie, fixed below */
+	}
+      else
+	{
+	  dp[0] = divisor & GMP_NUMB_MASK;
+	  dp[1] = divisor >> GMP_NUMB_BITS;
+	  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+	  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+	  qn = nn - 2 + 1;
+	}
+
+      if (rl != 0 && ns < 0)	/* floor adjust: negative dividend, inexact */
+	{
+	  mpn_incr_u (qp, (mp_limb_t) 1);
+	  rl = divisor - rl;	/* fdiv remainder is divisor - |tdiv remainder| */
+	}
+
+      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;	/* strip up to two high zero limbs */
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+
+      if (rl != 0 && ns < 0)	/* floor adjust: negative dividend, inexact */
+	{
+	  mpn_incr_u (qp, (mp_limb_t) 1);
+	  rl = divisor - rl;
+	}
+
+      qn = nn - (qp[nn - 1] == 0);	/* at most one high zero limb here */
+    }
+
+  SIZ(quot) = ns >= 0 ? qn : -qn;	/* quotient sign follows dividend (divisor > 0) */
+  return rl;
+}
diff --git a/mpz/fdiv_qr.c b/mpz/fdiv_qr.c
new file mode 100644
index 0000000..a0a6166
--- /dev/null
+++ b/mpz/fdiv_qr.c
@@ -0,0 +1,64 @@
+/* mpz_fdiv_qr -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994-1996, 2000, 2001, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t divisor_size = SIZ (divisor);
+  mp_size_t xsize;
+  mpz_t temp_divisor;		/* N.B.: lives until function returns! */
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* We need the original value of the divisor after the quotient and
+     remainder have been preliminary calculated.  We have to copy it to
+     temporary space if it's the same variable as either QUOT or REM.  */
+  if (quot == divisor || rem == divisor)
+    {
+      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+      mpz_set (temp_divisor, divisor);
+      divisor = temp_divisor;
+    }
+
+  xsize = SIZ (dividend) ^ divisor_size;	/* sign of the exact quotient, captured before QUOT is clobbered */
+  mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+  if (xsize < 0 && SIZ (rem) != 0)	/* opposite signs and division inexact */
+    {
+      mpz_sub_ui (quot, quot, 1L);	/* round quotient towards -infinity */
+      mpz_add (rem, rem, divisor);	/* give remainder the divisor's sign */
+    }
+
+  TMP_FREE;
+}
diff --git a/mpz/fdiv_qr_ui.c b/mpz/fdiv_qr_ui.c
new file mode 100644
index 0000000..984ca1c
--- /dev/null
+++ b/mpz/fdiv_qr_ui.c
@@ -0,0 +1,116 @@
+/* mpz_fdiv_qr_ui -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994-1996, 1999, 2001, 2002, 2004, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_fdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)	/* 0 / divisor: quotient and remainder both 0 */
+    {
+      SIZ(quot) = 0;
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  qp = MPZ_REALLOC (quot, nn);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)	/* divisor needs two limbs (nails build) */
+    {
+      mp_limb_t dp[2];
+      mp_ptr rp;
+      mp_size_t rn;
+
+      rp = MPZ_REALLOC (rem, 2);
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  qp[0] = 0;
+	  qn = 1;		/* a white lie, fixed below */
+	  rl = np[0];
+	  rp[0] = rl;
+	}
+      else
+	{
+	  dp[0] = divisor & GMP_NUMB_MASK;
+	  dp[1] = divisor >> GMP_NUMB_BITS;
+	  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+	  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+	  qn = nn - 2 + 1;
+	}
+
+      if (rl != 0 && ns < 0)	/* floor adjust: negative dividend, inexact */
+	{
+	  mpn_incr_u (qp, (mp_limb_t) 1);
+	  rl = divisor - rl;	/* fdiv remainder is divisor - |tdiv remainder| */
+	  rp[0] = rl & GMP_NUMB_MASK;
+	  rp[1] = rl >> GMP_NUMB_BITS;
+	}
+
+      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;	/* strip up to two high zero limbs */
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);	/* remainder size: 1 or 2 limbs, normalized */
+      SIZ(rem) = rn;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+	SIZ(rem) = 0;
+      else
+	{
+	  if (ns < 0)	/* floor adjust: negative dividend, inexact */
+	    {
+	      mpn_incr_u (qp, (mp_limb_t) 1);
+	      rl = divisor - rl;
+	    }
+
+	  MPZ_NEWALLOC (rem, 1)[0] = rl;
+	  SIZ(rem) = rl != 0;
+	}
+      qn = nn - (qp[nn - 1] == 0);	/* at most one high zero limb here */
+    }
+
+  SIZ(quot) = ns >= 0 ? qn : -qn;	/* quotient sign follows dividend (divisor > 0) */
+  return rl;
+}
diff --git a/mpz/fdiv_r.c b/mpz/fdiv_r.c
new file mode 100644
index 0000000..c7a3c8e
--- /dev/null
+++ b/mpz/fdiv_r.c
@@ -0,0 +1,59 @@
+/* mpz_fdiv_r -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994-1996, 2001, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t divisor_size = SIZ (divisor);
+  mpz_t temp_divisor;		/* N.B.: lives until function returns! */
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* We need the original value of the divisor after the remainder has been
+     preliminary calculated.  We have to copy it to temporary space if it's
+     the same variable as REM.  */
+  if (rem == divisor)
+    {
+      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+      mpz_set (temp_divisor, divisor);
+      divisor = temp_divisor;
+    }
+
+  mpz_tdiv_r (rem, dividend, divisor);
+
+  if ((divisor_size ^ SIZ (dividend)) < 0 && SIZ (rem) != 0)	/* opposite signs and division inexact */
+    mpz_add (rem, rem, divisor);	/* shift remainder to the divisor's sign */
+
+  TMP_FREE;
+}
diff --git a/mpz/fdiv_r_ui.c b/mpz/fdiv_r_ui.c
new file mode 100644
index 0000000..f2df8a8
--- /dev/null
+++ b/mpz/fdiv_r_ui.c
@@ -0,0 +1,107 @@
+/* mpz_fdiv_r_ui -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994-1996, 2001, 2002, 2004, 2005, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)	/* 0 mod divisor == 0 */
+    {
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)	/* divisor needs two limbs (nails build) */
+    {
+      mp_limb_t dp[2];
+      mp_ptr rp, qp;
+      mp_size_t rn;
+      TMP_DECL;
+
+      rp = MPZ_REALLOC (rem, 2);
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  rl = np[0];
+	  rp[0] = rl;
+	}
+      else
+	{
+	  TMP_MARK;
+	  dp[0] = divisor & GMP_NUMB_MASK;
+	  dp[1] = divisor >> GMP_NUMB_BITS;
+	  qp = TMP_ALLOC_LIMBS (nn - 2 + 1);	/* quotient is scratch only; discarded */
+	  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+	  TMP_FREE;
+	  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+	}
+
+      if (rl != 0 && ns < 0)	/* floor adjust: negative dividend, inexact */
+	{
+	  rl = divisor - rl;	/* fdiv remainder is divisor - |tdiv remainder| */
+	  rp[0] = rl & GMP_NUMB_MASK;
+	  rp[1] = rl >> GMP_NUMB_BITS;
+	}
+
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);	/* remainder size: 1 or 2 limbs, normalized */
+      SIZ(rem) = rn;
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+	SIZ(rem) = 0;
+      else
+	{
+	  if (ns < 0)	/* floor adjust: negative dividend, inexact */
+	    rl = divisor - rl;
+
+	  MPZ_NEWALLOC (rem, 1)[0] = rl;
+	  SIZ(rem) = 1;
+	}
+    }
+
+  return rl;	/* remainder is nonnegative since divisor > 0 */
+}
diff --git a/mpz/fdiv_ui.c b/mpz/fdiv_ui.c
new file mode 100644
index 0000000..f0aacdb
--- /dev/null
+++ b/mpz/fdiv_ui.c
@@ -0,0 +1,100 @@
+/* mpz_fdiv_ui -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994-1996, 2001, 2002, 2004, 2005, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_fdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)	/* 0 mod divisor == 0 */
+    {
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)	/* divisor needs two limbs (nails build) */
+    {
+      mp_limb_t dp[2], rp[2];
+      mp_ptr qp;
+      mp_size_t rn;
+      TMP_DECL;
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  rl = np[0];
+	  rp[0] = rl;
+	}
+      else
+	{
+	  TMP_MARK;
+	  dp[0] = divisor & GMP_NUMB_MASK;
+	  dp[1] = divisor >> GMP_NUMB_BITS;
+	  qp = TMP_ALLOC_LIMBS (nn - 2 + 1);	/* quotient is scratch only; discarded */
+	  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+	  TMP_FREE;
+	  rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+	}
+
+      if (rl != 0 && ns < 0)	/* floor adjust: negative dividend, inexact */
+	{
+	  rl = divisor - rl;
+	  rp[0] = rl & GMP_NUMB_MASK;
+	  rp[1] = rl >> GMP_NUMB_BITS;
+	}
+
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);	/* NOTE(review): rn (and the rp[] stores) are unused in this remainder-only variant — apparently carried over from fdiv_r_ui; confirm against upstream */
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+	;	/* exact: no sign fixup needed */
+      else
+	{
+	  if (ns < 0)	/* floor adjust: negative dividend, inexact */
+	    rl = divisor - rl;
+	}
+    }
+
+  return rl;	/* remainder is nonnegative since divisor > 0 */
+}
diff --git a/mpz/fib2_ui.c b/mpz/fib2_ui.c
new file mode 100644
index 0000000..ea26227
--- /dev/null
+++ b/mpz/fib2_ui.c
@@ -0,0 +1,58 @@
+/* mpz_fib2_ui -- calculate Fibonacci numbers.
+
+Copyright 2001, 2012, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+void
+mpz_fib2_ui (mpz_ptr fn, mpz_ptr fnsub1, unsigned long n)
+{
+  mp_ptr     fp, f1p;
+  mp_size_t  size;
+
+  if (n <= FIB_TABLE_LIMIT)	/* small n: both values come from the table */
+    {
+      MPZ_NEWALLOC (fn, 1)[0] = FIB_TABLE (n);
+      SIZ(fn) = (n != 0);      /* F[0]==0, others are !=0 */
+      MPZ_NEWALLOC (fnsub1, 1)[0] = FIB_TABLE ((int) n - 1);	/* (int) cast so n==0 indexes -1; presumably FIB_TABLE handles it — confirm */
+      SIZ(fnsub1) = (n != 1);  /* F[1-1]==0, others are !=0 */
+      return;
+    }
+
+  size = MPN_FIB2_SIZE (n);	/* allocation bound; actual size set below */
+  fp =  MPZ_NEWALLOC (fn,     size);
+  f1p = MPZ_NEWALLOC (fnsub1, size);
+
+  size = mpn_fib2_ui (fp, f1p, n);	/* computes F[n] and F[n-1] together */
+
+  SIZ(fn)     = size;
+  SIZ(fnsub1) = size - (f1p[size-1] == 0);	/* F[n-1] may be one limb shorter */
+}
diff --git a/mpz/fib_ui.c b/mpz/fib_ui.c
new file mode 100644
index 0000000..a65f3c9
--- /dev/null
+++ b/mpz/fib_ui.c
@@ -0,0 +1,158 @@
+/* mpz_fib_ui -- calculate Fibonacci numbers.
+
+Copyright 2000-2002, 2005, 2012, 2014, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* change to "#define TRACE(x) x" to get some traces */
+#define TRACE(x)
+
+
+/* In the F[2k+1] below for k odd, the -2 won't give a borrow from the low
+   limb because the result F[2k+1] is an F[4m+3] and such numbers are always
+   == 1, 2 or 5 mod 8, whereas an underflow would leave 6 or 7.  (This is
+   the same as in mpn_fib2_ui.)
+
+   In the F[2k+1] for k even, the +2 won't give a carry out of the low limb
+   in normal circumstances.  This is an F[4m+1] and we claim that F[3*2^b+1]
+   == 1 mod 2^b is the first F[4m+1] congruent to 0 or 1 mod 2^b, and hence
+   if n < 2^GMP_NUMB_BITS then F[n] cannot have a low limb of 0 or 1.  No
+   proof for this claim, but it's been verified up to b==32 and has such a
+   nice pattern it must be true :-).  Of interest is that F[3*2^b] == 0 mod
+   2^(b+1) seems to hold too.
+
+   When n >= 2^GMP_NUMB_BITS, which can arise in a nails build, then the low
+   limb of F[4m+1] can certainly be 1, and an mpn_add_1 must be used.  */
+
+void
+mpz_fib_ui (mpz_ptr fn, unsigned long n)
+{
+  mp_ptr         fp, xp, yp;
+  mp_size_t      size, xalloc;
+  unsigned long  n2;
+  mp_limb_t      c;
+  TMP_DECL;
+
+  if (n <= FIB_TABLE_LIMIT)	/* small n: direct table lookup */
+    {
+      MPZ_NEWALLOC (fn, 1)[0] = FIB_TABLE (n);
+      SIZ(fn) = (n != 0);      /* F[0]==0, others are !=0 */
+      return;
+    }
+
+  n2 = n/2;	/* build F[n] from F[n2] and F[n2-1] via the doubling formulas below */
+  xalloc = MPN_FIB2_SIZE (n2) + 1;
+  fp = MPZ_NEWALLOC (fn, 2 * xalloc);	/* room for product of two <= xalloc-limb operands */
+
+  TMP_MARK;
+  TMP_ALLOC_LIMBS_2 (xp,xalloc, yp,xalloc);
+  size = mpn_fib2_ui (xp, yp, n2);	/* xp = F[n2], yp = F[n2-1] */
+
+  TRACE (printf ("mpz_fib_ui last step n=%lu size=%ld bit=%lu\n",
+		 n >> 1, size, n&1);
+	 mpn_trace ("xp", xp, size);
+	 mpn_trace ("yp", yp, size));
+
+  if (n & 1)
+    {
+      /* F[2k+1] = (2F[k]+F[k-1])*(2F[k]-F[k-1]) + 2*(-1)^k  */
+      mp_size_t  xsize, ysize;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      xp[size] = mpn_lshift (xp, xp, size, 1);	/* xp = 2F[k] */
+      yp[size] = 0;
+      ASSERT_NOCARRY (mpn_add_n_sub_n (xp, yp, xp, yp, size+1));	/* xp,yp = 2F[k]+F[k-1], 2F[k]-F[k-1] */
+      xsize = size + (xp[size] != 0);
+      ASSERT (yp[size] <= 1);
+      ysize = size + yp[size];
+#else
+      mp_limb_t  c2;
+
+      c2 = mpn_lshift (fp, xp, size, 1);	/* fp used as scratch for 2F[k] */
+      c = c2 + mpn_add_n (xp, fp, yp, size);
+      xp[size] = c;
+      xsize = size + (c != 0);
+      c2 -= mpn_sub_n (yp, fp, yp, size);
+      yp[size] = c2;
+      ASSERT (c2 <= 1);
+      ysize = size + c2;
+#endif
+
+      size = xsize + ysize;	/* bound on the product size */
+      c = mpn_mul (fp, xp, xsize, yp, ysize);
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+      /* no overflow, see comments above */
+      ASSERT (n & 2 ? fp[0] >= 2 : fp[0] <= GMP_NUMB_MAX-2);
+      fp[0] += (n & 2 ? -CNST_LIMB(2) : CNST_LIMB(2));	/* the 2*(-1)^k term */
+#else
+      if (n & 2)
+	{
+	  ASSERT (fp[0] >= 2);
+	  fp[0] -= 2;
+	}
+      else
+	{
+	  ASSERT (c != GMP_NUMB_MAX); /* because it's the high of a mul */
+	  c += mpn_add_1 (fp, fp, size-1, CNST_LIMB(2));
+	  fp[size-1] = c;
+	}
+#endif
+    }
+  else
+    {
+      /* F[2k] = F[k]*(F[k]+2F[k-1]) */
+
+      mp_size_t  xsize, ysize;
+#if HAVE_NATIVE_mpn_addlsh1_n
+      c = mpn_addlsh1_n (yp, xp, yp, size);	/* yp = F[k] + 2F[k-1] */
+#else
+      c = mpn_lshift (yp, yp, size, 1);
+      c += mpn_add_n (yp, yp, xp, size);
+#endif
+      yp[size] = c;
+      xsize = size;
+      ysize = size + (c != 0);
+      size += ysize;	/* bound on the product size */
+      c = mpn_mul (fp, yp, ysize, xp, xsize);
+    }
+
+  /* one or two high zeros */
+  size -= (c == 0);
+  size -= (fp[size-1] == 0);
+  SIZ(fn) = size;
+
+  TRACE (printf ("done special, size=%ld\n", size);
+	 mpn_trace ("fp ", fp, size));
+
+  TMP_FREE;
+}
diff --git a/mpz/fits_s.h b/mpz/fits_s.h
new file mode 100644
index 0000000..9fd6a0b
--- /dev/null
+++ b/mpz/fits_s.h
@@ -0,0 +1,60 @@
+/* int mpz_fits_X_p (mpz_t z) -- test whether z fits signed type X.
+
+Copyright 1997, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+int
+FUNCTION (mpz_srcptr z) __GMP_NOTHROW
+{
+  mp_size_t n = SIZ(z);
+  mp_ptr p = PTR(z);
+  mp_limb_t limb = p[0];	/* NOTE(review): read before the n==0 check — presumably mpz always has >= 1 limb allocated; confirm */
+
+  if (n == 0)
+    return 1;	/* zero fits every signed type */
+  if (n == 1)
+    return limb <= MAXIMUM;
+  if (n == -1)
+    return limb <= NEG_CAST (mp_limb_t, MINIMUM);	/* compare against |MINIMUM| without signed overflow */
+#if GMP_NAIL_BITS != 0
+  {
+    if ((p[1] >> GMP_NAIL_BITS) == 0)	/* two numb-sized limbs may still fit when nails are in use */
+      {
+	limb += p[1] << GMP_NUMB_BITS;
+	if (n == 2)
+	  return limb <= MAXIMUM;
+	if (n == -2)
+	  return limb <= NEG_CAST (mp_limb_t, MINIMUM);
+      }
+  }
+#endif
+  return 0;	/* more limbs than the target type can hold */
+}
diff --git a/mpz/fits_sint.c b/mpz/fits_sint.c
new file mode 100644
index 0000000..d548c45
--- /dev/null
+++ b/mpz/fits_sint.c
@@ -0,0 +1,36 @@
+/* int mpz_fits_sint_p (mpz_t z) -- test whether z fits an int.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpz_fits_sint_p	/* instantiate the generic test in fits_s.h for int */
+#define MAXIMUM   INT_MAX
+#define MINIMUM   INT_MIN
+
+#include "fits_s.h"
diff --git a/mpz/fits_slong.c b/mpz/fits_slong.c
new file mode 100644
index 0000000..9306a00
--- /dev/null
+++ b/mpz/fits_slong.c
@@ -0,0 +1,36 @@
+/* int mpz_fits_slong_p (mpz_t z) -- test whether z fits a long.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpz_fits_slong_p	/* instantiate the generic test in fits_s.h for long */
+#define MAXIMUM   LONG_MAX
+#define MINIMUM   LONG_MIN
+
+#include "fits_s.h"
diff --git a/mpz/fits_sshort.c b/mpz/fits_sshort.c
new file mode 100644
index 0000000..431d6b0
--- /dev/null
+++ b/mpz/fits_sshort.c
@@ -0,0 +1,36 @@
+/* int mpz_fits_sshort_p (mpz_t z) -- test whether z fits a short.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpz_fits_sshort_p
+#define MAXIMUM   SHRT_MAX
+#define MINIMUM   SHRT_MIN
+
+#include "fits_s.h"
diff --git a/mpz/fits_uint.c b/mpz/fits_uint.c
new file mode 100644
index 0000000..6becc8b
--- /dev/null
+++ b/mpz/fits_uint.c
@@ -0,0 +1,33 @@
+/* mpz_fits_uint_p -- test whether z fits an unsigned int.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_fits_uint_p 1
+
+#include "gmp-impl.h"
diff --git a/mpz/fits_ulong.c b/mpz/fits_ulong.c
new file mode 100644
index 0000000..c70886b
--- /dev/null
+++ b/mpz/fits_ulong.c
@@ -0,0 +1,33 @@
+/* mpz_fits_ulong_p -- test whether z fits an unsigned long.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_fits_ulong_p 1
+
+#include "gmp-impl.h"
diff --git a/mpz/fits_ushort.c b/mpz/fits_ushort.c
new file mode 100644
index 0000000..16873d6
--- /dev/null
+++ b/mpz/fits_ushort.c
@@ -0,0 +1,33 @@
+/* mpz_fits_ushort_p -- test whether z fits an unsigned short.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_fits_ushort_p 1
+
+#include "gmp-impl.h"
diff --git a/mpz/gcd.c b/mpz/gcd.c
new file mode 100644
index 0000000..9557155
--- /dev/null
+++ b/mpz/gcd.c
@@ -0,0 +1,164 @@
+/* mpz/gcd.c:   Calculate the greatest common divisor of two integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2005, 2010, 2015, 2016
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+void
+mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
+{
+  unsigned long int g_zero_bits, u_zero_bits, v_zero_bits;
+  mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
+  mp_ptr tp;
+  mp_ptr up;
+  mp_size_t usize;
+  mp_ptr vp;
+  mp_size_t vsize;
+  mp_size_t gsize;
+  TMP_DECL;
+
+  up = PTR(u);
+  usize = ABSIZ (u);
+  vp = PTR(v);
+  vsize = ABSIZ (v);
+  /* GCD(0, V) == V.  */
+  if (usize == 0)
+    {
+      SIZ (g) = vsize;
+      if (g == v)
+	return;
+      tp = MPZ_NEWALLOC (g, vsize);
+      MPN_COPY (tp, vp, vsize);
+      return;
+    }
+
+  /* GCD(U, 0) == U.  */
+  if (vsize == 0)
+    {
+      SIZ (g) = usize;
+      if (g == u)
+	return;
+      tp = MPZ_NEWALLOC (g, usize);
+      MPN_COPY (tp, up, usize);
+      return;
+    }
+
+  if (usize == 1)
+    {
+      SIZ (g) = 1;
+      MPZ_NEWALLOC (g, 1)[0] = mpn_gcd_1 (vp, vsize, up[0]);
+      return;
+    }
+
+  if (vsize == 1)
+    {
+      SIZ(g) = 1;
+      MPZ_NEWALLOC (g, 1)[0] = mpn_gcd_1 (up, usize, vp[0]);
+      return;
+    }
+
+  TMP_MARK;
+
+  /*  Eliminate low zero bits from U and V and move to temporary storage.  */
+  tp = up;
+  while (*tp == 0)
+    tp++;
+  u_zero_limbs = tp - up;
+  usize -= u_zero_limbs;
+  count_trailing_zeros (u_zero_bits, *tp);
+  up = TMP_ALLOC_LIMBS (usize);
+  if (u_zero_bits != 0)
+    {
+      mpn_rshift (up, tp, usize, u_zero_bits);
+      usize -= up[usize - 1] == 0;
+    }
+  else
+    MPN_COPY (up, tp, usize);
+
+  tp = vp;
+  while (*tp == 0)
+    tp++;
+  v_zero_limbs = tp - vp;
+  vsize -= v_zero_limbs;
+  count_trailing_zeros (v_zero_bits, *tp);
+  vp = TMP_ALLOC_LIMBS (vsize);
+  if (v_zero_bits != 0)
+    {
+      mpn_rshift (vp, tp, vsize, v_zero_bits);
+      vsize -= vp[vsize - 1] == 0;
+    }
+  else
+    MPN_COPY (vp, tp, vsize);
+
+  if (u_zero_limbs > v_zero_limbs)
+    {
+      g_zero_limbs = v_zero_limbs;
+      g_zero_bits = v_zero_bits;
+    }
+  else
+    {
+      g_zero_limbs = u_zero_limbs;
+      if (u_zero_limbs < v_zero_limbs)
+	g_zero_bits = u_zero_bits;
+      else  /*  Equal.  */
+	g_zero_bits = MIN (u_zero_bits, v_zero_bits);
+    }
+
+  /*  Call mpn_gcd.  The 2nd argument must not have more bits than the 1st.  */
+  vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
+    ? mpn_gcd (vp, vp, vsize, up, usize)
+    : mpn_gcd (vp, up, usize, vp, vsize);
+
+  /*  Here G <-- V << (g_zero_limbs*GMP_LIMB_BITS + g_zero_bits).  */
+  gsize = vsize + g_zero_limbs;
+  if (g_zero_bits != 0)
+    {
+      mp_limb_t cy_limb;
+      gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0;
+      tp = MPZ_NEWALLOC (g, gsize);
+      MPN_ZERO (tp, g_zero_limbs);
+
+      tp = tp + g_zero_limbs;
+      cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
+      if (cy_limb != 0)
+	tp[vsize] = cy_limb;
+    }
+  else
+    {
+      tp = MPZ_NEWALLOC (g, gsize);
+      MPN_ZERO (tp, g_zero_limbs);
+      MPN_COPY (tp + g_zero_limbs, vp, vsize);
+    }
+
+  SIZ (g) = gsize;
+  TMP_FREE;
+}
diff --git a/mpz/gcd_ui.c b/mpz/gcd_ui.c
new file mode 100644
index 0000000..64aa46d
--- /dev/null
+++ b/mpz/gcd_ui.c
@@ -0,0 +1,92 @@
+/* mpz_gcd_ui -- Calculate the greatest common divisor of two integers.
+
+Copyright 1994, 1996, 1999-2004, 2015, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_gcd_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+{
+  mp_size_t un;
+  mp_limb_t res;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (v > GMP_NUMB_MAX)
+    {
+      mpz_t vz, lw;
+      mp_limb_t vlimbs[2], wlimbs[2];
+
+      if (w == NULL)
+	{
+	  PTR(lw) = wlimbs;
+	  ALLOC(lw) = 2;
+	  SIZ(lw) = 0;
+	  w = lw;
+	}
+      vlimbs[0] = v & GMP_NUMB_MASK;
+      vlimbs[1] = v >> GMP_NUMB_BITS;
+      PTR(vz) = vlimbs;
+      SIZ(vz) = 2;
+      mpz_gcd (w, u, vz);
+      /* Because v != 0, the gcd w satisfies w <= v, so it fits an unsigned long.  */
+      ASSERT (mpz_fits_ulong_p (w));
+      return mpz_get_ui (w);
+    }
+#endif
+
+  un = ABSIZ(u);
+
+  if (un == 0)
+    res = v;
+  else if (v == 0)
+    {
+      if (w != NULL)
+	{
+	  if (u != w)
+	    {
+	      MPZ_NEWALLOC (w, un);
+	      MPN_COPY (PTR(w), PTR(u), un);
+	    }
+	  SIZ(w) = un;
+	}
+      /* Return u if it fits a ulong, otherwise 0. */
+      res = PTR(u)[0];
+      return (un == 1 && res <= ULONG_MAX ? res : 0);
+    }
+  else
+    res = mpn_gcd_1 (PTR(u), un, (mp_limb_t) v);
+
+  if (w != NULL)
+    {
+      MPZ_NEWALLOC (w, 1)[0] = res;
+      SIZ(w) = res != 0;
+    }
+  return res;
+}
diff --git a/mpz/gcdext.c b/mpz/gcdext.c
new file mode 100644
index 0000000..b1f73c2
--- /dev/null
+++ b/mpz/gcdext.c
@@ -0,0 +1,135 @@
+/* mpz_gcdext(g, s, t, a, b) -- Set G to gcd(a, b), and S and T such that
+   g = as + bt.
+
+Copyright 1991, 1993-1997, 2000, 2001, 2005, 2011, 2012, 2015 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+
+void
+mpz_gcdext (mpz_ptr g, mpz_ptr s, mpz_ptr t, mpz_srcptr a, mpz_srcptr b)
+{
+  mp_size_t asize, bsize;
+  mp_ptr tmp_ap, tmp_bp;
+  mp_size_t gsize, ssize, tmp_ssize;
+  mp_ptr gp, tmp_gp, tmp_sp;
+  TMP_DECL;
+
+  /* mpn_gcdext requires that Usize >= Vsize.  Therefore, we often
+     have to swap U and V.  The computed cofactor will be the
+     "smallest" one, which is faster to produce.  The wanted one will
+     be computed here; this is needed anyway when both are requested.  */
+
+  asize = ABSIZ (a);
+  bsize = ABSIZ (b);
+
+  ASSERT (s != NULL);
+
+  if (asize < bsize)
+    {
+      MPZ_SRCPTR_SWAP (a, b);
+      MP_SIZE_T_SWAP (asize, bsize);
+      MPZ_PTR_SWAP (s, t);
+    }
+
+  if (bsize == 0)
+    {
+      /* g = |a|, s = sgn(a), t = 0. */
+      ssize = SIZ (a) >= 0 ? (asize != 0) : -1;
+
+      if (g != NULL)
+	{
+	  /* If g == a, then ALLOC(g) == ALLOC(a) >= asize, i.e.
+	     the next MPZ_NEWALLOC returns the old PTR(a).  */
+	  gp = MPZ_NEWALLOC (g, asize);
+	  MPN_COPY (gp, PTR (a), asize);
+	  SIZ (g) = asize;
+	}
+      if (t != NULL)
+	SIZ (t) = 0;
+      if (s != NULL)
+	{
+	  SIZ (s) = ssize;
+	  MPZ_NEWALLOC (s, 1)[0] = 1;
+	}
+      return;
+    }
+
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_2 (tmp_gp, bsize, tmp_sp, asize + bsize + bsize + 1);
+  tmp_bp = tmp_sp + bsize + 1;
+  tmp_ap = tmp_bp + bsize;
+  MPN_COPY (tmp_ap, PTR (a), asize);
+  MPN_COPY (tmp_bp, PTR (b), bsize);
+
+  gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, tmp_ap, asize, tmp_bp, bsize);
+
+  ssize = ABS (tmp_ssize);
+  tmp_ssize = SIZ (a) >= 0 ? tmp_ssize : -tmp_ssize;
+
+  if (t != NULL)
+    {
+      mpz_t x;
+      mpz_t gtmp, stmp;
+
+      PTR (gtmp) = tmp_gp;
+      SIZ (gtmp) = gsize;
+
+      PTR (stmp) = tmp_sp;
+      SIZ (stmp) = tmp_ssize;
+
+      ASSERT (ssize <= bsize); /* ssize*2 + asize + 1 <= asize + bsize*2 + 1 */
+      PTR (x) = tmp_sp + ssize;
+      ALLOC (x) = ssize + asize + 1;
+
+      mpz_mul (x, stmp, a);
+      mpz_sub (x, gtmp, x);
+      mpz_divexact (t, x, b);
+    }
+
+  if (s != NULL)
+    {
+      mp_ptr sp;
+
+      sp = MPZ_NEWALLOC (s, ssize);
+      MPN_COPY (sp, tmp_sp, ssize);
+      SIZ (s) = tmp_ssize;
+    }
+
+  if (g != NULL)
+    {
+      gp = MPZ_NEWALLOC (g, gsize);
+      MPN_COPY (gp, tmp_gp, gsize);
+      SIZ (g) = gsize;
+    }
+
+  TMP_FREE;
+}
diff --git a/mpz/get_d.c b/mpz/get_d.c
new file mode 100644
index 0000000..61d4e36
--- /dev/null
+++ b/mpz/get_d.c
@@ -0,0 +1,43 @@
+/* double mpz_get_d (mpz_t src) -- Return the double approximation to SRC.
+
+Copyright 1996, 1997, 2000-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+double
+mpz_get_d (mpz_srcptr z)
+{
+  mp_size_t size;
+
+  size = SIZ (z);
+  if (UNLIKELY (size == 0))
+    return 0.0;
+
+  return mpn_get_d (PTR (z), ABS (size), size, 0L);
+}
diff --git a/mpz/get_d_2exp.c b/mpz/get_d_2exp.c
new file mode 100644
index 0000000..beb364a
--- /dev/null
+++ b/mpz/get_d_2exp.c
@@ -0,0 +1,53 @@
+/* double mpz_get_d_2exp (signed long int *exp, mpz_t src).
+
+Copyright 2001, 2003, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+double
+mpz_get_d_2exp (signed long int *exp2, mpz_srcptr src)
+{
+  mp_size_t size, abs_size;
+  mp_srcptr ptr;
+  long exp;
+
+  size = SIZ(src);
+  if (UNLIKELY (size == 0))
+    {
+      *exp2 = 0;
+      return 0.0;
+    }
+
+  ptr = PTR(src);
+  abs_size = ABS(size);
+  MPN_SIZEINBASE_2EXP(exp, ptr, abs_size, 1);
+  *exp2 = exp;
+  return mpn_get_d (ptr, abs_size, size, -exp);
+}
diff --git a/mpz/get_si.c b/mpz/get_si.c
new file mode 100644
index 0000000..c65be2e
--- /dev/null
+++ b/mpz/get_si.c
@@ -0,0 +1,52 @@
+/* mpz_get_si(integer) -- Return the least significant digit from INTEGER.
+
+Copyright 1991, 1993-1995, 2000-2002, 2006, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+signed long int
+mpz_get_si (mpz_srcptr z) __GMP_NOTHROW
+{
+  mp_ptr zp = PTR (z);
+  mp_size_t size = SIZ (z);
+  mp_limb_t zl = zp[0];
+
+#if GMP_NAIL_BITS != 0
+  if (ULONG_MAX > GMP_NUMB_MAX && ABS (size) >= 2)
+    zl |= zp[1] << GMP_NUMB_BITS;
+#endif
+
+  if (size > 0)
+    return zl & LONG_MAX;
+  else if (size < 0)
+    /* Written this way to avoid signed overflow when the value is LONG_MIN (e.g. 0x80000000).  */
+    return -1 - (long) ((zl - 1) & LONG_MAX);
+  else
+    return 0;
+}
diff --git a/mpz/get_str.c b/mpz/get_str.c
new file mode 100644
index 0000000..c00a9a3
--- /dev/null
+++ b/mpz/get_str.c
@@ -0,0 +1,115 @@
+/* mpz_get_str (string, base, mp_src) -- Convert the multiple precision
+   number MP_SRC to a string STRING of base BASE.  If STRING is NULL
+   allocate space for the result.  In any case, return a pointer to the
+   result.  If STRING is not NULL, the caller must ensure enough space is
+   available to store the result.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2005, 2012, 2017 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <string.h> /* for strlen */
+#include "gmp-impl.h"
+#include "longlong.h"
+
+char *
+mpz_get_str (char *res_str, int base, mpz_srcptr x)
+{
+  mp_ptr xp;
+  mp_size_t x_size = SIZ (x);
+  char *return_str;
+  size_t str_size;
+  size_t alloc_size = 0;
+  const char *num_to_text;
+  int i;
+  TMP_DECL;
+
+  num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+  if (base > 1)
+    {
+      if (base <= 36)
+	num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+      else if (UNLIKELY (base > 62))
+	return NULL;
+    }
+  else if (base > -2)
+    {
+      base = 10;
+    }
+  else
+    {
+      base = -base;
+      if (UNLIKELY (base > 36))
+	return NULL;
+    }
+
+  /* allocate string for the user if necessary */
+  if (res_str == NULL)
+    {
+      /* digits, null terminator, possible minus sign */
+      MPN_SIZEINBASE (alloc_size, PTR(x), ABS(x_size), base);
+      alloc_size += 1 + (x_size<0);
+      res_str = __GMP_ALLOCATE_FUNC_TYPE (alloc_size, char);
+    }
+  return_str = res_str;
+
+  if (x_size < 0)
+    {
+      *res_str++ = '-';
+      x_size = -x_size;
+    }
+
+  /* mpn_get_str clobbers its input on non power-of-2 bases */
+  TMP_MARK;
+  xp = PTR (x);
+  if (! POW2_P (base))
+    {
+      xp = TMP_ALLOC_LIMBS (x_size | 1);  /* |1 in case x_size==0 */
+      MPN_COPY (xp, PTR (x), x_size);
+    }
+
+  str_size = mpn_get_str ((unsigned char *) res_str, base, xp, x_size);
+  ASSERT (alloc_size == 0 || str_size <= alloc_size - (SIZ(x) < 0));
+
+  /* Convert result to printable chars.  */
+  for (i = 0; i < str_size; i++)
+    res_str[i] = num_to_text[(int) res_str[i]];
+  res_str[str_size] = 0;
+
+  TMP_FREE;
+
+  /* if allocated then resize down to the actual space required */
+  if (alloc_size != 0)
+    {
+      size_t  actual_size = str_size + 1 + (res_str - return_str);
+      ASSERT (actual_size == strlen (return_str) + 1);
+      __GMP_REALLOCATE_FUNC_MAYBE_TYPE (return_str, alloc_size, actual_size,
+					char);
+    }
+  return return_str;
+}
diff --git a/mpz/get_ui.c b/mpz/get_ui.c
new file mode 100644
index 0000000..2d7ea4e
--- /dev/null
+++ b/mpz/get_ui.c
@@ -0,0 +1,33 @@
+/* mpz_get_ui(integer) -- Return the least significant digit from INTEGER.
+
+Copyright 1991, 1993-1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_get_ui 1
+
+#include "gmp-impl.h"
diff --git a/mpz/getlimbn.c b/mpz/getlimbn.c
new file mode 100644
index 0000000..d34d764
--- /dev/null
+++ b/mpz/getlimbn.c
@@ -0,0 +1,33 @@
+/* mpz_getlimbn(integer,n) -- Return the N:th limb from INTEGER.
+
+Copyright 1993-1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_getlimbn 1
+
+#include "gmp-impl.h"
diff --git a/mpz/hamdist.c b/mpz/hamdist.c
new file mode 100644
index 0000000..1dfb7b8
--- /dev/null
+++ b/mpz/hamdist.c
@@ -0,0 +1,174 @@
+/* mpz_hamdist -- calculate hamming distance.
+
+Copyright 1994, 1996, 2001, 2002, 2009-2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+mp_bitcnt_t
+mpz_hamdist (mpz_srcptr u, mpz_srcptr v) __GMP_NOTHROW
+{
+  mp_srcptr      up, vp;
+  mp_size_t      usize, vsize;
+  mp_bitcnt_t    count;
+
+  usize = SIZ(u);
+  vsize = SIZ(v);
+
+  up = PTR(u);
+  vp = PTR(v);
+
+  if (usize >= 0)
+    {
+      if (vsize < 0)
+	return ~ (mp_bitcnt_t) 0;
+
+      /* positive/positive */
+
+      if (usize < vsize)
+	MPN_SRCPTR_SWAP (up,usize, vp,vsize);
+
+      count = 0;
+      if (vsize != 0)
+	count = mpn_hamdist (up, vp, vsize);
+
+      usize -= vsize;
+      if (usize != 0)
+	count += mpn_popcount (up + vsize, usize);
+
+      return count;
+    }
+  else
+    {
+      mp_limb_t  ulimb, vlimb;
+      mp_size_t  old_vsize, step;
+
+      if (vsize >= 0)
+	return ~ (mp_bitcnt_t) 0;
+
+      /* negative/negative */
+
+      usize = -usize;
+      vsize = -vsize;
+
+      /* skip common low zeros */
+      for (;;)
+	{
+	  ASSERT (usize > 0);
+	  ASSERT (vsize > 0);
+
+	  usize--;
+	  vsize--;
+
+	  ulimb = *up++;
+	  vlimb = *vp++;
+
+	  if (ulimb != 0)
+	    break;
+
+	  if (vlimb != 0)
+	    {
+	      MPN_SRCPTR_SWAP (up,usize, vp,vsize);
+	      ulimb = vlimb;
+	      vlimb = 0;
+	      break;
+	    }
+	}
+
+      /* twos complement first non-zero limbs (ulimb is non-zero, but vlimb
+	 might be zero) */
+      ulimb = -ulimb;
+      vlimb = -vlimb;
+      popc_limb (count, (ulimb ^ vlimb) & GMP_NUMB_MASK);
+
+      if (vlimb == 0)
+	{
+	  mp_bitcnt_t  twoscount;
+
+	  /* first non-zero of v */
+	  old_vsize = vsize;
+	  do
+	    {
+	      ASSERT (vsize > 0);
+	      vsize--;
+	      vlimb = *vp++;
+	    }
+	  while (vlimb == 0);
+
+	  /* part of u corresponding to skipped v zeros */
+	  step = old_vsize - vsize - 1;
+	  count += step * GMP_NUMB_BITS;
+	  step = MIN (step, usize);
+	  if (step != 0)
+	    {
+	      count -= mpn_popcount (up, step);
+	      usize -= step;
+	      up += step;
+	    }
+
+	  /* First non-zero vlimb as twos complement, xor with ones
+	     complement ulimb.  Note -v^(~0^u) == (v-1)^u. */
+	  vlimb--;
+	  if (usize != 0)
+	    {
+	      usize--;
+	      vlimb ^= *up++;
+	    }
+	  popc_limb (twoscount, vlimb);
+	  count += twoscount;
+	}
+
+      /* Overlapping part of u and v, if any.  Ones complement both, so just
+	 plain hamdist. */
+      step = MIN (usize, vsize);
+      if (step != 0)
+	{
+	  count += mpn_hamdist (up, vp, step);
+	  usize -= step;
+	  vsize -= step;
+	  up += step;
+	  vp += step;
+	}
+
+      /* Remaining high part of u or v, if any, ones complement but xor
+	 against all ones in the other, so plain popcount. */
+      if (usize != 0)
+	{
+	remaining:
+	  count += mpn_popcount (up, usize);
+	}
+      else if (vsize != 0)
+	{
+	  up = vp;
+	  usize = vsize;
+	  goto remaining;
+	}
+      return count;
+    }
+}
diff --git a/mpz/import.c b/mpz/import.c
new file mode 100644
index 0000000..9ff0670
--- /dev/null
+++ b/mpz/import.c
@@ -0,0 +1,166 @@
+/* mpz_import -- set mpz from word data.
+
+Copyright 2002, 2012, 2021, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+
+/* HOST_ENDIAN is 1 for big-endian limbs, -1 for little-endian limbs.
+   When the configure-time limb endianness is unknown, fall back to a
+   runtime probe: the low-addressed byte of endian_test is 0xFF (reads as
+   -1 through signed char) on little endian, 0x01 on big endian.  */
+#if HAVE_LIMB_BIG_ENDIAN
+#define HOST_ENDIAN     1
+#endif
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define HOST_ENDIAN     (-1)
+#endif
+#ifndef HOST_ENDIAN
+static const mp_limb_t  endian_test = (CNST_LIMB(1) << (GMP_LIMB_BITS-7)) - 1;
+#define HOST_ENDIAN     (* (signed char *) &endian_test)
+#endif
+
+
+/* Set Z from COUNT words of SIZE bytes each at DATA.
+   order:  1 = most significant word first, -1 = least significant first.
+   endian: 1 = big-endian bytes within a word, -1 = little-endian,
+           0 = host native byte order.
+   nail:   number of high bits of each word to ignore.  */
+void
+mpz_import (mpz_ptr z, size_t count, int order,
+	    size_t size, int endian, size_t nail, const void *data)
+{
+  mp_size_t  zsize;
+  mp_ptr     zp;
+
+  ASSERT (order == 1 || order == -1);
+  ASSERT (endian == 1 || endian == 0 || endian == -1);
+  ASSERT (nail <= 8*size);
+
+  /* Result needs at most count*(8*size-nail) bits.  */
+  zsize = BITS_TO_LIMBS (count * (8*size - nail));
+  zp = MPZ_NEWALLOC (z, zsize);
+
+  if (endian == 0)
+    endian = HOST_ENDIAN;
+
+  /* Can't use these special cases with nails currently, since they don't
+     mask out the nail bits in the input data.  */
+  /* NOTE(review): the (char *) data - (char *) NULL subtraction is a
+     pre-uintptr_t alignment test; pedantically undefined behavior but
+     it is the idiom this code relies on for supported targets.  */
+  if (nail == 0 && GMP_NAIL_BITS == 0
+      && size == sizeof (mp_limb_t)
+      && (((char *) data - (char *) NULL) % sizeof (mp_limb_t)) == 0 /* align */)
+    {
+      /* Whole aligned limbs: copy, byte-swap, reverse, or both, as the
+	 combination of order and endian dictates.  */
+      if (order == -1)
+	{
+	  if (endian == HOST_ENDIAN)
+	    MPN_COPY (zp, (mp_srcptr) data, (mp_size_t) count);
+	  else /* if (endian == - HOST_ENDIAN) */
+	    MPN_BSWAP (zp, (mp_srcptr) data, (mp_size_t) count);
+	}
+      else /* if (order == 1) */
+	{
+	  if (endian == HOST_ENDIAN)
+	    MPN_REVERSE (zp, (mp_srcptr) data, (mp_size_t) count);
+	  else /* if (endian == - HOST_ENDIAN) */
+	    MPN_BSWAP_REVERSE (zp, (mp_srcptr) data, (mp_size_t) count);
+	}
+    }
+  else
+  {
+    /* General case: walk the input bytes from least significant upward,
+       packing numb = 8*size - nail bits from each word into limbs.  */
+    mp_limb_t      limb, byte, wbitsmask;
+    size_t         i, j, numb, wbytes;
+    mp_size_t      woffset;
+    unsigned char  *dp;
+    int            lbits, wbits;
+
+    numb = size * 8 - nail;
+
+    /* whole bytes to process */
+    wbytes = numb / 8;
+
+    /* partial byte to process */
+    wbits = numb % 8;
+    wbitsmask = (CNST_LIMB(1) << wbits) - 1;
+
+    /* offset to get to the next word after processing wbytes and wbits */
+    woffset = (numb + 7) / 8;
+    woffset = (endian >= 0 ? woffset : -woffset)
+      + (order < 0 ? size : - (mp_size_t) size);
+
+    /* least significant byte */
+    dp = (unsigned char *) data
+      + (order >= 0 ? (count-1)*size : 0) + (endian >= 0 ? size-1 : 0);
+
+/* Shift N new bits from "byte" into the limb accumulator, emitting a
+   completed limb (and keeping the overflow bits) when it fills up.  */
+#define ACCUMULATE(N)                                   \
+    do {                                                \
+      ASSERT (lbits < GMP_NUMB_BITS);                   \
+      ASSERT (limb <= (CNST_LIMB(1) << lbits) - 1);     \
+                                                        \
+      limb |= (mp_limb_t) byte << lbits;                \
+      lbits += (N);                                     \
+      if (lbits >= GMP_NUMB_BITS)                       \
+        {                                               \
+          *zp++ = limb & GMP_NUMB_MASK;                 \
+          lbits -= GMP_NUMB_BITS;                       \
+          ASSERT (lbits < (N));                         \
+          limb = byte >> ((N) - lbits);                 \
+        }                                               \
+    } while (0)
+
+    limb = 0;
+    lbits = 0;
+    for (i = 0; i < count; i++)
+      {
+	for (j = 0; j < wbytes; j++)
+	  {
+	    byte = *dp;
+	    dp -= endian;
+	    ACCUMULATE (8);
+	  }
+	if (wbits != 0)
+	  {
+	    byte = *dp & wbitsmask;
+	    dp -= endian;
+	    ACCUMULATE (wbits);
+	  }
+	dp += woffset;
+      }
+
+    /* Flush any bits still sitting in the accumulator.  */
+    if (lbits != 0)
+      {
+	ASSERT (lbits <= GMP_NUMB_BITS);
+	ASSERT_LIMB (limb);
+	*zp++ = limb;
+      }
+
+    ASSERT (zp == PTR(z) + zsize);
+
+    /* low byte of word after most significant */
+    ASSERT (dp == (unsigned char *) data
+	    + (order < 0 ? count*size : - (mp_size_t) size)
+	    + (endian >= 0 ? (mp_size_t) size - 1 : 0));
+
+  }
+
+  /* Strip high zero limbs; the result of an import is always >= 0.  */
+  zp = PTR(z);
+  MPN_NORMALIZE (zp, zsize);
+  SIZ(z) = zsize;
+}
diff --git a/mpz/init.c b/mpz/init.c
new file mode 100644
index 0000000..b85a2da
--- /dev/null
+++ b/mpz/init.c
@@ -0,0 +1,41 @@
+/* mpz_init() -- Make a new multiple precision number with value 0.
+
+Copyright 1991, 1993-1995, 2000-2002, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize X to 0 without allocating: ALLOC==0 with PTR aimed at a
+   shared read-only sentinel limb.  Any later write must reallocate
+   first, which ALLOC==0 forces.  (0xc1a0 is presumably just a
+   recognizable marker value -- it is never read as a number here.)  */
+void
+mpz_init (mpz_ptr x) __GMP_NOTHROW
+{
+  static const mp_limb_t dummy_limb=0xc1a0;
+  ALLOC (x) = 0;
+  PTR (x) = (mp_ptr) &dummy_limb;
+  SIZ (x) = 0;
+}
diff --git a/mpz/init2.c b/mpz/init2.c
new file mode 100644
index 0000000..cdb46bf
--- /dev/null
+++ b/mpz/init2.c
@@ -0,0 +1,50 @@
+/* mpz_init2 -- initialize mpz, with requested size in bits.
+
+Copyright 2001, 2002, 2008, 2021, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize X to 0 with space preallocated for BITS bits.  */
+void
+mpz_init2 (mpz_ptr x, mp_bitcnt_t bits)
+{
+  mp_size_t  new_alloc;
+
+  /* ceil(bits / GMP_NUMB_BITS), but at least 1 limb for bits == 0.  */
+  bits -= (bits != 0);		/* Round down, except if 0 */
+  new_alloc = 1 + bits / GMP_NUMB_BITS;
+
+  /* The _mp_alloc field is an int; reject requests that cannot fit when
+     the bit-count parameter type is wider than int.  */
+  if (sizeof (unsigned long) > sizeof (int)) /* param vs _mp_size field */
+    {
+      if (UNLIKELY (new_alloc > INT_MAX))
+	MPZ_OVERFLOW;
+    }
+
+  PTR(x) = __GMP_ALLOCATE_FUNC_LIMBS (new_alloc);
+  ALLOC(x) = new_alloc;
+  SIZ(x) = 0;
+}
diff --git a/mpz/inits.c b/mpz/inits.c
new file mode 100644
index 0000000..1660744
--- /dev/null
+++ b/mpz/inits.c
@@ -0,0 +1,53 @@
+/* mpz_inits() -- Initialize multiple mpz_t variables and set them to 0.
+
+Copyright 2009, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include "gmp-impl.h"
+
+/* Initialize a NULL-terminated list of mpz_t variables to 0, using the
+   same no-allocation sentinel scheme as mpz_init: ALLOC==0 and PTR
+   pointing at a shared read-only dummy limb.  */
+void
+mpz_inits (mpz_ptr x, ...) __GMP_NOTHROW
+{
+  static const mp_limb_t dummy_limb=0xc1a0;
+  va_list  ap;
+
+  va_start (ap, x);
+
+  do
+    {
+      ALLOC (x) = 0;
+      PTR (x) = (mp_ptr) &dummy_limb;
+      SIZ (x) = 0;
+
+      /* Advance to the next variable; the caller terminates the list
+	 with a NULL pointer.  */
+      x = va_arg (ap, mpz_ptr);
+    }
+  while (x != NULL);
+
+  va_end (ap);
+}
diff --git a/mpz/inp_raw.c b/mpz/inp_raw.c
new file mode 100644
index 0000000..746d926
--- /dev/null
+++ b/mpz/inp_raw.c
@@ -0,0 +1,172 @@
+/* mpz_inp_raw -- read an mpz_t in raw format.
+
+Copyright 2001, 2002, 2005, 2012, 2016, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+/* NTOH_LIMB_FETCH fetches a limb which is in network byte order (ie. big
+   endian) and produces a normal host byte order result. */
+
+#if HAVE_LIMB_BIG_ENDIAN
+/* Host limbs already big endian: a plain load suffices.  */
+#define NTOH_LIMB_FETCH(limb, src)  do { (limb) = *(src); } while (0)
+#endif
+
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define NTOH_LIMB_FETCH(limb, src)  BSWAP_LIMB_FETCH (limb, src)
+#endif
+
+/* Generic byte-at-a-time fallback when the limb byte order is not
+   known at configure time.  */
+#ifndef NTOH_LIMB_FETCH
+#define NTOH_LIMB_FETCH(limb, src)                              \
+  do {                                                          \
+    const unsigned char  *__p = (const unsigned char *) (src);  \
+    mp_limb_t  __limb;                                          \
+    int        __i;                                             \
+    __limb = 0;                                                 \
+    for (__i = 0; __i < GMP_LIMB_BYTES; __i++)               \
+      __limb = (__limb << 8) | __p[__i];                        \
+    (limb) = __limb;                                            \
+  } while (0)
+#endif
+
+
+/* Enhancement: The byte swap loop ought to be safe to vectorize on Cray
+   etc, but someone who knows what they're doing needs to check it.  */
+
+/* Read X from FP in raw format: a 4-byte big-endian signed byte count
+   (negative for a negative number) followed by that many big-endian
+   magnitude bytes.  Return the total bytes read (count field included),
+   or 0 on read error or oversize input.  */
+size_t
+mpz_inp_raw (mpz_ptr x, FILE *fp)
+{
+  unsigned char  csize_bytes[4];
+  mp_size_t      csize, abs_xsize, i;
+  size_t         size;
+  size_t         abs_csize;
+  char           *cp;
+  mp_ptr         xp, sp, ep;
+  mp_limb_t      slimb, elimb;
+
+  if (fp == 0)
+    fp = stdin;
+
+  /* 4 bytes for size */
+  if (UNLIKELY (fread (csize_bytes, sizeof (csize_bytes), 1, fp) != 1))
+    return 0;
+
+  size = (((size_t) csize_bytes[0] << 24) + ((size_t) csize_bytes[1] << 16) +
+	  ((size_t) csize_bytes[2] << 8)  + ((size_t) csize_bytes[3]));
+
+  /* Sign-extend the 32-bit two's complement count into csize; the two
+     subtractions of 0x80000000u take off 2^32 without needing a 64-bit
+     constant.  */
+  if (size < 0x80000000u)
+    csize = size;
+  else
+    csize = size - 0x80000000u - 0x80000000u;
+
+  abs_csize = ABS (csize);
+
+  if (UNLIKELY (abs_csize > ~(mp_bitcnt_t) 0 / 8))
+    return 0; /* Bit size overflows */
+
+  /* round up to a multiple of limbs */
+  abs_xsize = BITS_TO_LIMBS ((mp_bitcnt_t) abs_csize * 8);
+
+  if (abs_xsize != 0)
+    {
+      xp = MPZ_NEWALLOC (x, abs_xsize);
+
+      /* Get limb boundaries right in the read, for the benefit of the
+	 non-nails case.  */
+      xp[0] = 0;
+      cp = (char *) (xp + abs_xsize) - abs_csize;
+      if (UNLIKELY (fread (cp, abs_csize, 1, fp) != 1))
+	return 0;
+
+      if (GMP_NAIL_BITS == 0)
+	{
+	  /* Reverse limbs to least significant first, and byte swap.  If
+	     abs_xsize is odd then on the last iteration elimb and slimb are
+	     the same.  It doesn't seem extra code to handle that case
+	     separately, to save an NTOH.  */
+	  sp = xp;
+	  ep = xp + abs_xsize-1;
+	  for (i = 0; i < (abs_xsize+1)/2; i++)
+	    {
+	      NTOH_LIMB_FETCH (elimb, ep);
+	      NTOH_LIMB_FETCH (slimb, sp);
+	      *sp++ = elimb;
+	      *ep-- = slimb;
+	    }
+	}
+      else
+	{
+	  /* It ought to be possible to do the transformation in-place, but
+	     for now it's easier to use an extra temporary area.  */
+	  /* Nails case: repack 8-bit bytes (least significant first) into
+	     GMP_NUMB_BITS-wide limbs via a bit accumulator.  */
+	  mp_limb_t  byte, limb;
+	  int	     bits;
+	  mp_size_t  tpos;
+	  mp_ptr     tp;
+	  TMP_DECL;
+
+	  TMP_MARK;
+	  tp = TMP_ALLOC_LIMBS (abs_xsize);
+	  limb = 0;
+	  bits = 0;
+	  tpos = 0;
+	  for (i = abs_csize-1; i >= 0; i--)
+	    {
+	      byte = (unsigned char) cp[i];
+	      limb |= (byte << bits);
+	      bits += 8;
+	      if (bits >= GMP_NUMB_BITS)
+		{
+		  ASSERT (tpos < abs_xsize);
+		  tp[tpos++] = limb & GMP_NUMB_MASK;
+		  bits -= GMP_NUMB_BITS;
+		  ASSERT (bits < 8);
+		  limb = byte >> (8 - bits);
+		}
+	    }
+	  if (bits != 0)
+	    {
+	      ASSERT (tpos < abs_xsize);
+	      tp[tpos++] = limb;
+	    }
+	  ASSERT (tpos == abs_xsize);
+
+	  MPN_COPY (xp, tp, abs_xsize);
+	  TMP_FREE;
+	}
+
+      /* GMP 1.x mpz_out_raw wrote high zero bytes, strip any high zero
+	 limbs resulting from this.  Should be a non-zero value here, but
+	 for safety don't assume that. */
+      MPN_NORMALIZE (xp, abs_xsize);
+    }
+
+  SIZ(x) = (csize >= 0 ? abs_xsize : -abs_xsize);
+  return abs_csize + 4;
+}
diff --git a/mpz/inp_str.c b/mpz/inp_str.c
new file mode 100644
index 0000000..0756055
--- /dev/null
+++ b/mpz/inp_str.c
@@ -0,0 +1,173 @@
+/* mpz_inp_str(dest_integer, stream, base) -- Input a number in base
+   BASE from stdio stream STREAM and store the result in DEST_INTEGER.
+
+   OF THE FUNCTIONS IN THIS FILE, ONLY mpz_inp_str IS FOR EXTERNAL USE, THE
+   REST ARE INTERNALS AND ARE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE
+   CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 2000-2003, 2011-2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define digit_value_tab __gmp_digit_value_tab
+
+/* Read a number in base BASE from STREAM into X, after skipping any
+   leading whitespace.  Returns the number of characters consumed, or 0
+   on error.  The heavy lifting is done by mpz_inp_str_nowhite.  */
+size_t
+mpz_inp_str (mpz_ptr x, FILE *stream, int base)
+{
+  int c;
+  size_t nread;
+
+  if (stream == 0)
+    stream = stdin;
+
+  nread = 0;
+
+  /* Skip whitespace.  */
+  do
+    {
+      c = getc (stream);
+      nread++;
+    }
+  while (isspace (c));
+
+  /* c is the first non-whitespace character (or EOF), already counted
+     in nread.  */
+  return mpz_inp_str_nowhite (x, stream, base, c, nread);
+}
+
+/* shared by mpq_inp_str */
+/* shared by mpq_inp_str */
+/* Parse digits from STREAM into X, starting with the already-read
+   character C; NREAD is the count of characters consumed so far.
+   Handles an optional leading '-', base detection for base==0
+   (0x/0X hex, 0b/0B binary, leading 0 octal, else decimal), and bases
+   up to 62.  Returns total characters consumed, or 0 on error.  */
+size_t
+mpz_inp_str_nowhite (mpz_ptr x, FILE *stream, int base, int c, size_t nread)
+{
+  char *str;
+  size_t alloc_size, str_size;
+  int negative;
+  mp_size_t xsize;
+  const unsigned char *digit_value;
+
+  ASSERT_ALWAYS (EOF == -1);	/* FIXME: handle this by adding explicit */
+				/* comparisons of c and EOF before each  */
+				/* read of digit_value[].  */
+
+  digit_value = digit_value_tab;
+  if (base > 36)
+    {
+      /* For bases > 36, use the collating sequence
+	 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
+      digit_value += 208;
+      if (UNLIKELY (base > 62))
+	return 0;		/* too large base */
+    }
+
+  negative = 0;
+  if (c == '-')
+    {
+      negative = 1;
+      c = getc (stream);
+      nread++;
+    }
+
+  if (c == EOF || digit_value[c] >= (base == 0 ? 10 : base))
+    return 0;			/* error if no digits */
+
+  /* If BASE is 0, try to find out the base by looking at the initial
+     characters.  */
+  if (base == 0)
+    {
+      base = 10;
+      if (c == '0')
+	{
+	  base = 8;
+	  c = getc (stream);
+	  nread++;
+	  if (c == 'x' || c == 'X')
+	    {
+	      base = 16;
+	      c = getc (stream);
+	      nread++;
+	    }
+	  else if (c == 'b' || c == 'B')
+	    {
+	      base = 2;
+	      c = getc (stream);
+	      nread++;
+	    }
+	}
+    }
+
+  /* Skip leading zeros.  */
+  while (c == '0')
+    {
+      c = getc (stream);
+      nread++;
+    }
+
+  /* Collect digit values (not characters) into a growable buffer for
+     mpn_set_str; grow by a factor of 1.5 as needed.  */
+  alloc_size = 100;
+  str = __GMP_ALLOCATE_FUNC_TYPE (alloc_size, char);
+  str_size = 0;
+
+  while (c != EOF)
+    {
+      int dig;
+      dig = digit_value[c];
+      if (dig >= base)
+	break;
+      if (str_size >= alloc_size)
+	{
+	  size_t old_alloc_size = alloc_size;
+	  alloc_size = alloc_size * 3 / 2;
+	  str = __GMP_REALLOCATE_FUNC_TYPE (str, old_alloc_size, alloc_size, char);
+	}
+      str[str_size++] = dig;
+      c = getc (stream);
+    }
+  nread += str_size;
+
+  /* Push back the terminating character; it was never counted.  */
+  ungetc (c, stream);
+  nread--;
+
+  /* Make sure the string is not empty, mpn_set_str would fail.  */
+  if (str_size == 0)
+    {
+      SIZ (x) = 0;
+    }
+  else
+    {
+      LIMBS_PER_DIGIT_IN_BASE (xsize, str_size, base);
+      MPZ_NEWALLOC (x, xsize);
+
+      /* Convert the byte array in base BASE to our bignum format.  */
+      xsize = mpn_set_str (PTR (x), (unsigned char *) str, str_size, base);
+      SIZ (x) = negative ? -xsize : xsize;
+    }
+  (*__gmp_free_func) (str, alloc_size);
+  return nread;
+}
diff --git a/mpz/invert.c b/mpz/invert.c
new file mode 100644
index 0000000..5532d13
--- /dev/null
+++ b/mpz/invert.c
@@ -0,0 +1,72 @@
+/* mpz_invert (inv, x, n).  Find multiplicative inverse of X in Z(N).
+   If X has an inverse, return non-zero and store inverse in INVERSE,
+   otherwise, return 0 and put garbage in INVERSE.
+
+Copyright 1996-2001, 2005, 2012, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Compute the multiplicative inverse of X modulo N via the extended
+   GCD.  Returns non-zero and stores the inverse in INVERSE when
+   gcd(x,n) == 1, otherwise returns 0 (INVERSE then holds garbage, per
+   the documented contract).  */
+int
+mpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n)
+{
+  mpz_t gcd, tmp;
+  mp_size_t xsize, nsize, size;
+  TMP_DECL;
+
+  xsize = ABSIZ (x);
+  nsize = ABSIZ (n);
+
+  /* gcdext cofactor needs at most one limb more than the larger input.  */
+  size = MAX (xsize, nsize) + 1;
+  TMP_MARK;
+
+  MPZ_TMP_INIT (gcd, size);
+  MPZ_TMP_INIT (tmp, size);
+  mpz_gcdext (gcd, tmp, (mpz_ptr) 0, x, n);
+
+  /* If no inverse existed, return with an indication of that.  */
+  if (!MPZ_EQUAL_1_P (gcd))
+    {
+      TMP_FREE;
+      return 0;
+    }
+
+  /* Make sure we return a positive inverse.  */
+  if (SIZ (tmp) < 0)
+    {
+      /* Add |n| to the negative cofactor: subtract n when n < 0.  */
+      if (SIZ (n) < 0)
+	mpz_sub (inverse, tmp, n);
+      else
+	mpz_add (inverse, tmp, n);
+    }
+  else
+    mpz_set (inverse, tmp);
+
+  TMP_FREE;
+  return 1;
+}
diff --git a/mpz/ior.c b/mpz/ior.c
new file mode 100644
index 0000000..32f3042
--- /dev/null
+++ b/mpz/ior.c
@@ -0,0 +1,184 @@
+/* mpz_ior -- Logical inclusive or.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005, 2012, 2013,
+2015-2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* RES = OP1 | OP2, treating negative operands as infinite-precision
+   two's complement.  Three cases after ordering so |op1| >= |op2| by
+   signed size: both non-negative, both negative, and mixed sign.  */
+void
+mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+{
+  mp_srcptr op1_ptr, op2_ptr;
+  mp_size_t op1_size, op2_size;
+  mp_ptr res_ptr;
+  mp_size_t res_size;
+  mp_size_t i;
+
+  op1_size = SIZ(op1);
+  op2_size = SIZ(op2);
+
+  /* Order so op1_size >= op2_size; any negative operand ends up as op2
+     (or both op1 and op2).  */
+  if (op1_size < op2_size)
+    {
+      MPZ_SRCPTR_SWAP (op1, op2);
+      MP_SIZE_T_SWAP (op1_size, op2_size);
+    }
+
+  op1_ptr = PTR(op1);
+  res_ptr = PTR(res);
+
+  if (op2_size >= 0)
+    {
+      /* Both operands non-negative: plain limb-wise OR, with op1's high
+	 limbs copied through unchanged.  */
+      if (res_ptr != op1_ptr)
+	{
+	  res_ptr = MPZ_REALLOC (res, op1_size);
+	  /* No overlapping possible: op1_ptr = PTR(op1); */
+	  MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+		    op1_size - op2_size);
+	}
+      if (LIKELY (op2_size != 0))
+	mpn_ior_n (res_ptr, op1_ptr, PTR(op2), op2_size);
+
+      SIZ(res) = op1_size;
+    }
+  else
+    {
+      mp_ptr opx;
+      TMP_DECL;
+
+      TMP_MARK;
+      if (op1_size < 0)
+	{
+	  mp_ptr opy;
+
+	  /* Both operands are negative, so will be the result.
+	     -((-OP1) | (-OP2)) = -(~(OP1 - 1) | ~(OP2 - 1)) =
+	     = ~(~(OP1 - 1) | ~(OP2 - 1)) + 1 =
+	     = ((OP1 - 1) & (OP2 - 1)) + 1      */
+
+	  res_size = -op1_size;
+
+	  /* Possible optimization: Decrease mpn_sub precision,
+	     as we won't use the entire res of both.  */
+	  TMP_ALLOC_LIMBS_2 (opx, res_size, opy, res_size);
+	  mpn_sub_1 (opx, op1_ptr, res_size, (mp_limb_t) 1);
+	  op1_ptr = opx;
+
+	  mpn_sub_1 (opy, PTR(op2), res_size, (mp_limb_t) 1);
+	  op2_ptr = opy;
+
+	  /* First loop finds the size of the result.  */
+	  for (i = res_size; --i >= 0;)
+	    if ((op1_ptr[i] & op2_ptr[i]) != 0)
+	      break;
+	  res_size = i + 1;
+
+	  /* One extra limb in case the +1 below carries out.  */
+	  res_ptr = MPZ_NEWALLOC (res, res_size + 1);
+
+	  if (res_size != 0)
+	    {
+	      /* Second loop computes the real result.  */
+	      mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);
+
+	      res_ptr[res_size] = 0;
+	      MPN_INCR_U (res_ptr, res_size + 1, 1);
+	      res_size += res_ptr[res_size];
+	    }
+	  else
+	    {
+	      /* AND came out zero, so the result is -1.  */
+	      res_ptr[0] = 1;
+	      res_size = 1;
+	    }
+
+	  SIZ(res) = -res_size;
+	}
+      else
+	{
+	  mp_limb_t cy;
+	  mp_size_t count;
+
+	  /* Operand 2 negative, so will be the result.
+	     -(OP1 | (-OP2)) = -(OP1 | ~(OP2 - 1)) =
+	     = ~(OP1 | ~(OP2 - 1)) + 1 =
+	     = (~OP1 & (OP2 - 1)) + 1      */
+
+	  op2_size = -op2_size;
+
+	  res_ptr = MPZ_REALLOC (res, op2_size);
+	  op1_ptr = PTR(op1);
+
+	  opx = TMP_ALLOC_LIMBS (op2_size);
+	  mpn_sub_1 (opx, PTR(op2), op2_size, (mp_limb_t) 1);
+	  op2_ptr = opx;
+	  /* The -1 may have shrunk op2 by one limb.  */
+	  op2_size -= op2_ptr[op2_size - 1] == 0;
+
+	  if (op1_size >= op2_size)
+	    {
+	      /* We can just ignore the part of OP1 that stretches above OP2,
+		 because the result limbs are zero there.  */
+
+	      /* First loop finds the size of the result.  */
+	      for (i = op2_size; --i >= 0;)
+		if ((~op1_ptr[i] & op2_ptr[i]) != 0)
+		  break;
+	      res_size = i + 1;
+	      count = res_size;
+	    }
+	  else
+	    {
+	      res_size = op2_size;
+
+	      /* Copy the part of OP2 that stretches above OP1, to RES.  */
+	      MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
+	      count = op1_size;
+	    }
+
+	  if (res_size != 0)
+	    {
+	      /* Second loop computes the real result.  */
+	      if (LIKELY (count != 0))
+		mpn_andn_n (res_ptr, op2_ptr, op1_ptr, count);
+
+	      cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+	      if (cy)
+		{
+		  res_ptr[res_size] = cy;
+		  ++res_size;
+		}
+	    }
+	  else
+	    {
+	      /* ANDN came out zero, so the result is -1.  */
+	      res_ptr[0] = 1;
+	      res_size = 1;
+	    }
+
+	  SIZ(res) = -res_size;
+	}
+      TMP_FREE;
+    }
+}
diff --git a/mpz/iset.c b/mpz/iset.c
new file mode 100644
index 0000000..252cada
--- /dev/null
+++ b/mpz/iset.c
@@ -0,0 +1,52 @@
+/* mpz_init_set (src_integer) -- Make a new multiple precision number with
+   a value copied from SRC_INTEGER.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize W and set it to a copy of U.  Unlike mpz_init, this always
+   allocates (at least one limb, so W has writable storage even when U
+   is zero).  */
+void
+mpz_init_set (mpz_ptr w, mpz_srcptr u)
+{
+  mp_ptr wp, up;
+  mp_size_t usize, size;
+
+  usize = SIZ (u);
+  size = ABS (usize);
+
+  ALLOC (w) = MAX (size, 1);
+  wp = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC (w));
+
+  PTR (w) = wp;
+  up = PTR (u);
+
+  /* size may be 0 here; MPN_COPY then copies nothing.  */
+  MPN_COPY (wp, up, size);
+  SIZ (w) = usize;
+}
diff --git a/mpz/iset_d.c b/mpz/iset_d.c
new file mode 100644
index 0000000..5d04a6f
--- /dev/null
+++ b/mpz/iset_d.c
@@ -0,0 +1,43 @@
+/* mpz_init_set_d(integer, val) -- Initialize and assign INTEGER with a double
+   value VAL.
+
+Copyright 1996, 2000, 2001, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize DEST (same no-allocation sentinel scheme as mpz_init) and
+   then assign the double VAL via mpz_set_d, which performs any needed
+   allocation.  */
+void
+mpz_init_set_d (mpz_ptr dest, double val)
+{
+  static const mp_limb_t dummy_limb=0xc1a0;
+
+  ALLOC (dest) = 0;
+  SIZ (dest) = 0;
+  PTR (dest) = (mp_ptr) &dummy_limb;
+  mpz_set_d (dest, val);
+}
diff --git a/mpz/iset_si.c b/mpz/iset_si.c
new file mode 100644
index 0000000..7179cb0
--- /dev/null
+++ b/mpz/iset_si.c
@@ -0,0 +1,58 @@
+/* mpz_init_set_si(dest,val) -- Make a new multiple precision in DEST and
+   assign VAL to the new number.
+
+Copyright 1991, 1993-1995, 2000-2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize DEST and set it to the signed long VAL.  */
+void
+mpz_init_set_si (mpz_ptr dest, signed long int val)
+{
+  mp_size_t size;
+  mp_limb_t vl;
+
+  ALLOC (dest) = 1;
+  PTR (dest) = __GMP_ALLOCATE_FUNC_LIMBS (1);
+
+  /* Take the absolute value without overflowing on LONG_MIN.  */
+  vl = (mp_limb_t) ABS_CAST (unsigned long int, val);
+
+  PTR (dest)[0] = vl & GMP_NUMB_MASK;
+  /* size is 0 for val == 0, else 1 (possibly bumped to 2 below).  */
+  size = vl != 0;
+
+#if GMP_NAIL_BITS != 0
+  /* With nails, a long may not fit in one limb's numb bits.  */
+  if (vl > GMP_NUMB_MAX)
+    {
+      MPZ_REALLOC (dest, 2);
+      PTR (dest)[1] = vl >> GMP_NUMB_BITS;
+      size = 2;
+    }
+#endif
+
+  SIZ (dest) = val >= 0 ? size : -size;
+}
diff --git a/mpz/iset_str.c b/mpz/iset_str.c
new file mode 100644
index 0000000..2df12f9
--- /dev/null
+++ b/mpz/iset_str.c
@@ -0,0 +1,47 @@
+/* mpz_init_set_str(x, string, base) -- Convert the \0-terminated string STRING in
+   base BASE to a multiple precision integer.  Allow white space in the string.
+   If BASE == 0 determine the base in the C standard way, i.e.  0xhh...h means
+   base 16, 0oo...o means base 8, otherwise assume base 10.
+
+Copyright 1991, 1993-1995, 2000-2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize X and set its value from the string STR in base BASE.
+   Returns the result of mpz_set_str (0 on success, -1 if STR is not a
+   valid number in the given base).  */
+int
+mpz_init_set_str (mpz_ptr x, const char *str, int base)
+{
+  /* Lazy-allocation trick: start with ALLOC == 0 and point PTR at a
+     shared read-only dummy limb.  NOTE(review): this relies on
+     mpz_set_str (re)allocating before any limb write, so dummy_limb
+     itself is never modified and casting away const is safe here.  */
+  static const mp_limb_t dummy_limb=0xc1a0;
+  ALLOC (x) = 0;
+  PTR (x) = (mp_ptr) &dummy_limb;
+
+  /* if str has no digits mpz_set_str leaves x->_mp_size unset */
+  SIZ (x) = 0;
+
+  return mpz_set_str (x, str, base);
+}
diff --git a/mpz/iset_ui.c b/mpz/iset_ui.c
new file mode 100644
index 0000000..22a8e15
--- /dev/null
+++ b/mpz/iset_ui.c
@@ -0,0 +1,58 @@
+/* mpz_init_set_ui(dest,val) -- Make a new multiple precision number in DEST and
+   assign VAL to the new number.
+
+Copyright 1991, 1993-1995, 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize DEST and set its value from the unsigned long VAL.  One
+   limb normally suffices; when an unsigned long is wider than a numb
+   (nails), two limbs are allocated for large values.  */
+void
+mpz_init_set_ui (mpz_ptr dest, unsigned long int val)
+{
+  mp_size_t size;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (val > GMP_NUMB_MAX)
+    {
+      ALLOC (dest) = 2;
+      PTR (dest) = __GMP_ALLOCATE_FUNC_LIMBS (2);
+      PTR (dest)[1] = val >> GMP_NUMB_BITS;
+      size = 2;
+    }
+  else
+#endif
+    {
+      ALLOC (dest) = 1;
+      PTR (dest) = __GMP_ALLOCATE_FUNC_LIMBS (1);
+
+      size = val != 0;  /* zero is represented with size 0 */
+    }
+  PTR (dest)[0] = val & GMP_NUMB_MASK;
+
+  SIZ (dest) = size;  /* val is unsigned, so size is never negated */
+}
diff --git a/mpz/jacobi.c b/mpz/jacobi.c
new file mode 100644
index 0000000..cd556d7
--- /dev/null
+++ b/mpz/jacobi.c
@@ -0,0 +1,210 @@
+/* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.
+
+Copyright 2000-2002, 2005, 2010-2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This code does triple duty as mpz_jacobi, mpz_legendre and
+   mpz_kronecker. For ABI compatibility, the link symbol is
+   __gmpz_jacobi, not __gmpz_kronecker, even though the latter would
+   be more logical.
+
+   mpz_jacobi could assume b is odd, but the improvements from that seem
+   small compared to other operations, and anything significant should be
+   checked at run-time since we'd like odd b to go fast in mpz_kronecker
+   too.
+
+   mpz_legendre could assume b is an odd prime, but knowing this doesn't
+   present any obvious benefits.  Result 0 wouldn't arise (unless "a" is a
+   multiple of b), but the checking for that takes little time compared to
+   other operations.
+
+   Enhancements:
+
+   mpn_bdiv_qr should be used instead of mpn_tdiv_qr.
+
+*/
+
+/* Return the Jacobi/Kronecker symbol (a/b): +1, -1 or 0.  Sign flips
+   are accumulated in bit 1 of result_bit1 and converted to +/-1 at the
+   end by JACOBI_BIT1_TO_PN or mpn_jacobi_base.  */
+int
+mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
+{
+  mp_srcptr  asrcp, bsrcp;
+  mp_size_t  asize, bsize;
+  mp_limb_t  alow, blow;
+  mp_ptr     ap, bp;
+  unsigned   btwos;
+  int        result_bit1;
+  int        res;
+  TMP_DECL;
+
+  asize = SIZ(a);
+  asrcp = PTR(a);
+  alow = asrcp[0];
+
+  bsize = SIZ(b);
+  bsrcp = PTR(b);
+  blow = bsrcp[0];
+
+  /* The MPN jacobi functions require positive a and b, and b odd.  So
+     we must handle the cases of a or b zero, then signs, and then
+     the case of even b.
+  */
+
+  if (bsize == 0)
+    /* (a/0) = [ a = 1 or a = -1 ] */
+    return JACOBI_LS0 (alow, asize);
+
+  if (asize == 0)
+    /* (0/b) = [ b = 1 or b = - 1 ] */
+    return JACOBI_0LS (blow, bsize);
+
+  if ( (((alow | blow) & 1) == 0))
+    /* Common factor of 2 ==> (a/b) = 0 */
+    return 0;
+
+  if (bsize < 0)
+    {
+      /* (a/-1) = -1 if a < 0, +1 if a >= 0 */
+      result_bit1 = (asize < 0) << 1;
+      bsize = -bsize;
+    }
+  else
+    result_bit1 = 0;
+
+  JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);
+
+  /* Strip the factor 2^btwos from b; blow becomes the low limb of the
+     shifted (odd) b.  */
+  count_trailing_zeros (btwos, blow);
+  blow >>= btwos;
+
+  if (bsize > 1 && btwos > 0)
+    {
+      mp_limb_t b1 = bsrcp[1];
+      blow |= b1 << (GMP_NUMB_BITS - btwos);
+      if (bsize == 2 && (b1 >> btwos) == 0)
+	bsize = 1;
+    }
+
+  if (asize < 0)
+    {
+      /* (-1/b) = -1 iff b = 3 (mod 4) */
+      result_bit1 ^= JACOBI_N1B_BIT1(blow);
+      asize = -asize;
+    }
+
+  JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
+
+  /* Ensure asize >= bsize. Take advantage of the generalized
+     reciprocity law (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */
+
+  if (asize < bsize)
+    {
+      MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize);
+      MP_LIMB_T_SWAP (alow, blow);
+
+      /* NOTE: The value of alow (old blow) is a bit subtle. For this code
+	 path, we get alow as the low, always odd, limb of shifted A. Which is
+	 what we need for the reciprocity update below.
+
+	 However, all other uses of alow assumes that it is *not*
+	 shifted. Luckily, alow matters only when either
+
+	 + btwos > 0, in which case A is always odd
+
+	 + asize == bsize == 1, in which case this code path is never
+	   taken. */
+
+      count_trailing_zeros (btwos, blow);
+      blow >>= btwos;
+
+      if (bsize > 1 && btwos > 0)
+	{
+	  mp_limb_t b1 = bsrcp[1];
+	  blow |= b1 << (GMP_NUMB_BITS - btwos);
+	  if (bsize == 2 && (b1 >> btwos) == 0)
+	    bsize = 1;
+	}
+
+      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
+    }
+
+  if (bsize == 1)
+    {
+      /* Single-limb b: reduce a mod b if needed and finish with the
+	 single-limb base case.  */
+      result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
+
+      if (blow == 1)
+	return JACOBI_BIT1_TO_PN (result_bit1);
+
+      if (asize > 1)
+	JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
+
+      return mpn_jacobi_base (alow, blow, result_bit1);
+    }
+
+  /* Allocation strategy: For A, we allocate a working copy only for A % B, but
+     when A is much larger than B, we have to allocate space for the large
+     quotient. We use the same area, pointed to by bp, for both the quotient
+     A/B and the working copy of B. */
+
+  TMP_MARK;
+
+  if (asize >= 2*bsize)
+    TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1);
+  else
+    TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize);
+
+  /* In the case of even B, we conceptually shift out the powers of two first,
+     and then divide A mod B. Hence, when taking those powers of two into
+     account, we must use alow *before* the division. Doing the actual division
+     first is ok, because the point is to remove multiples of B from A, and
+     multiples of 2^k B are good enough. */
+  if (asize > bsize)
+    mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize);
+  else
+    MPN_COPY (ap, asrcp, bsize);
+
+  if (btwos > 0)
+    {
+      result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
+
+      ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos));
+      bsize -= (ap[bsize-1] | bp[bsize-1]) == 0;
+    }
+  else
+    MPN_COPY (bp, bsrcp, bsize);
+
+  ASSERT (blow == bp[0]);
+  res = mpn_jacobi_n (ap, bp, bsize,
+		      mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1));
+
+  TMP_FREE;
+  return res;
+}
diff --git a/mpz/kronsz.c b/mpz/kronsz.c
new file mode 100644
index 0000000..92cc971
--- /dev/null
+++ b/mpz/kronsz.c
@@ -0,0 +1,137 @@
+/* mpz_si_kronecker -- long+mpz Kronecker/Jacobi symbol.
+
+Copyright 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return the Kronecker/Jacobi symbol (a/b) for signed long a: +1, -1
+   or 0.  Sign flips are accumulated in bit 1 of result_bit1 and
+   converted at the end by JACOBI_BIT1_TO_PN / mpn_jacobi_base.  */
+int
+mpz_si_kronecker (long a, mpz_srcptr b)
+{
+  mp_srcptr  b_ptr;
+  mp_limb_t  b_low;
+  mp_size_t  b_size;
+  mp_size_t  b_abs_size;
+  mp_limb_t  a_limb, b_rem;
+  unsigned   twos;
+  int        result_bit1;
+
+#if GMP_NUMB_BITS < BITS_PER_ULONG
+  /* When a numb is narrower than a long (e.g. with nails), a may not
+     fit in one limb; fall back to the mpz/mpz routine using a
+     stack-allocated two-limb copy of a.  */
+  if (a > GMP_NUMB_MAX || a < -GMP_NUMB_MAX)
+    {
+      mp_limb_t  alimbs[2];
+      mpz_t      az;
+      ALLOC(az) = numberof (alimbs);
+      PTR(az) = alimbs;
+      mpz_set_si (az, a);
+      return mpz_kronecker (az, b);
+    }
+#endif
+
+  b_size = SIZ (b);
+  if (b_size == 0)
+    return JACOBI_S0 (a);  /* (a/0) */
+
+  /* account for the effect of the sign of b, then ignore it */
+  result_bit1 = JACOBI_BSGN_SS_BIT1 (a, b_size);
+
+  b_ptr = PTR(b);
+  b_low = b_ptr[0];
+  b_abs_size = ABS (b_size);
+
+  if ((b_low & 1) != 0)
+    {
+      /* b odd */
+
+      result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);
+      a_limb = ABS_CAST(mp_limb_t, a);
+
+      if ((a_limb & 1) == 0)
+	{
+	  /* (0/b)=1 for b=+/-1, 0 otherwise */
+	  if (a_limb == 0)
+	    return (b_abs_size == 1 && b_low == 1);
+
+	  /* a even, b odd */
+	  count_trailing_zeros (twos, a_limb);
+	  a_limb >>= twos;
+	  /* (a*2^n/b) = (a/b) * twos(n,a) */
+	  result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);
+	}
+    }
+  else
+    {
+      /* (even/even)=0, and (0/b)=0 for b!=+/-1 */
+      if ((a & 1) == 0)
+	return 0;
+
+      /* a odd, b even
+
+	 Establish shifted b_low with valid bit1 for ASGN and RECIP below.
+	 Zero limbs stripped are accounted for, but zero bits on b_low are
+	 not because they remain in {b_ptr,b_abs_size} for the
+	 JACOBI_MOD_OR_MODEXACT_1_ODD. */
+
+      JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);
+      if ((b_low & 1) == 0)
+	{
+	  if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
+	    {
+	      /* need b_ptr[1] to get bit1 in b_low */
+	      if (b_abs_size == 1)
+		{
+		  /* (a/0x80000000) = (a/2)^(BPML-1) */
+		  if ((GMP_NUMB_BITS % 2) == 0)
+		    result_bit1 ^= JACOBI_TWO_U_BIT1 (a);
+		  return JACOBI_BIT1_TO_PN (result_bit1);
+		}
+
+	      /* b_abs_size > 1 */
+	      b_low = b_ptr[1] << 1;
+	    }
+	  else
+	    {
+	      count_trailing_zeros (twos, b_low);
+	      b_low >>= twos;
+	    }
+	}
+
+      result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);
+      a_limb = (unsigned long) ABS(a);
+    }
+
+  if (a_limb == 1)
+    return JACOBI_BIT1_TO_PN (result_bit1);  /* (1/b)=1 */
+
+  /* (a/b*2^n) = (b*2^n mod a / a) * recip(a,b) */
+  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, b_rem, b_ptr, b_abs_size, a_limb);
+  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a_limb, b_low);
+  return mpn_jacobi_base (b_rem, a_limb, result_bit1);
+}
diff --git a/mpz/kronuz.c b/mpz/kronuz.c
new file mode 100644
index 0000000..ba5c6dd
--- /dev/null
+++ b/mpz/kronuz.c
@@ -0,0 +1,129 @@
+/* mpz_ui_kronecker -- ulong+mpz Kronecker/Jacobi symbol.
+
+Copyright 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return the Kronecker/Jacobi symbol (a/b) for unsigned long a: +1, -1
+   or 0.  Sign flips are accumulated in bit 1 of result_bit1 and
+   converted at the end by JACOBI_BIT1_TO_PN / mpn_jacobi_base.  */
+int
+mpz_ui_kronecker (unsigned long a, mpz_srcptr b)
+{
+  mp_srcptr  b_ptr;
+  mp_limb_t  b_low;
+  int        b_abs_size;
+  mp_limb_t  b_rem;
+  int        twos;
+  int        result_bit1;
+
+  /* (a/-1)=1 when a>=0, so the sign of b is ignored */
+  b_abs_size = ABSIZ (b);
+
+  if (b_abs_size == 0)
+    return JACOBI_U0 (a);  /* (a/0) */
+
+  /* a wider than one numb (possible with nails): fall back to the
+     mpz/mpz routine using a stack-allocated two-limb copy of a.  */
+  if (a > GMP_NUMB_MAX)
+    {
+      mp_limb_t  alimbs[2];
+      mpz_t      az;
+      ALLOC(az) = numberof (alimbs);
+      PTR(az) = alimbs;
+      mpz_set_ui (az, a);
+      return mpz_kronecker (az, b);
+    }
+
+  b_ptr = PTR(b);
+  b_low = b_ptr[0];
+  result_bit1 = 0;
+
+  if (! (b_low & 1))
+    {
+      /* (0/b)=0 for b!=+/-1; and (even/even)=0 */
+      if (! (a & 1))
+	return 0;
+
+      /* a odd, b even
+
+	 Establish shifted b_low with valid bit1 for the RECIP below.  Zero
+	 limbs stripped are accounted for, but zero bits on b_low are not
+	 because they remain in {b_ptr,b_abs_size} for
+	 JACOBI_MOD_OR_MODEXACT_1_ODD. */
+
+      JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);
+      if (! (b_low & 1))
+	{
+	  if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
+	    {
+	      /* need b_ptr[1] to get bit1 in b_low */
+	      if (b_abs_size == 1)
+		{
+		  /* (a/0x80...00) == (a/2)^(NUMB-1) */
+		  if ((GMP_NUMB_BITS % 2) == 0)
+		    {
+		      /* JACOBI_STRIP_LOW_ZEROS does nothing to result_bit1
+			 when GMP_NUMB_BITS is even, so it's still 0. */
+		      ASSERT (result_bit1 == 0);
+		      result_bit1 = JACOBI_TWO_U_BIT1 (a);
+		    }
+		  return JACOBI_BIT1_TO_PN (result_bit1);
+		}
+
+	      /* b_abs_size > 1 */
+	      b_low = b_ptr[1] << 1;
+	    }
+	  else
+	    {
+	      count_trailing_zeros (twos, b_low);
+	      b_low >>= twos;
+	    }
+	}
+    }
+  else
+    {
+      if (a == 0)        /* (0/b)=1 for b=+/-1, 0 otherwise */
+	return (b_abs_size == 1 && b_low == 1);
+
+      if (! (a & 1))
+	{
+	  /* a even, b odd */
+	  count_trailing_zeros (twos, a);
+	  a >>= twos;
+	  /* (a*2^n/b) = (a/b) * (2/a)^n */
+	  result_bit1 = JACOBI_TWOS_U_BIT1 (twos, b_low);
+	}
+    }
+
+  if (a == 1)
+    return JACOBI_BIT1_TO_PN (result_bit1);  /* (1/b)=1 */
+
+  /* (a/b*2^n) = (b*2^n mod a / a) * RECIP(a,b) */
+  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, b_rem, b_ptr, b_abs_size, a);
+  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b_low);
+  return mpn_jacobi_base (b_rem, (mp_limb_t) a, result_bit1);
+}
diff --git a/mpz/kronzs.c b/mpz/kronzs.c
new file mode 100644
index 0000000..1f63f15
--- /dev/null
+++ b/mpz/kronzs.c
@@ -0,0 +1,92 @@
+/* mpz_kronecker_si -- mpz+long Kronecker/Jacobi symbol.
+
+Copyright 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* After the absolute value of b is established it's treated as an unsigned
+   long, because 0x80..00 doesn't fit in a signed long. */
+
+/* Return the Kronecker/Jacobi symbol (a/b) for mpz a and signed long b:
+   +1, -1 or 0.  Sign flips are accumulated in bit 1 of result_bit1 and
+   converted at the end by JACOBI_BIT1_TO_PN / mpn_jacobi_base.  */
+int
+mpz_kronecker_si (mpz_srcptr a, long b)
+{
+  mp_srcptr  a_ptr;
+  mp_size_t  a_size;
+  mp_limb_t  a_rem, b_limb;
+  int        result_bit1;
+
+  a_size = SIZ(a);
+  if (a_size == 0)
+    return JACOBI_0S (b);
+
+#if GMP_NUMB_BITS < BITS_PER_ULONG
+  /* |b| wider than one numb (possible with nails): fall back to the
+     mpz/mpz routine using a stack-allocated two-limb copy of b.  */
+  if (b > GMP_NUMB_MAX || b < -GMP_NUMB_MAX)
+    {
+      mp_limb_t  blimbs[2];
+      mpz_t      bz;
+      ALLOC(bz) = numberof (blimbs);
+      PTR(bz) = blimbs;
+      mpz_set_si (bz, b);
+      return mpz_kronecker (a, bz);
+    }
+#endif
+
+  result_bit1 = JACOBI_BSGN_SS_BIT1 (a_size, b);
+  b_limb = ABS_CAST (unsigned long, b);
+  a_ptr = PTR(a);
+
+  if ((b_limb & 1) == 0)
+    {
+      mp_limb_t  a_low = a_ptr[0];
+      int        twos;
+
+      if (b_limb == 0)
+	return JACOBI_LS0 (a_low, a_size);   /* (a/0) */
+
+      if (! (a_low & 1))
+	return 0;  /* (even/even)=0 */
+
+      /* (a/2)=(2/a) for a odd */
+      count_trailing_zeros (twos, b_limb);
+      b_limb >>= twos;
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, a_low);
+    }
+
+  if (b_limb == 1)
+    return JACOBI_BIT1_TO_PN (result_bit1);  /* (a/1)=1 for any a */
+
+  result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a_size, b_limb);
+  a_size = ABS(a_size);
+
+  /* (a/b) = (a mod b / b) */
+  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, a_rem, a_ptr, a_size, b_limb);
+  return mpn_jacobi_base (a_rem, b_limb, result_bit1);
+}
diff --git a/mpz/kronzu.c b/mpz/kronzu.c
new file mode 100644
index 0000000..b4fbf79
--- /dev/null
+++ b/mpz/kronzu.c
@@ -0,0 +1,88 @@
+/* mpz_kronecker_ui -- mpz+ulong Kronecker/Jacobi symbol.
+
+Copyright 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return the Kronecker/Jacobi symbol (a/b) for mpz a and unsigned long
+   b: +1, -1 or 0.  Sign flips are accumulated in bit 1 of result_bit1
+   and converted at the end by JACOBI_BIT1_TO_PN / mpn_jacobi_base.  */
+int
+mpz_kronecker_ui (mpz_srcptr a, unsigned long b)
+{
+  mp_srcptr  a_ptr;
+  mp_size_t  a_size;
+  mp_limb_t  a_rem;
+  int        result_bit1;
+
+  a_size = SIZ(a);
+  if (a_size == 0)
+    return JACOBI_0U (b);
+
+  /* b wider than one numb (possible with nails): fall back to the
+     mpz/mpz routine using a stack-allocated two-limb copy of b.  */
+  if (b > GMP_NUMB_MAX)
+    {
+      mp_limb_t  blimbs[2];
+      mpz_t      bz;
+      ALLOC(bz) = numberof (blimbs);
+      PTR(bz) = blimbs;
+      mpz_set_ui (bz, b);
+      return mpz_kronecker (a, bz);
+    }
+
+  a_ptr = PTR(a);
+  if ((b & 1) != 0)
+    {
+      result_bit1 = JACOBI_ASGN_SU_BIT1 (a_size, b);
+    }
+  else
+    {
+      mp_limb_t  a_low = a_ptr[0];
+      int        twos;
+
+      if (b == 0)
+	return JACOBI_LS0 (a_low, a_size);   /* (a/0) */
+
+      if (! (a_low & 1))
+	return 0;  /* (even/even)=0 */
+
+      /* (a/2)=(2/a) for a odd */
+      count_trailing_zeros (twos, b);
+      b >>= twos;
+      result_bit1 = (JACOBI_TWOS_U_BIT1 (twos, a_low)
+		     ^ JACOBI_ASGN_SU_BIT1 (a_size, b));
+    }
+
+  if (b == 1)
+    return JACOBI_BIT1_TO_PN (result_bit1);  /* (a/1)=1 for any a */
+
+  a_size = ABS(a_size);
+
+  /* (a/b) = (a mod b / b) */
+  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, a_rem, a_ptr, a_size, b);
+  return mpn_jacobi_base (a_rem, (mp_limb_t) b, result_bit1);
+}
diff --git a/mpz/lcm.c b/mpz/lcm.c
new file mode 100644
index 0000000..2807ef7
--- /dev/null
+++ b/mpz/lcm.c
@@ -0,0 +1,87 @@
+/* mpz_lcm -- mpz/mpz least common multiple.
+
+Copyright 1996, 2000, 2001, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set R to lcm(U, V).  The result is always non-negative; lcm with a
+   zero operand is 0.  */
+void
+mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
+{
+  mpz_t g;
+  mp_size_t usize, vsize;
+  TMP_DECL;
+
+  usize = SIZ (u);
+  vsize = SIZ (v);
+  if (usize == 0 || vsize == 0)
+    {
+      SIZ (r) = 0;
+      return;
+    }
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+
+  /* Fast path when either operand is a single limb:
+     lcm(u,v) = u * (v / gcd(u,v)).  */
+  if (vsize == 1 || usize == 1)
+    {
+      mp_limb_t  vl, gl, c;
+      mp_srcptr  up;
+      mp_ptr     rp;
+
+      /* Arrange for u to be the multi-limb operand.  */
+      if (usize == 1)
+	{
+	  usize = vsize;
+	  MPZ_SRCPTR_SWAP (u, v);
+	}
+
+      MPZ_REALLOC (r, usize+1);
+
+      /* NOTE(review): PTR(u) is read after the realloc above, which
+	 looks intentional in case r aliases u -- confirm.  */
+      up = PTR(u);
+      vl = PTR(v)[0];
+      gl = mpn_gcd_1 (up, usize, vl);
+      vl /= gl;
+
+      rp = PTR(r);
+      c = mpn_mul_1 (rp, up, usize, vl);
+      rp[usize] = c;
+      usize += (c != 0);
+      SIZ(r) = usize;
+      return;
+    }
+
+  /* General case: r = |u / gcd(u,v) * v|.  */
+  TMP_MARK;
+  MPZ_TMP_INIT (g, usize); /* v != 0 implies |gcd(u,v)| <= |u| */
+
+  mpz_gcd (g, u, v);
+  mpz_divexact (g, u, g);
+  mpz_mul (r, g, v);
+
+  SIZ (r) = ABS (SIZ (r));	/* result always positive */
+
+  TMP_FREE;
+}
diff --git a/mpz/lcm_ui.c b/mpz/lcm_ui.c
new file mode 100644
index 0000000..1f199b7
--- /dev/null
+++ b/mpz/lcm_ui.c
@@ -0,0 +1,78 @@
+/* mpz_lcm_ui -- least common multiple of mpz and ulong.
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Set R to lcm(U, V) for unsigned long V.  The result is always
+   non-negative; lcm with a zero operand is 0.  */
+void
+mpz_lcm_ui (mpz_ptr r, mpz_srcptr u, unsigned long v)
+{
+  mp_size_t      usize;
+  mp_srcptr      up;
+  mp_ptr         rp;
+  unsigned long  g;
+  mp_limb_t      c;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (v > GMP_NUMB_MAX)
+    {
+      /* v needs two limbs: build a read-only stack mpz for it and use
+	 the mpz/mpz routine.  ALLOC(vz) is deliberately left unset --
+	 mpz_lcm only reads vz through the mpz_srcptr argument.  */
+      mpz_t vz;
+      mp_limb_t vlimbs[2];
+      vlimbs[0] = v & GMP_NUMB_MASK;
+      vlimbs[1] = v >> GMP_NUMB_BITS;
+      PTR(vz) = vlimbs;
+      SIZ(vz) = 2;
+      mpz_lcm (r, u, vz);
+      return;
+    }
+#endif
+
+  /* result zero if either operand zero */
+  usize = SIZ(u);
+  if (usize == 0 || v == 0)
+    {
+      SIZ(r) = 0;
+      return;
+    }
+  usize = ABS(usize);
+
+  MPZ_REALLOC (r, usize+1);
+
+  /* lcm(u,v) = |u| * (v / gcd(u,v)).  PTR(u) is read after the realloc
+     above.  */
+  up = PTR(u);
+  g = (unsigned long) mpn_gcd_1 (up, usize, (mp_limb_t) v);
+  v /= g;
+
+  rp = PTR(r);
+  c = mpn_mul_1 (rp, up, usize, (mp_limb_t) v);
+  rp[usize] = c;
+  usize += (c != 0);
+  SIZ(r) = usize;
+}
diff --git a/mpz/limbs_finish.c b/mpz/limbs_finish.c
new file mode 100644
index 0000000..a02839d
--- /dev/null
+++ b/mpz/limbs_finish.c
@@ -0,0 +1,39 @@
+/* mpz_finish_limbs -- Update mpz after writing to the limb array.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Finalize X after its limb array was written directly (via
+   mpz_limbs_write/modify): normalize away high zero limbs and set the
+   size field, negative if XS < 0.  */
+void
+mpz_limbs_finish (mpz_ptr x, mp_size_t xs)
+{
+  mp_size_t xn = ABS(xs);
+  MPN_NORMALIZE (PTR (x), xn);
+  SIZ (x) = xs < 0 ? -xn : xn;
+}
diff --git a/mpz/limbs_modify.c b/mpz/limbs_modify.c
new file mode 100644
index 0000000..a778b6e
--- /dev/null
+++ b/mpz/limbs_modify.c
@@ -0,0 +1,38 @@
+/* mpz_limbs_modify -- Read-and-modify access to the mpn-style limb array.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Return a writable pointer to X's limb array, ensuring room for at
+   least N limbs.  MPZ_REALLOC preserves the existing limb contents.
+   Caller must invoke mpz_limbs_finish after writing.  */
+mp_ptr
+mpz_limbs_modify (mpz_ptr x, mp_size_t n)
+{
+  ASSERT (n > 0);
+  return MPZ_REALLOC (x, n);
+}
diff --git a/mpz/limbs_read.c b/mpz/limbs_read.c
new file mode 100644
index 0000000..705f0c1
--- /dev/null
+++ b/mpz/limbs_read.c
@@ -0,0 +1,37 @@
+/* mpz_limbs_read -- Read access to the mpn-style limb array.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Return a read-only pointer to X's limb array (ABSIZ(x) limbs are
+   valid).  */
+mp_srcptr
+mpz_limbs_read (mpz_srcptr x)
+{
+  return PTR(x);
+}
diff --git a/mpz/limbs_write.c b/mpz/limbs_write.c
new file mode 100644
index 0000000..b116ad0
--- /dev/null
+++ b/mpz/limbs_write.c
@@ -0,0 +1,38 @@
+/* mpz_limbs_write -- Write access to the mpn-style limb array.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+mp_ptr
+mpz_limbs_write (mpz_ptr x, mp_size_t n)
+{
+  ASSERT (n > 0);
+  return MPZ_NEWALLOC (x, n);
+}
diff --git a/mpz/lucmod.c b/mpz/lucmod.c
new file mode 100644
index 0000000..0dad48c
--- /dev/null
+++ b/mpz/lucmod.c
@@ -0,0 +1,127 @@
+/* mpz_lucas_mod -- Helper function for the strong Lucas
+   primality test.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2018 Free Software Foundation, Inc.
+
+Contributed by Marco Bodrato.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Computes V_{k+1}, Q^{k+1} (mod n) for the Lucas' sequence	*/
+/* with P=1, Q=Q; k = n>>b0.	*/
+/* Requires n > 4; b0 > 0; -2*Q must not overflow a long.	*/
+/* If U_{k+1}==0 (mod n) or V_{k+1}==0 (mod n), it returns 1,	*/
+/* otherwise it returns 0 and sets V=V_{k+1} and Qk=Q^{k+1}.	*/
+/* V will never grow beyond SIZ(n), Qk not beyond 2*SIZ(n).	*/
+int
+mpz_lucas_mod (mpz_ptr V, mpz_ptr Qk, long Q,
+	       mp_bitcnt_t b0, mpz_srcptr n, mpz_ptr T1, mpz_ptr T2)
+{
+  mp_bitcnt_t bs;
+  int res;
+
+  ASSERT (b0 > 0);
+  ASSERT (SIZ (n) > 1 || SIZ (n) > 0 && PTR (n) [0] > 4);
+
+  mpz_set_ui (V, 1); /* U1 = 1 */
+  bs = mpz_sizeinbase (n, 2) - 2;
+  if (UNLIKELY (bs < b0))
+    {
+      /* n = 2^b0 - 1, should we use Lucas-Lehmer instead? */
+      ASSERT (bs == b0 - 2);
+      mpz_set_si (Qk, Q);
+      return 0;
+    }
+  mpz_set_ui (Qk, 1); /* U2 = 1 */
+
+  do
+    {
+      /* We use the iteration suggested in "Elementary Number Theory"	*/
+      /* by Peter Hackman (November 1, 2009), section "L.XVII Scalar	*/
+      /* Formulas", from http://hackmat.se/kurser/TATM54/booktot.pdf	*/
+      /* U_{2k} = 2*U_{k+1}*U_k - P*U_k^2	*/
+      /* U_{2k+1} = U_{k+1}^2  - Q*U_k^2	*/
+      /* U_{2k+2} = P*U_{k+1}^2 - 2*Q*U_{k+1}*U_k	*/
+      /* We note that U_{2k+2} = P*U_{2k+1} - Q*U_{2k}	*/
+      /* The formulas are specialized for P=1, and only squares:	*/
+      /* U_{2k}   = U_{k+1}^2 - |U_{k+1} - U_k|^2	*/
+      /* U_{2k+1} = U_{k+1}^2 - Q*U_k^2		*/
+      /* U_{2k+2} = U_{2k+1}  - Q*U_{2k}	*/
+      mpz_mul (T1, Qk, Qk);	/* U_{k+1}^2		*/
+      mpz_sub (Qk, V, Qk);	/* |U_{k+1} - U_k|	*/
+      mpz_mul (T2, Qk, Qk);	/* |U_{k+1} - U_k|^2	*/
+      mpz_mul (Qk, V, V);	/* U_k^2		*/
+      mpz_sub (T2, T1, T2);	/* U_{k+1}^2 - (U_{k+1} - U_k)^2	*/
+      if (Q > 0)		/* U_{k+1}^2 - Q U_k^2 = U_{2k+1}	*/
+	mpz_submul_ui (T1, Qk, Q);
+      else
+	mpz_addmul_ui (T1, Qk, NEG_CAST (unsigned long, Q));
+
+      /* A step k->k+1 is performed if the bit in $n$ is 1	*/
+      if (mpz_tstbit (n, bs))
+	{
+	  /* U_{2k+2} = U_{2k+1} - Q*U_{2k}	*/
+	  mpz_mul_si (T2, T2, Q);
+	  mpz_sub (T2, T1, T2);
+	  mpz_swap (T1, T2);
+	}
+      mpz_tdiv_r (Qk, T1, n);
+      mpz_tdiv_r (V, T2, n);
+    } while (--bs >= b0);
+
+  res = SIZ (Qk) == 0;
+  if (!res) {
+    mpz_mul_si (T1, V, -2*Q);
+    mpz_add (T1, Qk, T1);	/* V_k = U_k - 2Q*U_{k-1} */
+    mpz_tdiv_r (V, T1, n);
+    res = SIZ (V) == 0;
+    if (!res && b0 > 1) {
+      /* V_k and Q^k will be needed for further check, compute them.	*/
+      /* FIXME: Here we compute V_k^2 and store V_k, but the former	*/
+      /* will be recomputed by the calling function, should we store	*/
+      /* that instead?							*/
+      mpz_mul (T2, T1, T1);	/* V_k^2 */
+      mpz_mul (T1, Qk, Qk);	/* P^2 U_k^2 = U_k^2 */
+      mpz_sub (T2, T2, T1);
+      ASSERT (SIZ (T2) == 0 || PTR (T2) [0] % 4 == 0);
+      mpz_tdiv_q_2exp (T2, T2, 2);	/* (V_k^2 - P^2 U_k^2) / 4 */
+      if (Q > 0)		/* (V_k^2 - (P^2 -4Q) U_k^2) / 4 = Q^k */
+	mpz_addmul_ui (T2, T1, Q);
+      else
+	mpz_submul_ui (T2, T1, NEG_CAST (unsigned long, Q));
+      mpz_tdiv_r (Qk, T2, n);
+    }
+  }
+
+  return res;
+}
diff --git a/mpz/lucnum2_ui.c b/mpz/lucnum2_ui.c
new file mode 100644
index 0000000..3cc3d7b
--- /dev/null
+++ b/mpz/lucnum2_ui.c
@@ -0,0 +1,94 @@
+/* mpz_lucnum2_ui -- calculate Lucas numbers.
+
+Copyright 2001, 2003, 2005, 2012, 2015, 2016, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+void
+mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
+{
+  mp_ptr     lp, l1p, f1p;
+  mp_size_t  size;
+  mp_limb_t  c;
+  TMP_DECL;
+
+  ASSERT (ln != lnsub1);
+
+  /* handle small n quickly, and hide the special case for L[-1]=-1 */
+  if (n <= FIB_TABLE_LUCNUM_LIMIT)
+    {
+      mp_limb_t  f  = FIB_TABLE (n);
+      mp_limb_t  f1 = FIB_TABLE ((int) n - 1);
+
+      /* L[n] = F[n] + 2F[n-1] */
+      MPZ_NEWALLOC (ln, 1)[0] = f + 2*f1;
+      SIZ(ln) = 1;
+
+      /* L[n-1] = 2F[n] - F[n-1], but allow for L[-1]=-1 */
+      MPZ_NEWALLOC (lnsub1, 1)[0] = (n == 0 ? 1 : 2*f - f1);
+      SIZ(lnsub1) = (n == 0 ? -1 : 1);
+
+      return;
+    }
+
+  TMP_MARK;
+  size = MPN_FIB2_SIZE (n);
+  f1p = TMP_ALLOC_LIMBS (size);
+
+  lp  = MPZ_NEWALLOC (ln,     size+1);
+  l1p = MPZ_NEWALLOC (lnsub1, size+1);
+
+  size = mpn_fib2_ui (l1p, f1p, n);
+
+  /* L[n] = F[n] + 2F[n-1] */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  c = mpn_addlsh1_n (lp, l1p, f1p, size);
+#else
+  c = mpn_lshift (lp, f1p, size, 1);
+  c += mpn_add_n (lp, lp, l1p, size);
+#endif
+  lp[size] = c;
+  SIZ(ln) = size + (c != 0);
+
+  /* L[n-1] = 2F[n] - F[n-1] */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  c = mpn_rsblsh1_n (l1p, f1p, l1p, size);
+#else
+  c = mpn_lshift (l1p, l1p, size, 1);
+  c -= mpn_sub_n (l1p, l1p, f1p, size);
+#endif
+  ASSERT ((mp_limb_signed_t) c >= 0);
+  l1p[size] = c;
+  SIZ(lnsub1) = size + (c != 0);
+
+  TMP_FREE;
+}
diff --git a/mpz/lucnum_ui.c b/mpz/lucnum_ui.c
new file mode 100644
index 0000000..4213bb7
--- /dev/null
+++ b/mpz/lucnum_ui.c
@@ -0,0 +1,208 @@
+/* mpz_lucnum_ui -- calculate Lucas number.
+
+Copyright 2001, 2003, 2005, 2011, 2012, 2015, 2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* Notes:
+
+   For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so
+   there can't be an overflow applying +4 to just the low limb (since that
+   would leave 0, 1, 2 or 3 mod 8).
+
+   For the -4 in L[2k+1] when k is even, it seems (no proof) that
+   L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
+   L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
+   low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least
+   conceivable to calculate it, so it probably should be handled.
+
+   For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
+   2^b, so for instance in 32-bits L[0x80000000] has a low limb of
+   0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is
+   obviously huge, but probably should be made to work.  */
+
+void
+mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
+{
+  mp_size_t  lalloc, xalloc, lsize, xsize;
+  mp_ptr     lp, xp;
+  mp_limb_t  c;
+  int        zeros;
+  TMP_DECL;
+
+  TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
+
+  if (n <= FIB_TABLE_LUCNUM_LIMIT)
+    {
+      /* L[n] = F[n] + 2F[n-1] */
+      MPZ_NEWALLOC (ln, 1)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
+      SIZ(ln) = 1;
+      return;
+    }
+
+  /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
+     since square or mul used below might need an extra limb over the true
+     size */
+  lalloc = MPN_FIB2_SIZE (n) + 2;
+  lp = MPZ_NEWALLOC (ln, lalloc);
+
+  TMP_MARK;
+  xalloc = lalloc;
+  xp = TMP_ALLOC_LIMBS (xalloc);
+
+  /* Strip trailing zeros from n, until either an odd number is reached
+     where the L[2k+1] formula can be used, or until n fits within the
+     FIB_TABLE data.  The table is preferred of course.  */
+  zeros = 0;
+  for (;;)
+    {
+      if (n & 1)
+	{
+	  /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
+
+	  mp_size_t  yalloc, ysize;
+	  mp_ptr     yp;
+
+	  TRACE (printf ("  initial odd n=%lu\n", n));
+
+	  yalloc = MPN_FIB2_SIZE (n/2);
+	  yp = TMP_ALLOC_LIMBS (yalloc);
+	  ASSERT (xalloc >= yalloc);
+
+	  xsize = mpn_fib2_ui (xp, yp, n/2);
+
+	  /* possible high zero on F[k-1] */
+	  ysize = xsize;
+	  ysize -= (yp[ysize-1] == 0);
+	  ASSERT (yp[ysize-1] != 0);
+
+	  /* xp = 2*F[k] + F[k-1] */
+#if HAVE_NATIVE_mpn_addlsh1_n
+	  c = mpn_addlsh1_n (xp, yp, xp, xsize);
+#else
+	  c = mpn_lshift (xp, xp, xsize, 1);
+	  c += mpn_add_n (xp, xp, yp, xsize);
+#endif
+	  ASSERT (xalloc >= xsize+1);
+	  xp[xsize] = c;
+	  xsize += (c != 0);
+	  ASSERT (xp[xsize-1] != 0);
+
+	  ASSERT (lalloc >= xsize + ysize);
+	  c = mpn_mul (lp, xp, xsize, yp, ysize);
+	  lsize = xsize + ysize;
+	  lsize -= (c == 0);
+
+	  /* lp = 5*lp */
+#if HAVE_NATIVE_mpn_addlsh2_n
+	  c = mpn_addlsh2_n (lp, lp, lp, lsize);
+#else
+	  /* FIXME: Is this faster than mpn_mul_1 ? */
+	  c = mpn_lshift (xp, lp, lsize, 2);
+	  c += mpn_add_n (lp, lp, xp, lsize);
+#endif
+	  ASSERT (lalloc >= lsize+1);
+	  lp[lsize] = c;
+	  lsize += (c != 0);
+
+	  /* lp = lp - 4*(-1)^k */
+	  if (n & 2)
+	    {
+	      /* no overflow, see comments above */
+	      ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
+	      lp[0] += 4;
+	    }
+	  else
+	    {
+	      /* won't go negative */
+	      MPN_DECR_U (lp, lsize, CNST_LIMB(4));
+	    }
+
+	  TRACE (mpn_trace ("  l",lp, lsize));
+	  break;
+	}
+
+      MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
+      zeros++;
+      n /= 2;
+
+      if (n <= FIB_TABLE_LUCNUM_LIMIT)
+	{
+	  /* L[n] = F[n] + 2F[n-1] */
+	  lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
+	  lsize = 1;
+
+	  TRACE (printf ("  initial small n=%lu\n", n);
+		 mpn_trace ("  l",lp, lsize));
+	  break;
+	}
+    }
+
+  for ( ; zeros != 0; zeros--)
+    {
+      /* L[2k] = L[k]^2 + 2*(-1)^k */
+
+      TRACE (printf ("  zeros=%d\n", zeros));
+
+      ASSERT (xalloc >= 2*lsize);
+      mpn_sqr (xp, lp, lsize);
+      lsize *= 2;
+      lsize -= (xp[lsize-1] == 0);
+
+      /* First time around the loop k==n determines (-1)^k, after that k is
+	 always even and we set n=0 to indicate that.  */
+      if (n & 1)
+	{
+	  /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
+	  ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
+	  xp[0] += 2;
+	  n = 0;
+	}
+      else
+	{
+	  /* won't go negative */
+	  MPN_DECR_U (xp, lsize, CNST_LIMB(2));
+	}
+
+      MP_PTR_SWAP (xp, lp);
+      ASSERT (lp[lsize-1] != 0);
+    }
+
+  /* should end up in the right spot after all the xp/lp swaps */
+  ASSERT (lp == PTR(ln));
+  SIZ(ln) = lsize;
+
+  TMP_FREE;
+}
diff --git a/mpz/mfac_uiui.c b/mpz/mfac_uiui.c
new file mode 100644
index 0000000..8595a9b
--- /dev/null
+++ b/mpz/mfac_uiui.c
@@ -0,0 +1,140 @@
+/* mpz_mfac_uiui(RESULT, N, M) -- Set RESULT to N!^(M) = N(N-M)(N-2M)...
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012, 2013, 2015, 2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
+  do {								\
+    if ((PR) > (MAX_PR)) {					\
+      (VEC)[(I)++] = (PR);					\
+      (PR) = (P);						\
+    } else							\
+      (PR) *= (P);						\
+  } while (0)
+
+/*********************************************************/
+/* Section other factorials:                             */
+/*********************************************************/
+
+/* mpz_mfac_uiui (x, n, m) computes x = n!^(m) = n*(n-m)*(n-2m)*...   */
+
+void
+mpz_mfac_uiui (mpz_ptr x, unsigned long n, unsigned long m)
+{
+  ASSERT (n <= GMP_NUMB_MAX);
+  ASSERT (m != 0);
+
+  if ((n < 3) | (n - 3 < m - 1)) { /* (n < 3 || n - 1 <= m || m == 0) */
+    MPZ_NEWALLOC (x, 1)[0] = n + (n == 0);
+    SIZ (x) = 1;
+  } else { /* 0 < m < n - 1 < GMP_NUMB_MAX */
+    mp_limb_t g, sn;
+    mpz_t     t;
+
+    sn = n;
+    g = mpn_gcd_1 (&sn, 1, m);
+    if (g > 1) { n/=g; m/=g; }
+
+    if (m <= 2) { /* fac or 2fac */
+      if (m == 1) {
+	if (g > 2) {
+	  mpz_init (t);
+	  mpz_fac_ui (t, n);
+	  sn = n;
+	} else {
+	  if (g == 2)
+	    mpz_2fac_ui (x, n << 1);
+	  else
+	    mpz_fac_ui (x, n);
+	  return;
+	}
+      } else { /* m == 2 */
+	if (g > 1) {
+	  mpz_init (t);
+	  mpz_2fac_ui (t, n);
+	  sn = n / 2 + 1;
+	} else {
+	  mpz_2fac_ui (x, n);
+	  return;
+	}
+      }
+    } else { /* m >= 3, gcd(n,m) = 1 */
+      mp_limb_t *factors;
+      mp_limb_t prod, max_prod;
+      mp_size_t j;
+      TMP_DECL;
+
+      sn = n / m + 1;
+
+      j = 0;
+      prod = n;
+      n -= m;
+      max_prod = GMP_NUMB_MAX / n;
+
+      if (g > 1)
+	factors = MPZ_NEWALLOC (x, sn / log_n_max (n) + 2);
+      else {
+	TMP_MARK;
+	factors = TMP_ALLOC_LIMBS (sn / log_n_max (n) + 2);
+      }
+
+      for (; n > m; n -= m)
+	FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
+
+      factors[j++] = n;
+      factors[j++] = prod;
+
+      if (g > 1) {
+	mpz_init (t);
+	mpz_prodlimbs (t, factors, j);
+      } else {
+	mpz_prodlimbs (x, factors, j);
+	TMP_FREE;
+	return;
+      }
+    }
+
+    {
+      mpz_t p;
+
+      mpz_init (p);
+      mpz_ui_pow_ui (p, g, sn); /* g^sn */
+      mpz_mul (x, p, t);
+      mpz_clear (p);
+      mpz_clear (t);
+    }
+  }
+}
diff --git a/mpz/millerrabin.c b/mpz/millerrabin.c
new file mode 100644
index 0000000..98c4d6a
--- /dev/null
+++ b/mpz/millerrabin.c
@@ -0,0 +1,216 @@
+/* mpz_millerrabin(n,reps) -- An implementation of the probabilistic primality
+   test found in Knuth's Seminumerical Algorithms book.  If the function
+   mpz_millerrabin() returns 0 then n is not prime.  If it returns 1, then n is
+   'probably' prime.  The probability of a false positive is (1/4)**reps, where
+   reps is the number of internal passes of the probabilistic algorithm.  Knuth
+   indicates that 25 passes are reasonable.
+
+   With the current implementation, the first 24 MR-tests are substituted by a
+   Baillie-PSW probable prime test.
+
+   This implementation of the Baillie-PSW test was checked up to 2463*10^12,
+   for smaller values no MR-test is performed, regardless of reps, and
+   2 ("surely prime") is returned if the number was not proved composite.
+
+   If GMP_BPSW_NOFALSEPOSITIVES_UPTO_64BITS is defined as non-zero,
+   the code assumes that the Baillie-PSW test was checked up to 2^64.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993, 1994, 1996-2002, 2005, 2014, 2018-2022 Free
+Software Foundation, Inc.
+
+Contributed by John Amanatides.
+Changed to "BPSW, then Miller Rabin if required" by Marco Bodrato.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#ifndef GMP_BPSW_NOFALSEPOSITIVES_UPTO_64BITS
+#define GMP_BPSW_NOFALSEPOSITIVES_UPTO_64BITS 0
+#endif
+
+static int millerrabin (mpz_srcptr,
+			mpz_ptr, mpz_ptr,
+			mpz_srcptr, unsigned long int);
+
+int
+mpz_millerrabin (mpz_srcptr n, int reps)
+{
+  mpz_t nm, x, y, q;
+  mp_bitcnt_t k;
+  int is_prime;
+  TMP_DECL;
+  TMP_MARK;
+
+  ASSERT (SIZ (n) > 0);
+  MPZ_TMP_INIT (nm, SIZ (n) + 1);
+  mpz_tdiv_q_2exp (nm, n, 1);
+
+  MPZ_TMP_INIT (x, SIZ (n) + 1);
+  MPZ_TMP_INIT (y, 2 * SIZ (n)); /* mpz_powm_ui needs excessive memory!!! */
+  MPZ_TMP_INIT (q, SIZ (n));
+
+  /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
+  k = mpn_scan1 (PTR (nm), 0);
+  mpz_tdiv_q_2exp (q, nm, k);
+  ++k;
+
+  /* BPSW test */
+  mpz_set_ui (x, 2);
+  is_prime = millerrabin (n, x, y, q, k) && mpz_stronglucas (n, x, y);
+
+  if (is_prime)
+    {
+      if (
+#if GMP_BPSW_NOFALSEPOSITIVES_UPTO_64BITS
+	  /* Consider numbers up to 2^64 that pass the BPSW test as primes. */
+#if GMP_NUMB_BITS <= 64
+	  SIZ (n) <= 64 / GMP_NUMB_BITS
+#else
+	  0
+#endif
+#if 64 % GMP_NUMB_BITS != 0
+	  || SIZ (n) - 64 / GMP_NUMB_BITS == (PTR (n) [64 / GMP_NUMB_BITS] < CNST_LIMB(1) << 64 % GMP_NUMB_BITS)
+#endif
+#else
+	  /* Consider numbers up to 35*2^46 that pass the BPSW test as primes.
+	     This implementation was tested up to 2463*10^12 > 2^51+2^47+2^46 */
+	  /* 2^5 < 35 = 0b100011 < 2^6 */
+#define GMP_BPSW_LIMB_CONST CNST_LIMB(35)
+#define GMP_BPSW_BITS_CONST (LOG2C(35) - 1)
+#define GMP_BPSW_BITS_LIMIT (46 + GMP_BPSW_BITS_CONST)
+
+#define GMP_BPSW_LIMBS_LIMIT (GMP_BPSW_BITS_LIMIT / GMP_NUMB_BITS)
+#define GMP_BPSW_BITS_MOD (GMP_BPSW_BITS_LIMIT % GMP_NUMB_BITS)
+
+#if GMP_NUMB_BITS <=  GMP_BPSW_BITS_LIMIT
+	  SIZ (n) <= GMP_BPSW_LIMBS_LIMIT
+#else
+	  0
+#endif
+#if GMP_BPSW_BITS_MOD >=  GMP_BPSW_BITS_CONST
+	  || SIZ (n) - GMP_BPSW_LIMBS_LIMIT == (PTR (n) [GMP_BPSW_LIMBS_LIMIT] < GMP_BPSW_LIMB_CONST << (GMP_BPSW_BITS_MOD - GMP_BPSW_BITS_CONST))
+#else
+#if GMP_BPSW_BITS_MOD != 0
+	  || SIZ (n) - GMP_BPSW_LIMBS_LIMIT == (PTR (n) [GMP_BPSW_LIMBS_LIMIT] < GMP_BPSW_LIMB_CONST >> (GMP_BPSW_BITS_CONST -  GMP_BPSW_BITS_MOD))
+#else
+#if GMP_NUMB_BITS > GMP_BPSW_BITS_CONST
+	  || SIZ (nm) - GMP_BPSW_LIMBS_LIMIT + 1 == (PTR (nm) [GMP_BPSW_LIMBS_LIMIT - 1] < GMP_BPSW_LIMB_CONST << (GMP_NUMB_BITS - 1 - GMP_BPSW_BITS_CONST))
+#endif
+#endif
+#endif
+
+#undef GMP_BPSW_BITS_LIMIT
+#undef GMP_BPSW_LIMB_CONST
+#undef GMP_BPSW_BITS_CONST
+#undef GMP_BPSW_LIMBS_LIMIT
+#undef GMP_BPSW_BITS_MOD
+
+#endif
+	  )
+	is_prime = 2;
+      else
+	{
+	  reps -= 24;
+	  if (reps > 0)
+	    {
+	      gmp_randstate_t rstate;
+	      /* (n-5)/2 */
+	      mpz_sub_ui (nm, nm, 2L);
+	      ASSERT (mpz_cmp_ui (nm, 1L) >= 0);
+
+	      gmp_randinit_default (rstate);
+
+	      do
+		{
+		  /* 3 to (n-1)/2 inclusive, don't want 1, 0 or 2 */
+		  mpz_urandomm (x, rstate, nm);
+		  mpz_add_ui (x, x, 3L);
+
+		  is_prime = millerrabin (n, x, y, q, k);
+		} while (--reps > 0 && is_prime);
+
+	      gmp_randclear (rstate);
+	    }
+	}
+    }
+  TMP_FREE;
+  return is_prime;
+}
+
+static int
+mod_eq_m1 (mpz_srcptr x, mpz_srcptr m)
+{
+  mp_size_t ms;
+  mp_srcptr mp, xp;
+
+  ms = SIZ (m);
+  if (SIZ (x) != ms)
+    return 0;
+  ASSERT (ms > 0);
+
+  mp = PTR (m);
+  xp = PTR (x);
+  ASSERT ((mp[0] - 1) == (mp[0] ^ 1)); /* n is odd */
+
+  if ((*xp ^ CNST_LIMB(1) ^ *mp) != CNST_LIMB(0)) /* xp[0] != mp[0] - 1 */
+    return 0;
+  else
+    {
+      int cmp;
+
+      --ms;
+      ++xp;
+      ++mp;
+
+      MPN_CMP (cmp, xp, mp, ms);
+
+      return cmp == 0;
+    }
+}
+
+static int
+millerrabin (mpz_srcptr n, mpz_ptr x, mpz_ptr y,
+	     mpz_srcptr q, mp_bitcnt_t k)
+{
+  mpz_powm (y, x, q, n);
+
+  if (mpz_cmp_ui (y, 1L) == 0 || mod_eq_m1 (y, n))
+    return 1;
+
+  for (mp_bitcnt_t i = 1; i < k; ++i)
+    {
+      mpz_powm_ui (y, y, 2L, n);
+      if (mod_eq_m1 (y, n))
+	return 1;
+    }
+  return 0;
+}
diff --git a/mpz/mod.c b/mpz/mod.c
new file mode 100644
index 0000000..ab5cdb1
--- /dev/null
+++ b/mpz/mod.c
@@ -0,0 +1,67 @@
+/* mpz_mod -- The mathematical mod function.
+
+Copyright 1991, 1993-1996, 2001, 2002, 2005, 2010, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t rn, bn;
+  mpz_t temp_divisor;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  bn = ABSIZ(divisor);
+
+  /* We need the original value of the divisor after the remainder has been
+     preliminary calculated.  We have to copy it to temporary space if it's
+     the same variable as REM.  */
+  if (rem == divisor)
+    {
+      PTR(temp_divisor) = TMP_ALLOC_LIMBS (bn);
+      MPN_COPY (PTR(temp_divisor), PTR(divisor), bn);
+    }
+  else
+    {
+      PTR(temp_divisor) = PTR(divisor);
+    }
+  SIZ(temp_divisor) = bn;
+  divisor = temp_divisor;
+
+  mpz_tdiv_r (rem, dividend, divisor);
+
+  rn = SIZ (rem);
+  if (rn < 0)
+    mpz_add (rem, rem, divisor);
+
+  TMP_FREE;
+}
diff --git a/mpz/mul.c b/mpz/mul.c
new file mode 100644
index 0000000..a70df9f
--- /dev/null
+++ b/mpz/mul.c
@@ -0,0 +1,159 @@
+/* mpz_mul -- Multiply two integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2009, 2011, 2012,
+2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+
+
+void
+mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+{
+  mp_size_t usize;
+  mp_size_t vsize;
+  mp_size_t wsize;
+  mp_size_t sign_product;
+  mp_ptr up, vp;
+  mp_ptr wp;
+  mp_ptr free_me;
+  size_t free_me_size;
+  mp_limb_t cy_limb;
+  TMP_DECL;
+
+  usize = SIZ (u);
+  vsize = SIZ (v);
+  sign_product = usize ^ vsize;
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+
+  if (usize < vsize)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (usize, vsize);
+    }
+
+  if (vsize == 0)
+    {
+      SIZ (w) = 0;
+      return;
+    }
+
+#if HAVE_NATIVE_mpn_mul_2
+  if (vsize <= 2)
+    {
+      wp = MPZ_REALLOC (w, usize+vsize);
+      if (vsize == 1)
+	cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
+      else
+	{
+	  cy_limb = mpn_mul_2 (wp, PTR (u), usize, PTR (v));
+	  usize++;
+	}
+      wp[usize] = cy_limb;
+      usize += (cy_limb != 0);
+      SIZ (w) = (sign_product >= 0 ? usize : -usize);
+      return;
+    }
+#else
+  if (vsize == 1)
+    {
+      wp = MPZ_REALLOC (w, usize+1);
+      cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
+      wp[usize] = cy_limb;
+      usize += (cy_limb != 0);
+      SIZ (w) = (sign_product >= 0 ? usize : -usize);
+      return;
+    }
+#endif
+
+  TMP_MARK;
+  free_me = NULL;
+  up = PTR (u);
+  vp = PTR (v);
+  wp = PTR (w);
+
+  /* Ensure W has space enough to store the result.  */
+  wsize = usize + vsize;
+  if (ALLOC (w) < wsize)
+    {
+      if (ALLOC (w) != 0)
+	{
+	  if (wp == up || wp == vp)
+	    {
+	      free_me = wp;
+	      free_me_size = ALLOC (w);
+	    }
+	  else
+	    (*__gmp_free_func) (wp, (size_t) ALLOC (w) * GMP_LIMB_BYTES);
+	}
+
+      ALLOC (w) = wsize;
+      wp = __GMP_ALLOCATE_FUNC_LIMBS (wsize);
+      PTR (w) = wp;
+    }
+  else
+    {
+      /* Make U and V not overlap with W.  */
+      if (wp == up)
+	{
+	  /* W and U are identical.  Allocate temporary space for U.  */
+	  up = TMP_ALLOC_LIMBS (usize);
+	  /* Is V identical too?  Keep it identical with U.  */
+	  if (wp == vp)
+	    vp = up;
+	  /* Copy to the temporary space.  */
+	  MPN_COPY (up, wp, usize);
+	}
+      else if (wp == vp)
+	{
+	  /* W and V are identical.  Allocate temporary space for V.  */
+	  vp = TMP_ALLOC_LIMBS (vsize);
+	  /* Copy to the temporary space.  */
+	  MPN_COPY (vp, wp, vsize);
+	}
+    }
+
+  if (up == vp)
+    {
+      mpn_sqr (wp, up, usize);
+      cy_limb = wp[wsize - 1];
+    }
+  else
+    {
+      cy_limb = mpn_mul (wp, up, usize, vp, vsize);
+    }
+
+  wsize -= cy_limb == 0;
+
+  SIZ (w) = sign_product < 0 ? -wsize : wsize;
+  if (free_me != NULL)
+    (*__gmp_free_func) (free_me, free_me_size * GMP_LIMB_BYTES);
+  TMP_FREE;
+}
diff --git a/mpz/mul_2exp.c b/mpz/mul_2exp.c
new file mode 100644
index 0000000..6144d0d
--- /dev/null
+++ b/mpz/mul_2exp.c
@@ -0,0 +1,72 @@
+/* mpz_mul_2exp -- Multiply a bignum by 2**CNT
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set R = U * 2^CNT.  The shift is performed as LIMB_CNT whole-limb moves
+   plus an mpn_lshift by the remaining bit count.  R and U may be the same
+   variable; the MPN_COPY_DECR / deferred MPN_ZERO ordering below keeps the
+   in-place case correct.  */
+void
+mpz_mul_2exp (mpz_ptr r, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  mp_size_t un, rn;
+  mp_size_t limb_cnt;
+  mp_ptr rp;
+  mp_srcptr up;
+  mp_limb_t rlimb;
+
+  un = ABSIZ (u);
+  limb_cnt = cnt / GMP_NUMB_BITS;	/* whole limbs of shift */
+  rn = un + limb_cnt;
+
+  if (un == 0)
+    rn = 0;				/* 0 * 2^cnt == 0 */
+  else
+    {
+      rp = MPZ_REALLOC (r, rn + 1);
+      up = PTR(u);
+
+      cnt %= GMP_NUMB_BITS;		/* leftover bit count within a limb */
+      if (cnt != 0)
+	{
+	  rlimb = mpn_lshift (rp + limb_cnt, up, un, cnt);
+	  rp[rn] = rlimb;		/* carry limb shifted out the top */
+	  rn += (rlimb != 0);
+	}
+      else
+	{
+	  MPN_COPY_DECR (rp + limb_cnt, up, un);
+	}
+
+      /* Zero all whole limbs at low end.  Do it here and not before calling
+	 mpn_lshift, not to lose for U == R.  */
+      MPN_ZERO (rp, limb_cnt);
+    }
+
+  SIZ(r) = SIZ(u) >= 0 ? rn : -rn;
+}
diff --git a/mpz/mul_i.h b/mpz/mul_i.h
new file mode 100644
index 0000000..65c9c2f
--- /dev/null
+++ b/mpz/mul_i.h
@@ -0,0 +1,106 @@
+/* mpz_mul_ui/si (product, multiplier, small_multiplicand) -- Set PRODUCT to
+   MULTIPLIER times SMALL_MULTIPLICAND.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002, 2005, 2008, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+#ifdef OPERATION_mul_si
+#define FUNCTION               mpz_mul_si
+#define MULTIPLICAND_UNSIGNED
+#define MULTIPLICAND_ABS(x)    ABS_CAST(unsigned long, (x))
+#endif
+
+#ifdef OPERATION_mul_ui
+#define FUNCTION               mpz_mul_ui
+#define MULTIPLICAND_UNSIGNED  unsigned
+#define MULTIPLICAND_ABS(x)    x
+#endif
+
+#ifndef FUNCTION
+Error, error, unrecognised OPERATION
+#endif
+
+
+/* Set PROD = MULT * SMALL_MULT.  Expanded as mpz_mul_si (signed long
+   multiplicand) or mpz_mul_ui (unsigned long), selected by the
+   OPERATION_* macros above.  */
+void
+FUNCTION (mpz_ptr prod, mpz_srcptr mult,
+          MULTIPLICAND_UNSIGNED long int small_mult)
+{
+  mp_size_t size;
+  mp_size_t sign_product;
+  mp_limb_t sml;
+  mp_limb_t cy;
+  mp_ptr pp;
+
+  sign_product = SIZ(mult);
+  if (sign_product == 0 || small_mult == 0)
+    {
+      SIZ(prod) = 0;
+      return;
+    }
+
+  size = ABS (sign_product);
+
+  sml = MULTIPLICAND_ABS (small_mult);
+
+  if (sml <= GMP_NUMB_MAX)	/* always true when GMP_NAIL_BITS == 0 */
+    {
+      pp = MPZ_REALLOC (prod, size + 1);
+      cy = mpn_mul_1 (pp, PTR(mult), size, sml);
+      pp[size] = cy;
+      size += cy != 0;
+    }
+#if GMP_NAIL_BITS != 0
+  else
+    {
+      /* Operand too large for the current nails size.  Use temporary for
+	 intermediate products, to allow prod and mult being identical.  */
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+
+      tp = TMP_ALLOC_LIMBS (size + 2);
+
+      /* Use, maybe, mpn_mul_2? */
+      cy = mpn_mul_1 (tp, PTR(mult), size, sml & GMP_NUMB_MASK);
+      tp[size] = cy;
+      cy = mpn_addmul_1 (tp + 1, PTR(mult), size, sml >> GMP_NUMB_BITS);
+      tp[size + 1] = cy;
+      size += 2;
+      MPN_NORMALIZE_NOT_ZERO (tp, size); /* too general, need to trim one or two limb */
+      pp = MPZ_NEWALLOC (prod, size);
+      MPN_COPY (pp, tp, size);
+      TMP_FREE;
+    }
+#endif
+
+  /* For mpz_mul_ui, small_mult < 0 is constant false, so the sign is that
+     of MULT alone.  */
+  SIZ(prod) = ((sign_product < 0) ^ (small_mult < 0)) ? -size : size;
+}
diff --git a/mpz/mul_si.c b/mpz/mul_si.c
new file mode 100644
index 0000000..9f29f60
--- /dev/null
+++ b/mpz/mul_si.c
@@ -0,0 +1,34 @@
+/* mpz_mul_si (product, multiplier, small_multiplicand) -- Set PRODUCT to
+   MULTIPLIER times SMALL_MULTIPLICAND.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_mul_si
+#include "mul_i.h"
diff --git a/mpz/mul_ui.c b/mpz/mul_ui.c
new file mode 100644
index 0000000..d398c4f
--- /dev/null
+++ b/mpz/mul_ui.c
@@ -0,0 +1,34 @@
+/* mpz_mul_ui (product, multiplier, small_multiplicand) -- Set PRODUCT to
+   MULTIPLIER times SMALL_MULTIPLICAND.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_mul_ui
+#include "mul_i.h"
diff --git a/mpz/n_pow_ui.c b/mpz/n_pow_ui.c
new file mode 100644
index 0000000..cf293f5
--- /dev/null
+++ b/mpz/n_pow_ui.c
@@ -0,0 +1,532 @@
+/* mpz_n_pow_ui -- mpn raised to ulong.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+
+/* Use this to test the mul_2 code on a CPU without a native version of that
+   routine.  */
+#if 0
+#define mpn_mul_2  refmpn_mul_2
+#define HAVE_NATIVE_mpn_mul_2  1
+#endif
+
+
+/* mpz_pow_ui and mpz_ui_pow_ui want to share almost all of this code.
+   ui_pow_ui doesn't need the mpn_mul based powering loop or the tests on
+   bsize==2 or >2, but separating that isn't easy because there's shared
+   code both before and after (the size calculations and the powers of 2
+   handling).
+
+   Alternatives:
+
+   It would work to just use the mpn_mul powering loop for 1 and 2 limb
+   bases, but the current separate loop allows mul_1 and mul_2 to be done
+   in-place, which might help cache locality a bit.  If mpn_mul was relaxed
+   to allow source==dest when vn==1 or 2 then some pointer twiddling might
+   let us get the same effect in one loop.
+
+   The initial powering for bsize==1 into blimb or blimb:blimb_low doesn't
+   form the biggest possible power of b that fits, only the biggest power of
+   2 power, ie. b^(2^n).  It'd be possible to choose a bigger power, perhaps
+   using mp_bases[b].big_base for small b, and thereby get better value
+   from mpn_mul_1 or mpn_mul_2 in the bignum powering.  It's felt that doing
+   so would be more complicated than it's worth, and could well end up being
+   a slowdown for small e.  For big e on the other hand the algorithm is
+   dominated by mpn_sqr so there wouldn't much of a saving.  The current
+   code can be viewed as simply doing the first few steps of the powering in
+   a single or double limb where possible.
+
+   If r==b, and blow_twos==0, and r must be realloc'ed, then the temporary
+   copy made of b is unnecessary.  We could just use the old alloc'ed block
+   and free it at the end.  But arranging this seems like a lot more trouble
+   than it's worth.  */
+
+
+/* floor(sqrt(GMP_NUMB_MAX)), ie. the biggest value that can be squared in
+   a limb without overflowing.
+   FIXME: This formula is an underestimate when GMP_NUMB_BITS is odd. */
+
+#define GMP_NUMB_HALFMAX  (((mp_limb_t) 1 << GMP_NUMB_BITS/2) - 1)
+
+
+/* The following are for convenience, they update the size and check the
+   alloc.  */
+
+#define MPN_SQR(dst, alloc, src, size)          \
+  do {                                          \
+    ASSERT (2*(size) <= (alloc));               \
+    mpn_sqr (dst, src, size);                   \
+    (size) *= 2;                                \
+    (size) -= ((dst)[(size)-1] == 0);           \
+  } while (0)
+
+#define MPN_MUL(dst, alloc, src, size, src2, size2)     \
+  do {                                                  \
+    mp_limb_t  cy;                                      \
+    ASSERT ((size) + (size2) <= (alloc));               \
+    cy = mpn_mul (dst, src, size, src2, size2);         \
+    (size) += (size2) - (cy == 0);                      \
+  } while (0)
+
+#define MPN_MUL_2(ptr, size, alloc, mult)       \
+  do {                                          \
+    mp_limb_t  cy;                              \
+    ASSERT ((size)+2 <= (alloc));               \
+    cy = mpn_mul_2 (ptr, ptr, size, mult);      \
+    (size)++;                                   \
+    (ptr)[(size)] = cy;                         \
+    (size) += (cy != 0);                        \
+  } while (0)
+
+#define MPN_MUL_1(ptr, size, alloc, limb)       \
+  do {                                          \
+    mp_limb_t  cy;                              \
+    ASSERT ((size)+1 <= (alloc));               \
+    cy = mpn_mul_1 (ptr, ptr, size, limb);      \
+    (ptr)[size] = cy;                           \
+    (size) += (cy != 0);                        \
+  } while (0)
+
+#define MPN_LSHIFT(ptr, size, alloc, shift)     \
+  do {                                          \
+    mp_limb_t  cy;                              \
+    ASSERT ((size)+1 <= (alloc));               \
+    cy = mpn_lshift (ptr, ptr, size, shift);    \
+    (ptr)[size] = cy;                           \
+    (size) += (cy != 0);                        \
+  } while (0)
+
+#define MPN_RSHIFT_OR_COPY(dst, src, size, shift)       \
+  do {                                                  \
+    if ((shift) == 0)                                   \
+      MPN_COPY (dst, src, size);                        \
+    else                                                \
+      {                                                 \
+        mpn_rshift (dst, src, size, shift);             \
+        (size) -= ((dst)[(size)-1] == 0);               \
+      }                                                 \
+  } while (0)
+
+
+/* ralloc and talloc are only wanted for ASSERTs, after the initial space
+   allocations.  Avoid writing values to them in a normal build, to ensure
+   the compiler lets them go dead.  gcc already figures this out itself
+   actually.  */
+
+#define SWAP_RP_TP                                      \
+  do {                                                  \
+    MP_PTR_SWAP (rp, tp);                               \
+    ASSERT_CODE (MP_SIZE_T_SWAP (ralloc, talloc));      \
+  } while (0)
+
+
+/* Raise the mpn {bp, |bsize|} to the power E and store the result in R.
+   The sign of BSIZE together with the parity of E decides the result
+   sign.  Internal routine shared by mpz_pow_ui and mpz_ui_pow_ui (see
+   the discussion at the top of this file).  */
+void
+mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
+{
+  mp_ptr         rp;
+  mp_size_t      rtwos_limbs, ralloc, rsize;
+  int            rneg, i, cnt, btwos, r_bp_overlap;
+  mp_limb_t      blimb, rl;
+  mp_bitcnt_t    rtwos_bits;
+#if HAVE_NATIVE_mpn_mul_2
+  mp_limb_t      blimb_low, rl_high;
+#else
+  mp_limb_t      b_twolimbs[2];
+#endif
+  TMP_DECL;
+
+  TRACE (printf ("mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\n",
+		 PTR(r), bp, bsize, e, e);
+	 mpn_trace ("b", bp, bsize));
+
+  ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ALLOC(r), bp, ABS(bsize)));
+
+  /* b^0 == 1, including 0^0 == 1 */
+  if (e == 0)
+    {
+      MPZ_NEWALLOC (r, 1)[0] = 1;
+      SIZ(r) = 1;
+      return;
+    }
+
+  /* 0^e == 0 apart from 0^0 above */
+  if (bsize == 0)
+    {
+      SIZ(r) = 0;
+      return;
+    }
+
+  /* Sign of the final result. */
+  rneg = (bsize < 0 && (e & 1) != 0);
+  bsize = ABS (bsize);
+  TRACE (printf ("rneg %d\n", rneg));
+
+  r_bp_overlap = (PTR(r) == bp);	/* overlap forces a copy of B below */
+
+  /* Strip low zero limbs from b. */
+  rtwos_limbs = 0;
+  for (blimb = *bp; blimb == 0; blimb = *++bp)
+    {
+      rtwos_limbs += e;
+      bsize--; ASSERT (bsize >= 1);
+    }
+  TRACE (printf ("trailing zero rtwos_limbs=%ld\n", rtwos_limbs));
+
+  /* Strip low zero bits from b. */
+  count_trailing_zeros (btwos, blimb);
+  blimb >>= btwos;
+  rtwos_bits = e * btwos;
+  rtwos_limbs += rtwos_bits / GMP_NUMB_BITS;
+  rtwos_bits %= GMP_NUMB_BITS;
+  TRACE (printf ("trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\n",
+		 btwos, rtwos_limbs, rtwos_bits));
+
+  TMP_MARK;
+
+  rl = 1;	/* small leftover factor, multiplied in at the very end */
+#if HAVE_NATIVE_mpn_mul_2
+  rl_high = 0;
+#endif
+
+  if (bsize == 1)
+    {
+    bsize_1:
+      /* Power up as far as possible within blimb.  We start here with e!=0,
+	 but if e is small then we might reach e==0 and the whole b^e in rl.
+	 Notice this code works when blimb==1 too, reaching e==0.  */
+
+      while (blimb <= GMP_NUMB_HALFMAX)
+	{
+	  TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n",
+			 e, blimb, rl));
+	  ASSERT (e != 0);
+	  if ((e & 1) != 0)
+	    rl *= blimb;
+	  e >>= 1;
+	  if (e == 0)
+	    goto got_rl;
+	  blimb *= blimb;
+	}
+
+#if HAVE_NATIVE_mpn_mul_2
+      TRACE (printf ("single power, e=0x%lX b=0x%lX rl=0x%lX\n",
+		     e, blimb, rl));
+
+      /* Can power b once more into blimb:blimb_low */
+      bsize = 2;
+      ASSERT (e != 0);
+      if ((e & 1) != 0)
+	{
+	  umul_ppmm (rl_high, rl, rl, blimb << GMP_NAIL_BITS);
+	  rl >>= GMP_NAIL_BITS;
+	}
+      e >>= 1;
+      umul_ppmm (blimb, blimb_low, blimb, blimb << GMP_NAIL_BITS);
+      blimb_low >>= GMP_NAIL_BITS;
+
+    got_rl:
+      TRACE (printf ("double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\n",
+		     e, blimb, blimb_low, rl_high, rl));
+
+      /* Combine left-over rtwos_bits into rl_high:rl to be handled by the
+	 final mul_1 or mul_2 rather than a separate lshift.
+	 - rl_high:rl mustn't be 1 (since then there's no final mul)
+	 - rl_high mustn't overflow
+	 - rl_high mustn't change to non-zero, since mul_1+lshift is
+	 probably faster than mul_2 (FIXME: is this true?)  */
+
+      if (rtwos_bits != 0
+	  && ! (rl_high == 0 && rl == 1)
+	  && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
+	{
+	  mp_limb_t  new_rl_high = (rl_high << rtwos_bits)
+	    | (rl >> (GMP_NUMB_BITS-rtwos_bits));
+	  if (! (rl_high == 0 && new_rl_high != 0))
+	    {
+	      rl_high = new_rl_high;
+	      rl <<= rtwos_bits;
+	      rtwos_bits = 0;
+	      TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n",
+			     rl_high, rl));
+	    }
+	}
+#else
+    got_rl:
+      TRACE (printf ("small power e=0x%lX blimb=0x%lX rl=0x%lX\n",
+		     e, blimb, rl));
+
+      /* Combine left-over rtwos_bits into rl to be handled by the final
+	 mul_1 rather than a separate lshift.
+	 - rl mustn't be 1 (since then there's no final mul)
+	 - rl mustn't overflow	*/
+
+      if (rtwos_bits != 0
+	  && rl != 1
+	  && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
+	{
+	  rl <<= rtwos_bits;
+	  rtwos_bits = 0;
+	  TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl));
+	}
+#endif
+    }
+  else if (bsize == 2)
+    {
+      mp_limb_t  bsecond = bp[1];
+      if (btwos != 0)
+	blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
+      bsecond >>= btwos;
+      if (bsecond == 0)
+	{
+	  /* Two limbs became one after rshift. */
+	  bsize = 1;
+	  goto bsize_1;
+	}
+
+      TRACE (printf ("bsize==2 using b=0x%lX:%lX", bsecond, blimb));
+#if HAVE_NATIVE_mpn_mul_2
+      blimb_low = blimb;
+#else
+      bp = b_twolimbs;
+      b_twolimbs[0] = blimb;
+      b_twolimbs[1] = bsecond;
+#endif
+      blimb = bsecond;
+    }
+  else
+    {
+      if (r_bp_overlap || btwos != 0)
+	{
+	  mp_ptr tp = TMP_ALLOC_LIMBS (bsize);
+	  MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);
+	  bp = tp;
+	  TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize));
+	}
+#if HAVE_NATIVE_mpn_mul_2
+      /* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */
+      blimb_low = bp[0];
+#endif
+      blimb = bp[bsize-1];
+
+      TRACE (printf ("big bsize=%ld  ", bsize);
+	     mpn_trace ("b", bp, bsize));
+    }
+
+  /* At this point blimb is the most significant limb of the base to use.
+
+     Each factor of b takes (bsize*BPML-cnt) bits and there's e of them; +1
+     limb to round up the division; +1 for multiplies all using an extra
+     limb over the true size; +2 for rl at the end; +1 for lshift at the
+     end.
+
+     The size calculation here is reasonably accurate.  The base is at least
+     half a limb, so in 32 bits the worst case is 2^16+1 treated as 17 bits
+     when it will power up as just over 16, an overestimate of 17/16 =
+     6.25%.  For a 64-bit limb it's half that.
+
+     If e==0 then blimb won't be anything useful (though it will be
+     non-zero), but that doesn't matter since we just end up with ralloc==5,
+     and that's fine for 2 limbs of rl and 1 of lshift.  */
+
+  ASSERT (blimb != 0);
+  count_leading_zeros (cnt, blimb);
+  ralloc = (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5;
+  TRACE (printf ("ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\n",
+		 ralloc, bsize, blimb, cnt));
+  rp = MPZ_NEWALLOC (r, ralloc + rtwos_limbs);
+
+  /* Low zero limbs resulting from powers of 2. */
+  MPN_ZERO (rp, rtwos_limbs);
+  rp += rtwos_limbs;
+
+  if (e == 0)
+    {
+      /* Any e==0 other than via bsize==1 or bsize==2 is covered at the
+	 start. */
+      rp[0] = rl;
+      rsize = 1;
+#if HAVE_NATIVE_mpn_mul_2
+      rp[1] = rl_high;
+      rsize += (rl_high != 0);
+#endif
+      ASSERT (rp[rsize-1] != 0);
+    }
+  else
+    {
+      mp_ptr     tp;
+      mp_size_t  talloc;
+
+      /* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the
+	 low bit of e is zero, tp only has to hold the second last power
+	 step, which is half the size of the final result.  There's no need
+	 to round up the divide by 2, since ralloc includes a +2 for rl
+	 which not needed by tp.  In the mpn_mul loop when the low bit of e
+	 is 1, tp must hold nearly the full result, so just size it the same
+	 as rp.  */
+
+      talloc = ralloc;
+#if HAVE_NATIVE_mpn_mul_2
+      if (bsize <= 2 || (e & 1) == 0)
+	talloc /= 2;
+#else
+      if (bsize <= 1 || (e & 1) == 0)
+	talloc /= 2;
+#endif
+      TRACE (printf ("talloc %ld\n", talloc));
+      tp = TMP_ALLOC_LIMBS (talloc);
+
+      /* Go from high to low over the bits of e, starting with i pointing at
+	 the bit below the highest 1 (which will mean i==-1 if e==1).  */
+      count_leading_zeros (cnt, (mp_limb_t) e);
+      i = GMP_LIMB_BITS - cnt - 2;
+
+#if HAVE_NATIVE_mpn_mul_2
+      if (bsize <= 2)
+	{
+	  mp_limb_t  mult[2];
+
+	  /* Any bsize==1 will have been powered above to be two limbs. */
+	  ASSERT (bsize == 2);
+	  ASSERT (blimb != 0);
+
+	  /* Arrange the final result ends up in r, not in the temp space */
+	  if ((i & 1) == 0)
+	    SWAP_RP_TP;
+
+	  rp[0] = blimb_low;
+	  rp[1] = blimb;
+	  rsize = 2;
+
+	  mult[0] = blimb_low;
+	  mult[1] = blimb;
+
+	  for ( ; i >= 0; i--)
+	    {
+	      TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+			     i, e, rsize, ralloc, talloc);
+		     mpn_trace ("r", rp, rsize));
+
+	      MPN_SQR (tp, talloc, rp, rsize);
+	      SWAP_RP_TP;
+	      if ((e & (1L << i)) != 0)
+		MPN_MUL_2 (rp, rsize, ralloc, mult);
+	    }
+
+	  TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize));
+	  if (rl_high != 0)
+	    {
+	      mult[0] = rl;
+	      mult[1] = rl_high;
+	      MPN_MUL_2 (rp, rsize, ralloc, mult);
+	    }
+	  else if (rl != 1)
+	    MPN_MUL_1 (rp, rsize, ralloc, rl);
+	}
+#else
+      if (bsize == 1)
+	{
+	  /* Arrange the final result ends up in r, not in the temp space */
+	  if ((i & 1) == 0)
+	    SWAP_RP_TP;
+
+	  rp[0] = blimb;
+	  rsize = 1;
+
+	  for ( ; i >= 0; i--)
+	    {
+	      TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+			     i, e, rsize, ralloc, talloc);
+		     mpn_trace ("r", rp, rsize));
+
+	      MPN_SQR (tp, talloc, rp, rsize);
+	      SWAP_RP_TP;
+	      if ((e & (1L << i)) != 0)
+		MPN_MUL_1 (rp, rsize, ralloc, blimb);
+	    }
+
+	  TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize));
+	  if (rl != 1)
+	    MPN_MUL_1 (rp, rsize, ralloc, rl);
+	}
+#endif
+      else
+	{
+	  int  parity;
+
+	  /* Arrange the final result ends up in r, not in the temp space */
+	  ULONG_PARITY (parity, e);
+	  if (((parity ^ i) & 1) != 0)
+	    SWAP_RP_TP;
+
+	  MPN_COPY (rp, bp, bsize);
+	  rsize = bsize;
+
+	  for ( ; i >= 0; i--)
+	    {
+	      TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+			     i, e, rsize, ralloc, talloc);
+		     mpn_trace ("r", rp, rsize));
+
+	      MPN_SQR (tp, talloc, rp, rsize);
+	      SWAP_RP_TP;
+	      if ((e & (1L << i)) != 0)
+		{
+		  MPN_MUL (tp, talloc, rp, rsize, bp, bsize);
+		  SWAP_RP_TP;
+		}
+	    }
+	}
+    }
+
+  ASSERT (rp == PTR(r) + rtwos_limbs);
+  TRACE (mpn_trace ("end loop r", rp, rsize));
+  TMP_FREE;
+
+  /* Apply any partial limb factors of 2. */
+  if (rtwos_bits != 0)
+    {
+      MPN_LSHIFT (rp, rsize, ralloc, (unsigned) rtwos_bits);
+      TRACE (mpn_trace ("lshift r", rp, rsize));
+    }
+
+  rsize += rtwos_limbs;
+  SIZ(r) = (rneg ? -rsize : rsize);
+}
diff --git a/mpz/neg.c b/mpz/neg.c
new file mode 100644
index 0000000..7f3fbcf
--- /dev/null
+++ b/mpz/neg.c
@@ -0,0 +1,56 @@
+/* mpz_neg(mpz_ptr dst, mpz_ptr src) -- Assign the negated value of SRC to DST.
+
+Copyright 1991, 1993-1995, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_neg 1
+
+#include "gmp-impl.h"
+
+/* Set W = -U.  When W and U are distinct the limb data is copied; in
+   either case only the sign carried by the size field changes.  */
+void
+mpz_neg (mpz_ptr w, mpz_srcptr u)
+{
+  mp_ptr wp;
+  mp_srcptr up;
+  mp_size_t usize, size;
+
+  usize = SIZ (u);
+
+  if (u != w)
+    {
+      size = ABS (usize);
+
+      wp = MPZ_NEWALLOC (w, size);
+
+      up = PTR (u);
+
+      MPN_COPY (wp, up, size);
+    }
+
+  SIZ (w) = -usize;	/* flip the sign; magnitude is unchanged */
+}
diff --git a/mpz/nextprime.c b/mpz/nextprime.c
new file mode 100644
index 0000000..2fe2616
--- /dev/null
+++ b/mpz/nextprime.c
@@ -0,0 +1,291 @@
+/* mpz_nextprime(p,t) - compute the next prime > t and store that in p.
+
+Copyright 1999-2001, 2008, 2009, 2012, 2020-2022 Free Software
+Foundation, Inc.
+
+Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+Improved by Seth Troisi.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+/* The sieve packs one bit per candidate coprime to 2 and 3
+   (5, 7, 11, 13, ...); n_to_bit maps such an n to its bit index
+   (cf. the decoding loop in findnext below).  */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+/* Number of limbs needed for a sieve covering values up to n.  */
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+
+
+/* Differences between consecutive odd primes, starting from 3
+   (3, 5, 7, 11, ...); walked with "prime += primegap_small[i]" in the
+   trial-division and sieving loops below.  */
+static const unsigned char primegap_small[] =
+{
+  2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
+  2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
+  4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6,
+  12,2,18,6,10
+};
+
+#define NUMBER_OF_PRIMES 100
+#define LAST_PRIME 557
+/* NP_SMALL_LIMIT = prevprime (LAST_PRIME ^ 2) */
+#define NP_SMALL_LIMIT 310243
+
+/* Pick a sieving bound that balances sieve cost against the PRP tests
+   it saves, as a function of the candidate's bit count.  */
+static unsigned long
+calculate_sievelimit(mp_bitcnt_t nbits) {
+  unsigned long sieve_limit;
+
+  /* Estimate a good sieve bound. Based on derivative of
+   *   Mertens' 3rd theorem * avg gap * cost of mod
+   *      vs
+   *   Cost of PRP test O(N^2.55)
+   */
+  if (nbits < 12818)
+    {
+      mpz_t tmp;
+      /* sieve_limit ~= nbits ^ (5/2) / 124 */
+      mpz_init (tmp);
+      mpz_ui_pow_ui (tmp, nbits, 5);
+      mpz_tdiv_q_ui(tmp, tmp, 124*124);
+      /* tmp < 12818^5/(124*124) < 2^55 < 2^64 */
+      mpz_sqrt (tmp, tmp);
+
+      sieve_limit = mpz_get_ui(tmp);
+      mpz_clear (tmp);
+    }
+  else
+    {
+      /* Larger threshold is faster but takes (n/ln(n) + n/24) memory.
+       * For 33,000 bits limiting to 150M is ~12% slower than using the
+       * optimal 1.5G sieve_limit.
+       */
+      sieve_limit = 150000001;
+    }
+
+  ASSERT (1000 < sieve_limit && sieve_limit <= 150000001);
+  return sieve_limit;
+}
+
+/* Return the next (diff == +2) or previous (diff == -2) prime relative
+   to t, found by trial division against the primegap_small primes.
+   Intended for t below NP_SMALL_LIMIT = prevprime(LAST_PRIME^2), where
+   trial division by those primes is conclusive.  */
+static unsigned
+findnext_small (unsigned t, short diff)
+{
+  /* For diff= 2, expect t = 1 if operand was negative.
+   * For diff=-2, expect t >= 3
+   */
+  ASSERT (t >= 3 || (diff > 0 && t >= 1));
+  ASSERT (t < NP_SMALL_LIMIT);
+
+  /* Start from next candidate (2 or odd) */
+  t = diff > 0 ?
+    (t + 1) | (t != 1) :	/* 1 -> 2, otherwise next odd above t */
+    ((t - 2) | 1) + (t == 3);	/* 3 -> 2, otherwise next odd below t */
+
+  for (; ; t += diff)
+    {
+      unsigned prime = 3;
+      for (int i = 0; ; prime += primegap_small[i++])
+	{
+	  unsigned q, r;
+	  q = t / prime;
+	  r = t - q * prime; /* r = t % prime; */
+	  if (q < prime)
+	    return t;		/* prime^2 > t, so no factor exists: t is prime */
+	  if (r == 0)
+	    break;		/* composite; try the next candidate */
+	  ASSERT (i < NUMBER_OF_PRIMES);
+	}
+    }
+}
+
+/* Replace p by the nearest prime in the search direction given by
+   increment_ui (mpz_add_ui upward, mpz_sub_ui downward), sieving
+   candidates with small primes and confirming survivors with
+   Miller-Rabin.  negative_mod_ui(p, q) must return the distance from p
+   to the next multiple of q in that direction (mpz_cdiv_ui going up,
+   mpz_tdiv_ui going down).  Returns the mpz_millerrabin result for the
+   prime found.  */
+static int
+findnext (mpz_ptr p,
+          unsigned long(*negative_mod_ui)(const mpz_t, unsigned long),
+          void(*increment_ui)(mpz_t, const mpz_t, unsigned long))
+{
+  char *composite;
+  const unsigned char *primegap;
+  unsigned long prime_limit;
+  mp_size_t pn;
+  mp_bitcnt_t nbits;
+  int i, m;
+  unsigned odds_in_composite_sieve;
+  TMP_DECL;
+
+  TMP_MARK;
+  pn = SIZ(p);
+  MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1);
+  /* Smaller numbers handled earlier */
+  ASSERT (nbits >= 3);
+  /* Make p odd */
+  PTR(p)[0] |= 1;
+
+  if (nbits / 2 <= NUMBER_OF_PRIMES)
+    {
+      primegap = primegap_small;
+      prime_limit = nbits / 2;
+    }
+  else
+    {
+      unsigned long sieve_limit;
+      mp_limb_t *sieve;
+      unsigned char *primegap_tmp;
+      unsigned long last_prime;
+
+      /* sieve numbers up to sieve_limit and save prime count */
+      sieve_limit = calculate_sievelimit(nbits);
+      sieve = TMP_ALLOC_LIMBS (primesieve_size (sieve_limit));
+      prime_limit = gmp_primesieve(sieve, sieve_limit);
+
+      /* TODO: Storing (prime - last_prime)/2 would allow this to go
+	 up to the gap 304599508537+514=304599509051 .
+	 With the current code our limit is 436273009+282=436273291 */
+      ASSERT (sieve_limit < 436273291);
+      /* THINK: Memory used by both sieve and primegap_tmp is kept
+	 allocated, but they may overlap if primegap is filled from
+	 larger down to smaller primes...
+       */
+
+      /* Needed to avoid assignment of read-only location */
+      primegap_tmp = TMP_ALLOC_TYPE (prime_limit, unsigned char);
+      primegap = primegap_tmp;
+
+      i = 0;
+      last_prime = 3;
+      /* THINK: should we get rid of sieve_limit and use (i < prime_limit)? */
+      for (mp_limb_t j = 4, *sp = sieve; j < sieve_limit; j += GMP_LIMB_BITS * 3)
+	for (mp_limb_t b = j, x = ~ *(sp++); x != 0; b += 3, x >>= 1)
+	  if (x & 1)
+	    {
+	      mp_limb_t prime = b | 1;
+	      primegap_tmp[i++] = prime - last_prime;
+	      last_prime = prime;
+	    }
+
+      /* Both primesieve and prime_limit ignore the first two primes. */
+      ASSERT(i == prime_limit);
+    }
+
+  if (nbits <= 32)
+    odds_in_composite_sieve = 336 / 2;
+  else if (nbits <= 64)
+    odds_in_composite_sieve = 1550 / 2;
+  else
+    /* Corresponds to a merit 14 prime_gap, which is rare. */
+    odds_in_composite_sieve = 5 * nbits;
+
+  /* composite[2*i] stores if p+2*i is a known composite */
+  composite = TMP_ALLOC_TYPE (odds_in_composite_sieve, char);
+
+  for (;;)
+    {
+      unsigned long difference;
+      unsigned long incr, prime;
+      int primetest;
+
+      memset (composite, 0, odds_in_composite_sieve);
+      prime = 3;
+      for (i = 0; i < prime_limit; i++)
+        {
+          /* Distance to next multiple of prime */
+          m = negative_mod_ui(p, prime);
+          /* Only care about odd multiples of prime. */
+          if (m & 1)
+            m += prime;
+          m >>= 1;
+
+          /* Mark off any composites in sieve */
+          for (; m < odds_in_composite_sieve; m += prime)
+            composite[m] = 1;
+          prime += primegap[i];
+        }
+
+      difference = 0;
+      for (incr = 0; incr < odds_in_composite_sieve; difference += 2, incr += 1)
+        {
+          if (composite[incr])
+            continue;
+
+          increment_ui(p, p, difference);
+          difference = 0;
+
+          /* Miller-Rabin test */
+          primetest = mpz_millerrabin (p, 25);
+          if (primetest)
+	    {
+	      TMP_FREE;
+	      return primetest;
+	    }
+        }
+
+      /* Sieve next segment, very rare */
+      increment_ui(p, p, difference);
+  }
+}
+
+/* Set p to the smallest (probable) prime strictly greater than n.  */
+void
+mpz_nextprime (mpz_ptr p, mpz_srcptr n)
+{
+  /* Handle negative and small numbers */
+  if (mpz_cmp_ui (n, NP_SMALL_LIMIT) < 0)
+    {
+      ASSERT (NP_SMALL_LIMIT < UINT_MAX);
+      /* For n <= 0 start the search from 1, so the result is 2.  */
+      mpz_set_ui (p, findnext_small (SIZ (n) > 0 ? mpz_get_ui (n) : 1, +2));
+      return;
+    }
+
+  /* First odd greater than n */
+  mpz_add_ui (p, n, 1);
+
+  /* mpz_cdiv_ui gives the distance from p up to the next multiple.  */
+  findnext(p, mpz_cdiv_ui, mpz_add_ui);
+}
+
+/* Set p to the largest (probable) prime strictly less than n.  Returns 0
+   when no such prime exists (n <= 2), otherwise a nonzero primality
+   indication (2 from the trial-division path, where the result is proven
+   prime; otherwise the mpz_millerrabin result).  */
+int
+mpz_prevprime (mpz_ptr p, mpz_srcptr n)
+{
+  /* Handle negative and small numbers */
+  if (mpz_cmp_ui (n, 2) <= 0)
+    return 0;			/* no prime below 2 */
+
+  if (mpz_cmp_ui (n, NP_SMALL_LIMIT) < 0)
+    {
+      ASSERT (NP_SMALL_LIMIT < UINT_MAX);
+      mpz_set_ui (p, findnext_small (mpz_get_ui (n), -2));
+      return 2;			/* proven prime by trial division */
+    }
+
+  /* First odd less than n */
+  mpz_sub_ui (p, n, 2);
+
+  /* mpz_tdiv_ui gives the distance from p down to the next multiple.  */
+  return findnext(p, mpz_tdiv_ui, mpz_sub_ui);
+}
+
diff --git a/mpz/oddfac_1.c b/mpz/oddfac_1.c
new file mode 100644
index 0000000..3f4c085
--- /dev/null
+++ b/mpz/oddfac_1.c
@@ -0,0 +1,435 @@
+/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010-2012, 2015-2017, 2020, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* TODO:
+   - split this file in smaller parts with functions that can be recycled for different computations.
+ */
+
+/**************************************************************/
+/* Section macros: common macros, for mswing/fac/bin (&sieve) */
+/**************************************************************/
+
+/* Helpers for accumulating a list of limb-sized partial products.
+   PR is the running one-limb product, MAX_PR the threshold above
+   which one more multiplication might overflow a limb, VEC/I the
+   output vector and its insertion index.
+
+   FACTOR_LIST_APPEND: flush PR to VEC (post-incrementing I) and
+   restart the accumulator at 1, but only when PR exceeds MAX_PR.  */
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)			\
+  if ((PR) > (MAX_PR)) {					\
+    (VEC)[(I)++] = (PR);					\
+    (PR) = 1;							\
+  }
+
+/* FACTOR_LIST_STORE: multiply the new factor P into PR, first
+   flushing PR to VEC when it is already too large to safely take
+   another factor.  */
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
+  do {								\
+    if ((PR) > (MAX_PR)) {					\
+      (VEC)[(I)++] = (PR);					\
+      (PR) = (P);						\
+    } else							\
+      (PR) *= (P);						\
+  } while (0)
+
+/* Iteration macros over a prime bit-sieve (a 0 bit marks a prime).
+   LOOP_ON_SIEVE_BEGIN positions the limb/mask cursor on the bit for
+   `start' (relative to `off') and opens a loop whose body sees each
+   surviving candidate, converted via id_to_n, in the variable named
+   by `prime'.  LOOP_ON_SIEVE_STOP closes that loop;
+   LOOP_ON_SIEVE_CONTINUE reopens it, scanning onward in the same
+   sieve up to a new bound `end' without recomputing the cursor.
+   LOOP_ON_SIEVE_END closes both the loop and the enclosing do-block.
+   These expand into unbalanced braces by design and must be used in
+   matched BEGIN/STOP|END pairs.  */
+#define LOOP_ON_SIEVE_CONTINUE(prime,end)			\
+    __max_i = (end);						\
+								\
+    do {							\
+      ++__i;							\
+      if ((*__sieve & __mask) == 0)				\
+	{							\
+	  mp_limb_t prime;					\
+	  prime = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
+  do {								\
+    mp_limb_t __mask, *__sieve, __max_i, __i;			\
+								\
+    __i = (start)-(off);					\
+    __sieve = (sieve) + __i / GMP_LIMB_BITS;			\
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
+    __i += (off);						\
+								\
+    LOOP_ON_SIEVE_CONTINUE(prime,end)
+
+/* Rotate the mask one bit left; when it wraps to bit 0 the rotation's
+   low bit advances __sieve to the next limb.  */
+#define LOOP_ON_SIEVE_STOP					\
+	}							\
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
+      __sieve += __mask & 1;					\
+    }  while (__i <= __max_i)
+
+#define LOOP_ON_SIEVE_END					\
+    LOOP_ON_SIEVE_STOP;						\
+  } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+/* Mappings between numbers and their index in the prime sieve.  The
+   sieve keeps one bit per candidate in the 6k+-1 progression
+   (5, 7, 11, 13, ...), i.e. odd numbers not divisible by 3.  */
+
+#if WANT_ASSERT
+/* bit_to_n (bit): candidate represented by sieve bit `bit'
+   (assert-only inverse of n_to_bit).  */
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (id) = bit_to_n (id-1) = (id*3+1)|1 */
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1: sieve bit index of n.  */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+#if WANT_ASSERT
+/* Limbs needed for a sieve covering candidates up to n (assert-only).  */
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+#endif
+
+/*********************************************************/
+/* Section mswing: 2-multiswing factorial                */
+/*********************************************************/
+
+/* Returns an approximation of the square root of x.
+ * It gives:
+ *   limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
+ * or
+ *   x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
+ */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+  int s;
+
+  ASSERT (x > 2);
+  /* s = floor (bitsize(x) / 2); the result 2^(s-1) + x / 2^(s+1) is a
+     one-step Newton-style average of the bracketing powers of two.  */
+  count_leading_zeros (s, x);
+  s = (GMP_LIMB_BITS - s) >> 1;
+  return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s));
+}
+
+#if 0
+/* A count-then-exponentiate variant for SWING_A_PRIME */
+#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)		\
+  do {							\
+    mp_limb_t __q, __prime;				\
+    int __exp;						\
+    __prime = (P);					\
+    __exp = 0;						\
+    __q = (N);						\
+    do {						\
+      __q /= __prime;					\
+      __exp += __q & 1;					\
+    } while (__q >= __prime);				\
+    if (__exp) { /* Store $prime^{exp}$ */		\
+      for (__q = __prime; --__exp; __q *= __prime);	\
+      FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I);	\
+    };							\
+  } while (0)
+#else
+/* Multiply into the factor list the power of the prime P dividing the
+   2-multiswing factorial of N: each odd quotient in the chain
+   N/P, N/P^2, ... contributes one factor P.  The accumulator is
+   flushed up-front, so the repeated (PR) *= __prime cannot overflow
+   given MAX_PR's derivation.  */
+#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
+  do {						\
+    mp_limb_t __q, __prime;			\
+    __prime = (P);				\
+    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);	\
+    __q = (N);					\
+    do {					\
+      __q /= __prime;				\
+      if ((__q & 1) != 0) (PR) *= __prime;	\
+    } while (__q >= __prime);			\
+  } while (0)
+#endif
+
+/* Shortcut for primes P with P^2 > N: the exponent is floor(N/P) mod 2,
+   so P occurs at most once.  */
+#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
+  do {							\
+    mp_limb_t __prime;					\
+    __prime = (P);					\
+    if ((((N) / __prime) & 1) != 0)			\
+      FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);	\
+  } while (0)
+
+/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
+   factorial of the parameter n.  The result x is an odd positive
+   integer so that multiswing(n,2) = x 2^a.
+
+   Uses the algorithm described by Peter Luschny in "Divide, Swing and
+   Conquer the Factorial!".
+
+   The pointer sieve points to primesieve_size(n) limbs containing a
+   bit-array where primes are marked as 0.
+   Enough (FIXME: explain :-) limbs must be pointed to by factors.
+ */
+
+static void
+mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
+{
+  mp_limb_t prod, max_prod;
+  mp_size_t j;
+
+  ASSERT (n > 25);
+
+  j = 0;
+  prod  = -(n & 1);
+  n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
+
+  prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
+  max_prod = GMP_NUMB_MAX / (n-1);
+
+  /* Handle prime = 3 separately. */
+  SWING_A_PRIME (3, n, prod, max_prod, factors, j);
+
+  /* Swing primes from 5 to n/3 */
+  {
+    mp_limb_t s, l_max_prod;
+
+    /* Primes up to sqrt(n) may appear with exponent > 1 and need the
+       full SWING_A_PRIME treatment ...  */
+    s = limb_apprsqrt(n);
+    ASSERT (s >= 5);
+    s = n_to_bit (s);
+    ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
+    ASSERT (s < n_to_bit (n / 3));
+    LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
+    SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
+    LOOP_ON_SIEVE_STOP;
+
+    ASSERT (max_prod <= GMP_NUMB_MAX / 3);
+
+    l_max_prod = max_prod * 3;
+
+    /* ... while larger primes (sqrt(n), n/3] occur at most once, so the
+       cheaper single-division shortcut suffices.  */
+    LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3));
+    SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
+    LOOP_ON_SIEVE_END;
+  }
+
+  /* Store primes from (n+1)/2 to n */
+  LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
+  FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+  LOOP_ON_SIEVE_END;
+
+  if (LIKELY (j != 0))
+    {
+      /* Multi-limb result: append the pending accumulator and take the
+	 product of all collected limb factors.  */
+      factors[j++] = prod;
+      mpz_prodlimbs (x, factors, j);
+    }
+  else
+    {
+      /* Everything fit in the one-limb accumulator.  */
+      ASSERT (ALLOC (x) > 0);
+      PTR (x)[0] = prod;
+      SIZ (x) = 1;
+    }
+}
+
+#undef SWING_A_PRIME
+#undef SH_SWING_A_PRIME
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
+#undef FACTOR_LIST_APPEND
+
+/*********************************************************/
+/* Section oddfac: odd factorial, needed also by binomial*/
+/*********************************************************/
+
+/* FIXME: refine the following estimate. */
+
+/* FACTORS_PER_LIMB: lower bound on how many collected factors fit per
+   limb of the `factors' buffer, used only to size that buffer.  */
+#if TUNE_PROGRAM_BUILD
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD_LIMIT*FAC_DSC_THRESHOLD_LIMIT-1)+1) - 1)
+#else
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD*FAC_DSC_THRESHOLD-1)+1) - 1)
+#endif
+
+/* mpz_oddfac_1 computes the odd part of the factorial of the
+   parameter n.  I.e. n! = x 2^a, where x is the returned value: an
+   odd positive integer.
+
+   If flag != 0 a square is skipped in the DSC part, e.g.
+   if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
+
+   If n is too small, flag is ignored, and an ASSERT can be triggered.
+
+   TODO: FAC_DSC_THRESHOLD is used here with two different roles:
+    - to decide when prime factorisation is needed,
+    - to stop the recursion, once sieving is done.
+   Maybe two thresholds can do a better job.
+ */
+void
+mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
+{
+  ASSERT (n <= GMP_NUMB_MAX);
+  ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
+
+  if (n <= ODD_FACTORIAL_TABLE_LIMIT)
+    {
+      /* One-limb result, straight from the table.  */
+      MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n];
+      SIZ (x) = 1;
+    }
+  else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
+    {
+      mp_ptr   px;
+
+      /* Two-limb result as a product of two table entries: the odd
+	 double-factorial and the odd factorial of the halved index.  */
+      px = MPZ_NEWALLOC (x, 2);
+      umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
+      SIZ (x) = 2;
+    }
+  else
+    {
+      unsigned s;
+      mp_ptr   factors;
+
+      s = 0;
+      {
+	mp_limb_t tn;
+	mp_limb_t prod, max_prod;
+	mp_size_t j;
+	TMP_SDECL;
+
+#if TUNE_PROGRAM_BUILD
+	ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
+	ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
+#endif
+
+	/* Compute the number of recursive steps for the DSC algorithm. */
+	for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
+	  tn >>= 1;
+
+	j = 0;
+
+	TMP_SMARK;
+	factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
+	ASSERT (tn >= FACTORS_PER_LIMB);
+
+	prod = 1;
+#if TUNE_PROGRAM_BUILD
+	max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD_LIMIT * FAC_DSC_THRESHOLD_LIMIT);
+#else
+	max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD * FAC_DSC_THRESHOLD);
+#endif
+
+	/* Base case: compute odd(tn!) using odd(tn!) =
+	   odd(tn!!) * odd(floor(tn/2)!).  Each pass multiplies in the
+	   odd numbers of tn!! above the table range -- pairing terms
+	   into limb-sized factors -- then halves tn, until the tables
+	   cover the rest.  */
+	ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
+	do {
+	  factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
+	  mp_limb_t diff = (tn - ODD_DOUBLEFACTORIAL_TABLE_LIMIT) & -CNST_LIMB (2);
+	  if ((diff & 2) != 0)
+	    {
+	      /* Odd number of remaining terms: store the unpaired one.  */
+	      FACTOR_LIST_STORE (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff, prod, max_prod, factors, j);
+	      diff -= 2;
+	    }
+	  if (diff != 0)
+	    {
+	      /* Walk the remaining terms pairwise, smallest*largest
+		 first; fac is updated incrementally between pairs.  */
+	      mp_limb_t fac = (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2) *
+		(ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff);
+	      do {
+		FACTOR_LIST_STORE (fac, prod, max_prod, factors, j);
+		diff -= 4;
+		fac += diff * 2;
+	      } while (diff != 0);
+	    }
+	  max_prod <<= 2;
+	  tn >>= 1;
+	} while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
+
+	factors[j++] = prod;
+	factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
+	factors[j++] = __gmp_oddfac_table[tn >> 1];
+	mpz_prodlimbs (x, factors, j);
+
+	TMP_SFREE;
+      }
+
+      if (s != 0)
+	/* Use the algorithm described by Peter Luschny in "Divide,
+	   Swing and Conquer the Factorial!".
+
+	   Improvement: there are two temporary buffers, factors and
+	   square, that are never used together; with a good estimate
+	   of the maximal needed size, they could share a single
+	   allocation.
+	*/
+	{
+	  mpz_t mswing;
+	  mp_ptr sieve;
+	  mp_size_t size;
+	  TMP_DECL;
+
+	  TMP_MARK;
+
+	  /* flag-- turns flag==1 into 0 (skip the squaring at the last
+	     step) and flag==0 into an unreachable value, unsigned wrap
+	     making s == flag never true.  */
+	  flag--;
+	  size = n / GMP_NUMB_BITS + 4;
+	  ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
+	  /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
+	     one more can be overwritten by mul, another for the sieve */
+	  MPZ_TMP_INIT (mswing, size);
+	  /* Initialize size, so that ASSERT can check it correctly. */
+	  ASSERT_CODE (SIZ (mswing) = 0);
+
+	  /* Put the sieve on the second half, it will be overwritten by the last mswing. */
+	  sieve = PTR (mswing) + size / 2 + 1;
+
+	  size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
+
+	  factors = TMP_ALLOC_LIMBS (size);
+	  /* Unwind the recursion: x holds odd((n>>s)!); each step
+	     multiplies the 2-multiswing of n>>s by the square of the
+	     previous level, reaching odd(n!) at s == 0.  */
+	  do {
+	    mp_ptr    square, px;
+	    mp_size_t nx, ns;
+	    mp_limb_t cy;
+	    TMP_DECL;
+
+	    s--;
+	    ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
+	    mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
+
+	    TMP_MARK;
+	    nx = SIZ (x);
+	    if (s == flag) {
+	      /* Skipped squaring (double-factorial request).  */
+	      size = nx;
+	      square = TMP_ALLOC_LIMBS (size);
+	      MPN_COPY (square, PTR (x), nx);
+	    } else {
+	      size = nx << 1;
+	      square = TMP_ALLOC_LIMBS (size);
+	      mpn_sqr (square, PTR (x), nx);
+	      size -= (square[size - 1] == 0);
+	    }
+	    ns = SIZ (mswing);
+	    nx = size + ns;
+	    px = MPZ_NEWALLOC (x, nx);
+	    ASSERT (ns <= size);
+	    cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
+
+	    SIZ(x) = nx - (cy == 0);
+	    TMP_FREE;
+	  } while (s != 0);
+	  TMP_FREE;
+	}
+    }
+}
+
+#undef FACTORS_PER_LIMB
+#undef FACTOR_LIST_STORE
diff --git a/mpz/out_raw.c b/mpz/out_raw.c
new file mode 100644
index 0000000..b9fd086
--- /dev/null
+++ b/mpz/out_raw.c
@@ -0,0 +1,172 @@
+/* mpz_out_raw -- write an mpz_t in raw format.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* HTON_LIMB_STORE takes a normal host byte order limb and stores it as
+   network byte order (ie. big endian). */
+
+#if HAVE_LIMB_BIG_ENDIAN
+/* Limb already big endian in memory: a plain store suffices.  */
+#define HTON_LIMB_STORE(dst, limb)  do { *(dst) = (limb); } while (0)
+#endif
+
+#if HAVE_LIMB_LITTLE_ENDIAN
+/* Little endian limb: byte-swap while storing.  */
+#define HTON_LIMB_STORE(dst, limb)  BSWAP_LIMB_STORE (dst, limb)
+#endif
+
+#ifndef HTON_LIMB_STORE
+/* Generic fallback: store byte by byte, most significant first.  */
+#define HTON_LIMB_STORE(dst, limb)                                      \
+  do {                                                                  \
+    mp_limb_t  __limb = (limb);                                         \
+    char      *__p = (char *) (dst);                                    \
+    int        __i;                                                     \
+    for (__i = 0; __i < GMP_LIMB_BYTES; __i++)                       \
+      __p[__i] = (char) (__limb >> ((GMP_LIMB_BYTES-1 - __i) * 8));  \
+  } while (0)
+#endif
+
+
+/* Write x to fp (stdout when fp is null) in GMP raw format: a 4-byte
+   most-significant-first byte count -- negated, two's complement, when
+   x < 0 -- followed by the magnitude big-endian with high zero bytes
+   stripped.  Returns the total bytes written, or 0 on write error.  */
+size_t
+mpz_out_raw (FILE *fp, mpz_srcptr x)
+{
+  mp_size_t   xsize, abs_xsize, bytes, i;
+  mp_srcptr   xp;
+  char        *tp, *bp;
+  mp_limb_t   xlimb;
+  int         zeros;
+  size_t      tsize, ssize;
+
+  xsize = SIZ(x);
+  abs_xsize = ABS (xsize);
+  bytes = (abs_xsize * GMP_NUMB_BITS + 7) / 8;
+  /* Room for the size field (rounded up so the data stays limb
+     aligned) plus the magnitude.  */
+  tsize = ROUND_UP_MULTIPLE ((unsigned) 4, GMP_LIMB_BYTES) + bytes;
+
+  tp = __GMP_ALLOCATE_FUNC_TYPE (tsize, char);
+  bp = tp + ROUND_UP_MULTIPLE ((unsigned) 4, GMP_LIMB_BYTES);
+
+  if (bytes != 0)
+    {
+      /* Fill the buffer backwards from its end.  */
+      bp += bytes;
+      xp = PTR (x);
+      i = abs_xsize;
+
+      if (GMP_NAIL_BITS == 0)
+	{
+	  /* reverse limb order, and byte swap if necessary */
+#ifdef _CRAY
+	  _Pragma ("_CRI ivdep");
+#endif
+	  do
+	    {
+	      bp -= GMP_LIMB_BYTES;
+	      xlimb = *xp;
+	      HTON_LIMB_STORE ((mp_ptr) bp, xlimb);
+	      xp++;
+	    }
+	  while (--i > 0);
+
+	  /* strip high zero bytes (without fetching from bp) */
+	  count_leading_zeros (zeros, xlimb);
+	  zeros /= 8;
+	  bp += zeros;
+	  bytes -= zeros;
+	}
+      else
+	{
+	  /* Nails: concatenate the GMP_NUMB_BITS-wide numb fields into
+	     a byte stream, emitting bytes backwards from bp.  */
+	  mp_limb_t  new_xlimb;
+	  int        bits;
+	  ASSERT_CODE (char *bp_orig = bp - bytes);
+
+	  ASSERT_ALWAYS (GMP_NUMB_BITS >= 8);
+
+	  bits = 0;
+	  xlimb = 0;
+	  for (;;)
+	    {
+	      /* Drain whole bytes buffered in xlimb.  */
+	      while (bits >= 8)
+		{
+		  ASSERT (bp > bp_orig);
+		  *--bp = xlimb & 0xFF;
+		  xlimb >>= 8;
+		  bits -= 8;
+		}
+
+	      if (i == 0)
+		break;
+
+	      new_xlimb = *xp++;
+	      i--;
+	      ASSERT (bp > bp_orig);
+	      *--bp = (xlimb | (new_xlimb << bits)) & 0xFF;
+	      xlimb = new_xlimb >> (8 - bits);
+	      bits += GMP_NUMB_BITS - 8;
+	    }
+
+	  if (bits != 0)
+	    {
+	      ASSERT (bp > bp_orig);
+	      *--bp = xlimb;
+	    }
+
+	  ASSERT (bp == bp_orig);
+	  /* strip high zero bytes */
+	  while (*bp == 0)
+	    {
+	      bp++;
+	      bytes--;
+	    }
+	}
+    }
+
+  /* total bytes to be written */
+  ssize = 4 + bytes;
+
+  /* twos complement negative for the size value */
+  bytes = (xsize >= 0 ? bytes : -bytes);
+
+  /* so we don't rely on sign extension in ">>" */
+  ASSERT_ALWAYS (sizeof (bytes) >= 4);
+
+  /* size field, big endian, immediately before the data */
+  bp[-4] = bytes >> 24;
+  bp[-3] = bytes >> 16;
+  bp[-2] = bytes >> 8;
+  bp[-1] = bytes;
+  bp -= 4;
+
+  if (fp == 0)
+    fp = stdout;
+  if (fwrite (bp, ssize, 1, fp) != 1)
+    ssize = 0;
+
+  (*__gmp_free_func) (tp, tsize);
+  return ssize;
+}
diff --git a/mpz/out_str.c b/mpz/out_str.c
new file mode 100644
index 0000000..b1d8ae8
--- /dev/null
+++ b/mpz/out_str.c
@@ -0,0 +1,108 @@
+/* mpz_out_str(stream, base, integer) -- Output to STREAM the multi prec.
+   integer INTEGER in base BASE.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2005, 2011, 2012, 2017 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Print x to stream (stdout when null) in the given base and return
+   the number of characters written, or 0 on error.  Bases 2..36 use
+   lowercase digits, 37..62 use 0-9A-Za-z, -2..-36 use uppercase;
+   base -1, 0 or 1 falls back to decimal; anything else is an error.  */
+size_t
+mpz_out_str (FILE *stream, int base, mpz_srcptr x)
+{
+  mp_ptr xp;
+  mp_size_t x_size = SIZ (x);
+  unsigned char *str;
+  size_t str_size;
+  size_t i;
+  size_t written;
+  const char *num_to_text;
+  TMP_DECL;
+
+  if (stream == 0)
+    stream = stdout;
+
+  num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+  if (base > 1)
+    {
+      if (base <= 36)
+	num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+      else if (UNLIKELY (base > 62))
+	    return 0;
+    }
+  else if (base > -2)
+    {
+      /* base is -1, 0 or 1: treat as decimal */
+      base = 10;
+    }
+  else
+    {
+      /* negative base selects the uppercase digit table */
+      base = -base;
+      if (UNLIKELY (base > 36))
+	return 0;
+    }
+
+  written = 0;
+
+  if (x_size < 0)
+    {
+      fputc ('-', stream);
+      x_size = -x_size;
+      written = 1;
+    }
+
+  TMP_MARK;
+
+  /* Upper bound on the digit count for x_size limbs, plus slack.  */
+  DIGITS_IN_BASE_PER_LIMB (str_size, x_size, base);
+  str_size += 3;
+  str = (unsigned char *) TMP_ALLOC (str_size);
+
+  xp = PTR (x);
+  if (! POW2_P (base))
+    {
+      /* mpn_get_str clobbers its operand for non-power-of-2 bases, so
+	 hand it a scratch copy.  */
+      xp = TMP_ALLOC_LIMBS (x_size | 1);  /* |1 in case x_size==0 */
+      MPN_COPY (xp, PTR (x), x_size);
+    }
+
+  str_size = mpn_get_str (str, base, xp, x_size);
+
+  /* Convert result to printable chars.  */
+  for (i = 0; i < str_size; i++)
+    str[i] = num_to_text[str[i]];
+  str[str_size] = 0;
+
+  {
+    size_t fwret;
+    fwret = fwrite ((char *) str, 1, str_size, stream);
+    written += fwret;
+  }
+
+  TMP_FREE;
+  return ferror (stream) ? 0 : written;
+}
diff --git a/mpz/perfpow.c b/mpz/perfpow.c
new file mode 100644
index 0000000..9bbb497
--- /dev/null
+++ b/mpz/perfpow.c
@@ -0,0 +1,38 @@
+/* mpz_perfect_power_p(arg) -- Return non-zero if ARG is a perfect power,
+   zero otherwise.
+
+Copyright 1998-2001, 2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Thin wrapper around mpn_perfect_power_p.  The sign of u is carried
+   through the signed size SIZ(u), so the mpn routine sees negative
+   operands as such (per the GMP manual, those are perfect powers only
+   for odd exponents).  */
+int
+mpz_perfect_power_p (mpz_srcptr u)
+{
+  return mpn_perfect_power_p (PTR (u), SIZ (u));
+}
diff --git a/mpz/perfsqr.c b/mpz/perfsqr.c
new file mode 100644
index 0000000..c2ac924
--- /dev/null
+++ b/mpz/perfsqr.c
@@ -0,0 +1,34 @@
+/* mpz_perfect_square_p(arg) -- Return non-zero if ARG is a perfect square,
+   zero otherwise.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_perfect_square_p 1
+
+#include "gmp-impl.h"
diff --git a/mpz/popcount.c b/mpz/popcount.c
new file mode 100644
index 0000000..e9a85bb
--- /dev/null
+++ b/mpz/popcount.c
@@ -0,0 +1,34 @@
+/* mpz_popcount(mpz_ptr op) -- Population count of OP.  If the operand is
+   negative, return ~0 (a novel representation of infinity).
+
+Copyright 1994, 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_popcount 1
+
+#include "gmp-impl.h"
diff --git a/mpz/pow_ui.c b/mpz/pow_ui.c
new file mode 100644
index 0000000..8bafaa7
--- /dev/null
+++ b/mpz/pow_ui.c
@@ -0,0 +1,52 @@
+/* mpz_pow_ui -- mpz raised to ulong.
+
+Copyright 2001, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set r = b^e.  The exponents 0, 1 and 2 are dispatched to the cheap
+   dedicated entry points (set to one, copy, square) to skip the setup
+   cost of the general mpz_n_pow_ui routine; everything else goes to
+   the general routine.  */
+void
+mpz_pow_ui (mpz_ptr r, mpz_srcptr b, unsigned long int e)
+{
+  if (e == 0)
+    {
+      /* b^0 = 1 for every b, including zero.  */
+      mpz_set_ui (r, 1);
+    }
+  else if (e == 1)
+    {
+      mpz_set (r, b);
+    }
+  else if (e == 2)
+    {
+      mpz_mul (r, b, b);
+    }
+  else
+    {
+      mpz_n_pow_ui (r, PTR(b), (mp_size_t) SIZ(b), e);
+    }
+}
diff --git a/mpz/powm.c b/mpz/powm.c
new file mode 100644
index 0000000..f1bf8e3
--- /dev/null
+++ b/mpz/powm.c
@@ -0,0 +1,282 @@
+/* mpz_powm(res,base,exp,mod) -- Set R to (U^E) mod M.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2008, 2009,
+2011, 2012, 2015, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* TODO
+
+ * Improve handling of buffers.  It is pretty ugly now.
+
+ * For even moduli, we compute a binvert of its odd part both here and in
+   mpn_powm.  How can we avoid this recomputation?
+*/
+
+/*
+  b ^ e mod m   res
+  0   0     0    ?
+  0   e     0    ?
+  0   0     m    ?
+  0   e     m    0
+  b   0     0    ?
+  b   e     0    ?
+  b   0     m    1 mod m
+  b   e     m    b^e mod m
+*/
+
+/* When set, a negative exponent is handled by inverting b mod m
+   (raising DIVIDE_BY_ZERO if the inverse does not exist); when clear,
+   a negative exponent raises DIVIDE_BY_ZERO directly.  */
+#define HANDLE_NEGATIVE_EXPONENT 1
+
+/* Set r = b^e mod m.  m must be non-zero; even moduli are supported
+   by splitting m into odd part times power of two and recombining the
+   two partial residues (see below).  The result is the canonical
+   representative in [0, |m|), including for negative b.  */
+void
+mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
+{
+  mp_size_t n, nodd, ncnt;
+  int cnt;
+  mp_ptr rp, tp;
+  mp_srcptr bp, ep, mp;
+  mp_size_t rn, bn, es, en, itch;
+  mpz_t new_b;			/* note: value lives long via 'b' */
+  TMP_DECL;
+
+  n = ABSIZ(m);
+  if (UNLIKELY (n == 0))
+    DIVIDE_BY_ZERO;
+
+  mp = PTR(m);
+
+  TMP_MARK;
+
+  es = SIZ(e);
+  if (UNLIKELY (es <= 0))
+    {
+      if (es == 0)
+	{
+	  /* b^0 mod m,  b is anything and m is non-zero.
+	     Result is 1 mod m, i.e., 1 or 0 depending on if m = 1.  */
+	  SIZ(r) = n != 1 || mp[0] != 1;
+	  MPZ_NEWALLOC (r, 1)[0] = 1;
+	  TMP_FREE;	/* we haven't really allocated anything here */
+	  return;
+	}
+#if HANDLE_NEGATIVE_EXPONENT
+      /* Negative exponent: replace b by b^-1 mod m and negate e.  */
+      MPZ_TMP_INIT (new_b, n + 1);
+
+      if (UNLIKELY (! mpz_invert (new_b, b, m)))
+	DIVIDE_BY_ZERO;
+      b = new_b;
+      es = -es;
+#else
+      DIVIDE_BY_ZERO;
+#endif
+    }
+  en = es;
+
+  bn = ABSIZ(b);
+
+  if (UNLIKELY (bn == 0))
+    {
+      /* 0^e mod m = 0 (e > 0 here).  */
+      SIZ(r) = 0;
+      TMP_FREE;
+      return;
+    }
+
+  ep = PTR(e);
+
+  /* Handle (b^1 mod m) early, since mpn_pow* do not handle that case.  */
+  if (UNLIKELY (en == 1 && ep[0] == 1))
+    {
+      rp = TMP_ALLOC_LIMBS (n);
+      bp = PTR(b);
+      if (bn >= n)
+	{
+	  /* |b| >= m: reduce with a full division.  */
+	  mp_ptr qp = TMP_ALLOC_LIMBS (bn - n + 1);
+	  mpn_tdiv_qr (qp, rp, 0L, bp, bn, mp, n);
+	  rn = n;
+	  MPN_NORMALIZE (rp, rn);
+
+	  if (rn != 0 && SIZ(b) < 0)
+	    {
+	      /* Negative b: fold the residue into [0, m).  */
+	      mpn_sub (rp, mp, n, rp, rn);
+	      rn = n;
+	      MPN_NORMALIZE_NOT_ZERO (rp, rn);
+	    }
+	}
+      else
+	{
+	  if (SIZ(b) < 0)
+	    {
+	      mpn_sub (rp, mp, n, bp, bn);
+	      rn = n;
+	      MPN_NORMALIZE_NOT_ZERO (rp, rn);
+	    }
+	  else
+	    {
+	      MPN_COPY (rp, bp, bn);
+	      rn = bn;
+	    }
+	}
+      goto ret;
+    }
+
+  /* Remove low zero limbs from M.  This loop will terminate for correctly
+     represented mpz numbers.  */
+  ncnt = 0;
+  while (UNLIKELY (mp[0] == 0))
+    {
+      mp++;
+      ncnt++;
+    }
+  nodd = n - ncnt;
+  cnt = 0;
+  if (mp[0] % 2 == 0)
+    {
+      /* Shift out the remaining low zero bits, leaving the odd part of
+	 m in newmp; ncnt/cnt record the 2^k cofactor.  */
+      mp_ptr newmp = TMP_ALLOC_LIMBS (nodd);
+      count_trailing_zeros (cnt, mp[0]);
+      mpn_rshift (newmp, mp, nodd, cnt);
+      nodd -= newmp[nodd - 1] == 0;
+      mp = newmp;
+      ncnt++;
+    }
+
+  if (ncnt != 0)
+    {
+      /* We will call both mpn_powm and mpn_powlo.  */
+      /* rp needs n, mpn_powlo needs 4n, the 2 mpn_binvert might need more */
+      mp_size_t n_largest_binvert = MAX (ncnt, nodd);
+      mp_size_t itch_binvert = mpn_binvert_itch (n_largest_binvert);
+      itch = 3 * n + MAX (itch_binvert, 2 * n);
+    }
+  else
+    {
+      /* We will call just mpn_powm.  */
+      mp_size_t itch_binvert = mpn_binvert_itch (nodd);
+      itch = n + MAX (itch_binvert, 2 * n);
+    }
+  tp = TMP_ALLOC_LIMBS (itch);
+
+  rp = tp;  tp += n;
+
+  bp = PTR(b);
+  mpn_powm (rp, bp, bn, ep, en, mp, nodd, tp);
+
+  rn = n;
+
+  if (ncnt != 0)
+    {
+      /* m was even: rp currently holds b^e mod (odd part of m).
+	 Compute r2 = b^e mod 2^(k) with mpn_powlo, then recombine the
+	 two residues CRT-style via odd_inv_2exp, the inverse of the
+	 odd part modulo the power of two.  */
+      mp_ptr r2, xp, yp, odd_inv_2exp;
+      unsigned long t;
+      int bcnt;
+
+      if (bn < ncnt)
+	{
+	  /* Zero-pad b up to the ncnt limbs mpn_powlo reads.  */
+	  mp_ptr newbp = TMP_ALLOC_LIMBS (ncnt);
+	  MPN_COPY (newbp, bp, bn);
+	  MPN_ZERO (newbp + bn, ncnt - bn);
+	  bp = newbp;
+	}
+
+      r2 = tp;
+
+      if (bp[0] % 2 == 0)
+	{
+	  /* Even base: b^e mod 2^t is zero once e supplies at least t
+	     factors of two; detect that cheaply and skip mpn_powlo.  */
+	  if (en > 1)
+	    {
+	      MPN_ZERO (r2, ncnt);
+	      goto zero;
+	    }
+
+	  ASSERT (en == 1);
+	  t = (ncnt - (cnt != 0)) * GMP_NUMB_BITS + cnt;
+
+	  /* Count number of low zero bits in B, up to 3.  */
+	  bcnt = (0x1213 >> ((bp[0] & 7) << 1)) & 0x3;
+	  /* Note that ep[0] * bcnt might overflow, but that just results
+	     in a missed optimization.  */
+	  if (ep[0] * bcnt >= t)
+	    {
+	      MPN_ZERO (r2, ncnt);
+	      goto zero;
+	    }
+	}
+
+      mpn_powlo (r2, bp, ep, en, ncnt, tp + ncnt);
+
+    zero:
+      if (nodd < ncnt)
+	{
+	  /* Zero-pad the odd part for the binvert/mullo calls below.  */
+	  mp_ptr newmp = TMP_ALLOC_LIMBS (ncnt);
+	  MPN_COPY (newmp, mp, nodd);
+	  MPN_ZERO (newmp + nodd, ncnt - nodd);
+	  mp = newmp;
+	}
+
+      odd_inv_2exp = tp + n;
+      mpn_binvert (odd_inv_2exp, mp, ncnt, tp + 2 * n);
+
+      mpn_sub (r2, r2, ncnt, rp, nodd > ncnt ? ncnt : nodd);
+
+      xp = tp + 2 * n;
+      mpn_mullo_n (xp, odd_inv_2exp, r2, ncnt);
+
+      if (cnt != 0)
+	xp[ncnt - 1] &= (CNST_LIMB(1) << cnt) - 1;
+
+      yp = tp;
+      if (ncnt > nodd)
+	mpn_mul (yp, xp, ncnt, mp, nodd);
+      else
+	mpn_mul (yp, mp, nodd, xp, ncnt);
+
+      mpn_add (rp, yp, n, rp, nodd);
+
+      ASSERT (nodd + ncnt >= n);
+      ASSERT (nodd + ncnt <= n + 1);
+    }
+
+  MPN_NORMALIZE (rp, rn);
+
+  /* Odd exponent and negative base: the mpn layer worked on |b|, so
+     map the non-zero residue back into [0, |m|).  */
+  if ((ep[0] & 1) && SIZ(b) < 0 && rn != 0)
+    {
+      mpn_sub (rp, PTR(m), n, rp, rn);
+      rn = n;
+      MPN_NORMALIZE (rp, rn);
+    }
+
+ ret:
+  MPZ_NEWALLOC (r, rn);
+  SIZ(r) = rn;
+  MPN_COPY (PTR(r), rp, rn);
+
+  TMP_FREE;
+}
diff --git a/mpz/powm_sec.c b/mpz/powm_sec.c
new file mode 100644
index 0000000..a2581a8
--- /dev/null
+++ b/mpz/powm_sec.c
@@ -0,0 +1,102 @@
+/* mpz_powm_sec(res,base,exp,mod) -- Set R to (U^E) mod M.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2008, 2009,
+2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+
+
/* Set R = B^E mod M using the side-channel-resistant mpn_sec_powm
   kernel (execution pattern intended to depend only on operand sizes,
   not values -- see the GMP manual on mpz_powm_sec).  Requirements
   enforced here: M must be non-zero and odd, and E must be
   non-negative; E < 0 or an unsuitable M raises DIVIDE_BY_ZERO.  */
void
mpz_powm_sec (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
{
  mp_size_t n;
  mp_ptr rp, tp;
  mp_srcptr bp, ep, mp;
  mp_size_t rn, bn, es, en;
  TMP_DECL;

  n = ABSIZ(m);

  mp = PTR(m);

  /* The kernel requires an odd, non-zero modulus.  */
  if (UNLIKELY ((n == 0) || (mp[0] % 2 == 0)))
    DIVIDE_BY_ZERO;

  es = SIZ(e);
  if (UNLIKELY (es <= 0))
    {
      if (es == 0)
	{
	  /* b^0 mod m,  b is anything and m is non-zero.
	     Result is 1 mod m, i.e., 1 or 0 depending on if m = 1.  */
	  SIZ(r) = n != 1 || mp[0] != 1;
	  MPZ_NEWALLOC (r, 1)[0] = 1;
	  return;
	}
      /* Negative exponent is not supported by this interface.  */
      DIVIDE_BY_ZERO;
    }
  en = es;

  bn = ABSIZ(b);

  /* 0^E = 0 for E > 0.  */
  if (UNLIKELY (bn == 0))
    {
      SIZ(r) = 0;
      return;
    }

  TMP_MARK;
  TMP_ALLOC_LIMBS_2 (rp, n,
		     tp, mpn_sec_powm_itch (bn, en * GMP_NUMB_BITS, n));

  bp = PTR(b);
  ep = PTR(e);

  /* The exponent is passed as a full en-limb bit string (including any
     high zero bits), keeping the scan length independent of its value.  */
  mpn_sec_powm (rp, bp, bn, ep, en * GMP_NUMB_BITS, mp, n, tp);

  rn = n;

  MPN_NORMALIZE (rp, rn);

  /* B < 0 with an odd exponent gives a negative power; fold the mpn
     result r into m - r so the answer is the canonical residue of the
     signed value.  (ep[0] & 1 is the exponent's parity.)  */
  if ((ep[0] & 1) && SIZ(b) < 0 && rn != 0)
    {
      mpn_sub (rp, PTR(m), n, rp, rn);
      rn = n;
      MPN_NORMALIZE (rp, rn);
    }

  MPZ_NEWALLOC (r, rn);
  SIZ(r) = rn;
  MPN_COPY (PTR(r), rp, rn);

  TMP_FREE;
}
diff --git a/mpz/powm_ui.c b/mpz/powm_ui.c
new file mode 100644
index 0000000..23d3ed8
--- /dev/null
+++ b/mpz/powm_ui.c
@@ -0,0 +1,281 @@
+/* mpz_powm_ui(res,base,exp,mod) -- Set R to (B^E) mod M.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2008, 2009,
+2011-2013, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This code is very old, and should be rewritten to current GMP standard.  It
+   is slower than mpz_powm for large exponents, but also for small exponents
+   when the mod argument is small.
+
+   As an intermediate solution, we now deflect to mpz_powm for exponents >= 20.
+*/
+
+/*
+  b ^ e mod m   res
+  0   0     0    ?
+  0   e     0    ?
+  0   0     m    ?
+  0   e     m    0
+  b   0     0    ?
+  b   e     0    ?
+  b   0     m    1 mod m
+  b   e     m    b^e mod m
+*/
+
/* Reduce {np,nn} modulo {dp,dn} in place: on return the remainder
   occupies the low dn limbs of np.  The quotient is written to the
   scratch area tp and discarded.  The divisor must be normalized (its
   most significant limb's top bit set -- the caller shifts m for this)
   and dinv must hold the precomputed inverse of its top limb(s) from
   invert_pi1.  Dispatches on operand sizes between the schoolbook,
   divide-and-conquer and block-wise (mu) division routines.  */
static void
mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
{
  mp_ptr qp = tp;

  if (dn == 1)
    {
      np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
    }
  else if (dn == 2)
    {
      mpn_div_qr_2n_pi1 (qp, np, np, nn, dp[1], dp[0], dinv->inv32);
    }
  else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
	   BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
    {
      mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv->inv32);
    }
  else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
	   BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
	   (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
	   + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
    {
      mpn_dcpi1_div_qr (qp, np, nn, dp, dn, dinv);
    }
  else
    {
      /* We need to allocate separate remainder area, since mpn_mu_div_qr does
	 not handle overlap between the numerator and remainder areas.
	 FIXME: Make it handle such overlap.  */
      mp_ptr rp, scratch;
      mp_size_t itch;
      TMP_DECL;
      TMP_MARK;

      itch = mpn_mu_div_qr_itch (nn, dn, 0);
      rp = TMP_BALLOC_LIMBS (dn);
      scratch = TMP_BALLOC_LIMBS (itch);

      mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
      MPN_COPY (np, rp, dn);

      TMP_FREE;
    }
}
+
+/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
+   t is defined by (tp,mn).  */
+static void
+reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn, gmp_pi1_t *dinv)
+{
+  mp_ptr rp, scratch;
+  TMP_DECL;
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_2 (rp, an, scratch, an - mn + 1);
+  MPN_COPY (rp, ap, an);
+  mod (rp, an, mp, mn, dinv, scratch);
+  MPN_COPY (tp, rp, mn);
+
+  TMP_FREE;
+}
+
/* Set R = B^el mod M.  For el < 20 an old-style left-to-right binary
   exponentiation with division-based reduction is used; larger
   exponents are deflected to the more sophisticated mpz_powm (see the
   note at the top of this file).  */
void
mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
{
  if (el < 20)
    {
      mp_ptr xp, tp, mp, bp, scratch;
      mp_size_t xn, tn, mn, bn;
      int m_zero_cnt;
      int c;
      mp_limb_t e, m2;
      gmp_pi1_t dinv;
      TMP_DECL;

      mp = PTR(m);
      mn = ABSIZ(m);
      if (UNLIKELY (mn == 0))
	DIVIDE_BY_ZERO;

      if (el <= 1)
	{
	  if (el == 1)
	    {
	      /* B^1 mod M is just a reduction.  */
	      mpz_mod (r, b, m);
	      return;
	    }
	  /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
	     M equals 1.  */
	  SIZ(r) = mn != 1 || mp[0] != 1;
	  MPZ_NEWALLOC (r, 1)[0] = 1;
	  return;
	}

      TMP_MARK;

      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}

      /* Precompute the inverse of m's top limb(s) for the quotient
	 estimation done inside mod().  */
      m2 = mn == 1 ? 0 : mp[mn - 2];
      invert_pi1 (dinv, mp[mn - 1], m2);

      bn = ABSIZ(b);
      bp = PTR(b);
      if (bn > mn)
	{
	  /* Reduce possibly huge base.  Use a function call to reduce, since we
	     don't want the quotient allocation to live until function return.  */
	  mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
	  reduce (new_bp, bp, bn, mp, mn, &dinv);
	  bp = new_bp;
	  bn = mn;
	  /* Canonicalize the base, since we are potentially going to multiply with
	     it quite a few times.  */
	  MPN_NORMALIZE (bp, bn);
	}

      /* Base reduced to zero: the power is zero too (el > 0).  */
      if (bn == 0)
	{
	  SIZ(r) = 0;
	  TMP_FREE;
	  return;
	}

      TMP_ALLOC_LIMBS_3 (xp, mn, scratch, mn + 1, tp, 2 * mn + 1);

      /* x starts as the (reduced) base; it accounts for the exponent's
	 most significant bit.  */
      MPN_COPY (xp, bp, bn);
      xn = bn;

      /* Left-align the exponent bits in e and drop the most significant
	 one (already consumed by the initial x = b); c counts the bits
	 left to process.  Each iteration squares, and multiplies by b
	 when the bit now sitting in the sign position is 1.  */
      e = el;
      count_leading_zeros (c, e);
      e = (e << c) << 1;		/* shift the exp bits to the left, lose msb */
      c = GMP_LIMB_BITS - 1 - c;

      ASSERT (c != 0); /* el > 1 */
	{
	  /* Main loop. */
	  do
	    {
	      mpn_sqr (tp, xp, xn);
	      tn = 2 * xn; tn -= tp[tn - 1] == 0;
	      if (tn < mn)
		{
		  /* Still smaller than m; no reduction needed.  */
		  MPN_COPY (xp, tp, tn);
		  xn = tn;
		}
	      else
		{
		  mod (tp, tn, mp, mn, &dinv, scratch);
		  MPN_COPY (xp, tp, mn);
		  xn = mn;
		}

	      /* Top bit of e set: multiply in the base.  */
	      if ((mp_limb_signed_t) e < 0)
		{
		  mpn_mul (tp, xp, xn, bp, bn);
		  tn = xn + bn; tn -= tp[tn - 1] == 0;
		  if (tn < mn)
		    {
		      MPN_COPY (xp, tp, tn);
		      xn = tn;
		    }
		  else
		    {
		      mod (tp, tn, mp, mn, &dinv, scratch);
		      MPN_COPY (xp, tp, mn);
		      xn = mn;
		    }
		}
	      e <<= 1;
	      c--;
	    }
	  while (c != 0);
	}

      /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
	 with the original M.  */
      if (m_zero_cnt != 0)
	{
	  mp_limb_t cy;
	  cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
	  tp[xn] = cy; xn += cy != 0;

	  if (xn >= mn)
	    {
	      mod (tp, xn, mp, mn, &dinv, scratch);
	      xn = mn;
	    }
	  mpn_rshift (xp, tp, xn, m_zero_cnt);
	}
      MPN_NORMALIZE (xp, xn);

      /* Negative base with odd exponent: fold x into |M| - x so the
	 result is the canonical residue of the negative power.  */
      if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
	{
	  mp = PTR(m);			/* want original, unnormalized m */
	  mpn_sub (xp, mp, mn, xp, xn);
	  xn = mn;
	  MPN_NORMALIZE (xp, xn);
	}
      MPZ_NEWALLOC (r, xn);
      SIZ (r) = xn;
      MPN_COPY (PTR(r), xp, xn);

      TMP_FREE;
    }
  else
    {
      /* For large exponents, fake an mpz_t exponent and deflect to the more
	 sophisticated mpz_powm.  */
      mpz_t e;
      mp_limb_t ep[LIMBS_PER_ULONG];
      MPZ_FAKE_UI (e, ep, el);
      mpz_powm (r, b, e, m);
    }
}
diff --git a/mpz/pprime_p.c b/mpz/pprime_p.c
new file mode 100644
index 0000000..b8a21c2
--- /dev/null
+++ b/mpz/pprime_p.c
@@ -0,0 +1,166 @@
+/* mpz_probab_prime_p --
+   An implementation of the probabilistic primality test found in Knuth's
+   Seminumerical Algorithms book.  If the function mpz_probab_prime_p()
+   returns 0 then n is not prime.  If it returns 1, then n is 'probably'
+   prime.  If it returns 2, n is surely prime.  The probability of a false
+   positive is (1/4)**reps, where reps is the number of internal passes of the
+   probabilistic algorithm.  Knuth indicates that 25 passes are reasonable.
+
+Copyright 1991, 1993, 1994, 1996-2002, 2005, 2015, 2016 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static int isprime (unsigned long int);
+
+
+/* MPN_MOD_OR_MODEXACT_1_ODD can be used instead of mpn_mod_1 for the trial
+   division.  It gives a result which is not the actual remainder r but a
+   value congruent to r*2^n mod d.  Since all the primes being tested are
+   odd, r*2^n mod p will be 0 if and only if r mod p is 0.  */
+
/* Probabilistic primality test (see the file header): returns 2 when n
   is certainly prime (small |n| settled by trial division), 1 when n is
   probably prime, and 0 when n is certainly composite.  reps is the
   number of Miller-Rabin passes; each pass has at most a 1/4
   false-positive probability.  */
int
mpz_probab_prime_p (mpz_srcptr n, int reps)
{
  mp_limb_t r;
  mpz_t n2;

  /* Handle small and negative n.  */
  if (mpz_cmp_ui (n, 1000000L) <= 0)
    {
      if (mpz_cmpabs_ui (n, 1000000L) <= 0)
	{
	  int is_prime;
	  unsigned long n0;
	  n0 = mpz_get_ui (n);
	  /* n0 & (n0 > 1) is non-zero iff n0 is odd AND greater than 1;
	     those go to trial division.  Everything else is prime only
	     when n0 == 2.  */
	  is_prime = n0 & (n0 > 1) ? isprime (n0) : n0 == 2;
	  return is_prime ? 2 : 0;
	}
      /* Negative number.  Negate and fall out.  */
      /* n2 is a shallow alias: it shares n's limbs with the sign
	 flipped, and is only ever read below (never reallocated), so
	 leaving ALLOC(n2) uninitialized is harmless.  */
      PTR(n2) = PTR(n);
      SIZ(n2) = -SIZ(n);
      n = n2;
    }

  /* If n is now even, it is not a prime.  */
  if (mpz_even_p (n))
    return 0;

#if defined (PP)
  /* Check if n has small factors.  */
#if defined (PP_INVERTED)
  r = MPN_MOD_OR_PREINV_MOD_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP,
			       (mp_limb_t) PP_INVERTED);
#else
  r = mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP);
#endif
  /* PP is a product of small primes; testing the single-limb remainder
     against each of them is equivalent to testing n itself.  */
  if (r % 3 == 0
#if GMP_LIMB_BITS >= 4
      || r % 5 == 0
#endif
#if GMP_LIMB_BITS >= 8
      || r % 7 == 0
#endif
#if GMP_LIMB_BITS >= 16
      || r % 11 == 0 || r % 13 == 0
#endif
#if GMP_LIMB_BITS >= 32
      || r % 17 == 0 || r % 19 == 0 || r % 23 == 0 || r % 29 == 0
#endif
#if GMP_LIMB_BITS >= 64
      || r % 31 == 0 || r % 37 == 0 || r % 41 == 0 || r % 43 == 0
      || r % 47 == 0 || r % 53 == 0
#endif
      )
    {
      return 0;
    }
#endif /* PP */

  /* Do more dividing.  We collect small primes, using umul_ppmm, until we
     overflow a single limb.  We divide our number by the small primes product,
     and look for factors in the remainder.  */
  {
    unsigned long int ln2;
    unsigned long int q;
    mp_limb_t p1, p0, p;
    unsigned int primes[15];
    int nprimes;

    nprimes = 0;
    p = 1;
    ln2 = mpz_sizeinbase (n, 2);	/* FIXME: tune this limit */
    for (q = PP_FIRST_OMITTED; q < ln2; q += 2)
      {
	if (isprime (q))
	  {
	    umul_ppmm (p1, p0, p, q);	/* (p1,p0) = p * q */
	    if (p1 != 0)
	      {
		/* Product no longer fits in one limb: reduce n by the
		   primes collected so far in one pass, check each for a
		   hit, then restart the product with q.  */
		r = MPN_MOD_OR_MODEXACT_1_ODD (PTR(n), (mp_size_t) SIZ(n), p);
		while (--nprimes >= 0)
		  if (r % primes[nprimes] == 0)
		    {
		      ASSERT_ALWAYS (mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) primes[nprimes]) == 0);
		      return 0;
		    }
		p = q;
		nprimes = 0;
	      }
	    else
	      {
		p = p0;
	      }
	    primes[nprimes++] = q;
	  }
      }
    /* NOTE(review): primes still accumulated in the final partial
       product when the loop ends are never trial-divided; the
       Miller-Rabin stage below still decides those cases.  */
  }

  /* Perform a number of Miller-Rabin tests.  */
  return mpz_millerrabin (n, reps);
}
+
/* Trial-division primality test for odd T >= 3.  Probes the odd
   divisor candidates 3, 5, 7, ... and succeeds as soon as the quotient
   drops below the divisor (all candidates up to sqrt(T) exhausted).
   Returns non-zero iff T is prime.  */
static int
isprime (unsigned long int t)
{
  unsigned long int div, quot, rem;

  ASSERT (t >= 3 && (t & 1) != 0);

  for (div = 3;; div += 2)
    {
      quot = t / div;
      rem = t - quot * div;
      if (quot < div)
	return 1;		/* passed sqrt(t) without a hit: prime */
      if (rem == 0)
	return 0;		/* div divides t evenly: composite */
    }
}
diff --git a/mpz/primorial_ui.c b/mpz/primorial_ui.c
new file mode 100644
index 0000000..b1176c7
--- /dev/null
+++ b/mpz/primorial_ui.c
@@ -0,0 +1,131 @@
+/* mpz_primorial_ui(RES, N) -- Set RES to N# the product of primes <= N.
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012, 2015, 2016, 2021 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* TODO: Remove duplicated constants / macros / static functions...
+ */
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
/* Fold the factor P into the running single-limb product PR.  When PR
   has already exceeded MAX_PR, multiplying further could overflow a
   limb, so PR is first flushed into VEC[I] (advancing I) and the
   product restarts at P.  The caller chooses MAX_PR so that
   PR <= MAX_PR guarantees PR * P still fits in a limb.  */
#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
  do {								\
    if ((PR) > (MAX_PR)) {					\
      (VEC)[(I)++] = (PR);					\
      (PR) = (P);						\
    } else							\
      (PR) *= (P);						\
  } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
#if WANT_ASSERT
/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1: bit index in the sieve
   for candidate n.  The sieve stores one bit per number that is
   coprime to 6 (the decoding loop below walks 5, 7, 11, 13, ...), so
   n is assumed >= 5 and not divisible by 2 or 3.  */
static mp_limb_t
n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }

/* Number of limbs needed for a sieve covering candidates up to n.  */
static mp_size_t
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
#endif
+
+/*********************************************************/
+/* Section primorial: implementation                     */
+/*********************************************************/
+
/* Set RES to n#, the product of all primes <= n.  */
void
mpz_primorial_ui (mpz_ptr res, unsigned long n)
{
  ASSERT (n <= GMP_NUMB_MAX);

  if (n < 5)
    {
      /* The smallest 5 results for primorial are stored */
      /* in a 15-bits constant (five octal digits)	 */
      /* 066211 octal packs n# for n = 0..4 (1, 1, 2, 6, 6), three bits
	 per entry, least significant entry first.  */
      MPZ_NEWALLOC (res, 1)[0] = (066211 >> (n * 3)) & 7;
      SIZ (res) = 1;
    }
  else
    {
      mp_limb_t *sieve, *factors;
      mp_size_t size, j;
      mp_limb_t prod;
      TMP_DECL;

      /* Try to estimate the result size, to avoid	*/
      /* resizing, and to initially store the sieve.	*/
      /* The sieve is built directly in res's limb buffer; it is fully
	 consumed before the result is written there.  */
      size = n / GMP_NUMB_BITS;
      size = size + (size >> 1) + 1;
      ASSERT (size >= primesieve_size (n));
      sieve = MPZ_NEWALLOC (res, size);
      /* gmp_primesieve returns a prime count; turn that into an
	 estimated limb count for the factor list.  */
      size = (gmp_primesieve (sieve, n) + 1) / log_n_max (n) + 1;

      TMP_MARK;
      factors = TMP_ALLOC_LIMBS (size);

      j = 0;

      /* Seed the running product with 2 * 3, the primes below the
	 sieve's first candidate (5).  */
      prod = 6;

      /* Store primes from 5 to n */
      {
	mp_limb_t max_prod;

	/* While prod <= max_prod, multiplying by any prime <= n cannot
	   overflow a limb (see FACTOR_LIST_STORE).  */
	max_prod = GMP_NUMB_MAX / n;

	/* Loop on sieved primes. */
	/* Each sieve bit covers a span of 3 starting at b; b|1 recovers
	   the odd candidate (5, 7, 11, 13, ...).  Bits are complemented
	   so that x & 1 marks a prime.  */
	for (mp_limb_t i = 4, *sp = sieve; i < n; i += GMP_LIMB_BITS * 3)
	  for (mp_limb_t b = i, x = ~ *(sp++); x != 0; b += 3, x >>= 1)
	    if (x & 1)
	      {
		mp_limb_t prime = b | 1;
		FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
	      }
      }

      if (j != 0)
	{
	  /* Flush the last partial product and multiply the collected
	     limb-sized factors together.  */
	  factors[j++] = prod;
	  mpz_prodlimbs (res, factors, j);
	}
      else
	{
	  /* Everything fitted in a single limb.  */
	  PTR (res)[0] = prod;
	  SIZ (res) = 1;
	}

      TMP_FREE;
    }
}
diff --git a/mpz/prodlimbs.c b/mpz/prodlimbs.c
new file mode 100644
index 0000000..23f06a1
--- /dev/null
+++ b/mpz/prodlimbs.c
@@ -0,0 +1,108 @@
+/* mpz_prodlimbs(RESULT, V, LEN) -- Set RESULT to V[0]*V[1]*...*V[LEN-1].
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010-2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/*********************************************************/
+/* Section list-prod: product of a list -> mpz_t         */
+/*********************************************************/
+
+/* FIXME: should be tuned */
+#ifndef RECURSIVE_PROD_THRESHOLD
+#define RECURSIVE_PROD_THRESHOLD (MUL_TOOM22_THRESHOLD)
+#endif
+
+/* Computes the product of the j>1 limbs pointed by factors, puts the
+ * result in x. It assumes that all limbs are non-zero. Above
+ * Karatsuba's threshold it uses a binary splitting strategy, to gain
+ * speed by the asymptotically fast multiplication algorithms.
+ *
+ * The list in  {factors, j} is overwritten.
+ * Returns the size of the result
+ */
+
/* Product of the j > 1 non-zero limbs in {factors, j}, stored in x;
   returns (and sets) SIZ (x).  Small lists are folded linearly with
   mpn_mul_1; larger ones use binary splitting so the asymptotically
   fast multiplication kicks in.  The factors array is clobbered.  */
mp_size_t
mpz_prodlimbs (mpz_ptr x, mp_ptr factors, mp_size_t j)
{
  mp_limb_t cy;
  mp_size_t size, i;
  mp_ptr    prod;

  ASSERT (j > 1);
  ASSERT (RECURSIVE_PROD_THRESHOLD > 3);

  if (BELOW_THRESHOLD (j, RECURSIVE_PROD_THRESHOLD)) {
    j--;
    size = 1;

    /* Accumulate factors[1..j-1] in place at the front of the array;
       the final multiply (by factors[j]) writes straight into x.  */
    for (i = 1; i < j; i++)
      {
	cy = mpn_mul_1 (factors, factors, size, factors[i]);
	factors[size] = cy;
	size += cy != 0;
      };

    prod = MPZ_NEWALLOC (x, size + 1);

    /* Here i == j: multiply in the last factor, result to prod.  */
    cy = mpn_mul_1 (prod, factors, size, factors[i]);
    prod[size] = cy;
    return SIZ (x) = size + (cy != 0);
  } else {
    mpz_t x1, x2;
    TMP_DECL;

    /* Split into a low half of i factors and a high half of j >= i
       factors.  */
    i = j >> 1;
    j -= i;
    TMP_MARK;

    MPZ_TMP_INIT (x2, j);

    /* x1 is a fake mpz whose storage is the high-half region of
       factors; that region is free for reuse once the high-half
       product has been computed into the temporary x2.  The low-half
       product needs at most i <= j limbs, so it fits.  */
    PTR (x1) = factors + i;
    ALLOC (x1) = j;
    j = mpz_prodlimbs (x2, factors + i, j);
    i = mpz_prodlimbs (x1, factors, i);
    size = i + j;
    prod = MPZ_NEWALLOC (x, size);
    /* mpn_mul wants the larger operand first; cy is the product's most
       significant limb.  */
    if (i >= j)
      cy = mpn_mul (prod, PTR(x1), i, PTR(x2), j);
    else
      cy = mpn_mul (prod, PTR(x2), j, PTR(x1), i);
    TMP_FREE;

    return SIZ (x) = size - (cy == 0);
  }
}
diff --git a/mpz/random.c b/mpz/random.c
new file mode 100644
index 0000000..1a1e515
--- /dev/null
+++ b/mpz/random.c
@@ -0,0 +1,39 @@
+/* mpz_random -- Generate a random mpz_t of specified size in limbs.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_random (mpz_ptr x, mp_size_t size)
+{
+  mpz_urandomb (x, RANDS, (unsigned long) (ABS (size) * GMP_NUMB_BITS));
+  if (size < 0)
+    SIZ(x) = -SIZ(x);
+}
diff --git a/mpz/random2.c b/mpz/random2.c
new file mode 100644
index 0000000..2c72540
--- /dev/null
+++ b/mpz/random2.c
@@ -0,0 +1,51 @@
+/* mpz_random2 -- Generate a positive random mpz_t of specified size, with
+   long runs of consecutive ones and zeros in the binary representation.
+   Meant for testing of other MP routines.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_random2 (mpz_ptr x, mp_size_t size)
+{
+  mp_size_t abs_size;
+  mp_ptr xp;
+
+  abs_size = ABS (size);
+  if (abs_size != 0)
+    {
+      xp = MPZ_NEWALLOC (x, abs_size);
+
+      mpn_random2 (xp, abs_size);
+    }
+
+  SIZ (x) = size;
+}
diff --git a/mpz/realloc.c b/mpz/realloc.c
new file mode 100644
index 0000000..4288ef1
--- /dev/null
+++ b/mpz/realloc.c
@@ -0,0 +1,70 @@
+/* _mpz_realloc -- make the mpz_t have NEW_ALLOC digits allocated.
+
+Copyright 1991, 1993-1995, 2000, 2001, 2008, 2015, 2021, 2022 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
/* Resize M's limb buffer to hold new_alloc limbs (at least 1) and
   return the new limb pointer.  If M's current value does not fit in
   the new allocation, M is cleared to 0 rather than left invalid.  */
void *
_mpz_realloc (mpz_ptr m, mp_size_t new_alloc)
{
  mp_ptr mp;

  /* Never allocate zero space. */
  new_alloc = MAX (new_alloc, 1);

  /* Overflow guards.  When mp_size_t is int-sized, new_alloc cannot
     exceed what the (presumably int-sized) _mp_alloc field holds, so
     instead guard the total bit count new_alloc * GMP_NUMB_BITS
     against overflowing an unsigned long.  Otherwise, reject counts
     that do not fit the field.  -- NOTE(review): field widths are
     declared in gmp.h, outside this file; confirm against it.  */
  if (sizeof (mp_size_t) == sizeof (int))
    {
      if (UNLIKELY (new_alloc > ULONG_MAX / GMP_NUMB_BITS))
	MPZ_OVERFLOW;
    }
  else
    {
      if (UNLIKELY (new_alloc > INT_MAX))
	MPZ_OVERFLOW;
    }

  /* ALLOC == 0 means no heap block is owned yet (e.g. mpz_roinit_n),
     so allocate fresh instead of reallocating.  */
  if (ALLOC (m) == 0)
    {
      mp = __GMP_ALLOCATE_FUNC_LIMBS (new_alloc);
    }
  else
    {
      mp = __GMP_REALLOCATE_FUNC_LIMBS (PTR (m), ALLOC (m), new_alloc);

      /* Don't create an invalid number; if the current value doesn't fit after
	 reallocation, clear it to 0.  */
      if (UNLIKELY (ABSIZ (m) > new_alloc))
	SIZ (m) = 0;
    }

  PTR (m) = mp;
  ALLOC(m) = new_alloc;
  return (void *) mp;
}
diff --git a/mpz/realloc2.c b/mpz/realloc2.c
new file mode 100644
index 0000000..633077e
--- /dev/null
+++ b/mpz/realloc2.c
@@ -0,0 +1,63 @@
+/* mpz_realloc2 -- change allocated data size.
+
+Copyright 2001, 2002, 2008, 2015, 2021, 2022 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_realloc2 (mpz_ptr m, mp_bitcnt_t bits)
+{
+  mp_size_t new_alloc;
+
+  bits -= (bits != 0);		/* Round down, except if 0 */
+  new_alloc = 1 + bits / GMP_NUMB_BITS;
+
+  if (sizeof (unsigned long) > sizeof (int)) /* param vs _mp_size field */
+    {
+      if (UNLIKELY (new_alloc > INT_MAX))
+	MPZ_OVERFLOW;
+    }
+
+  if (ALLOC (m) == 0)
+    {
+      PTR (m) = __GMP_ALLOCATE_FUNC_LIMBS (new_alloc);
+    }
+  else
+    {
+      PTR (m) = __GMP_REALLOCATE_FUNC_LIMBS (PTR(m), ALLOC(m), new_alloc);
+
+      /* Don't create an invalid number; if the current value doesn't fit after
+	 reallocation, clear it to 0.  */
+      if (ABSIZ(m) > new_alloc)
+	SIZ(m) = 0;
+    }
+
+  ALLOC(m) = new_alloc;
+}
diff --git a/mpz/remove.c b/mpz/remove.c
new file mode 100644
index 0000000..a655121
--- /dev/null
+++ b/mpz/remove.c
@@ -0,0 +1,146 @@
+/* mpz_remove -- divide out a factor and return its multiplicity.
+
+Copyright 1998-2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
/* Divide all factors F out of SRC: set DEST = SRC / F^k for the
   largest k with F^k | SRC, and return k.  F = 0 raises
   DIVIDE_BY_ZERO; F = +-1 or SRC = 0 copies SRC and returns 0.
   Three strategies: odd F uses mpn_remove directly; F = +-2 counts
   trailing zero bits; any other F uses a ladder of repeated squarings
   F, F^2, F^4, ... and then walks back down.  */
mp_bitcnt_t
mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)
{
  mp_bitcnt_t pwr;
  mp_srcptr fp;
  mp_size_t sn, fn, afn;
  mp_limb_t fp0;

  sn = SIZ (src);
  fn = SIZ (f);
  fp = PTR (f);
  afn = ABS (fn);
  fp0 = fp[0];

  if (UNLIKELY ((afn <= (fp0 == 1)) /* mpz_cmpabs_ui (f, 1) <= 0 */
		| (sn == 0)))
    {
      /*  f = 0 or f = +- 1 or src = 0 */
      if (afn == 0)
	DIVIDE_BY_ZERO;
      mpz_set (dest, src);
      return 0;
    }

  if ((fp0 & 1) != 0)
    { /* f is odd */
      mp_ptr dp;
      mp_size_t dn;

      dn = ABS (sn);
      dp = MPZ_REALLOC (dest, dn);

      pwr = mpn_remove (dp, &dn, PTR(src), dn, PTR(f), afn, ~(mp_bitcnt_t) 0);

      /* Result sign: negative iff (multiplicity odd AND f < 0) XOR
	 (src < 0).  pwr & (fn < 0) tests exactly that parity bit.  */
      SIZ (dest) = ((pwr & (fn < 0)) ^ (sn < 0)) ? -dn : dn;
    }
  else if (afn == (fp0 == 2))
    { /* mpz_cmpabs_ui (f, 2) == 0 */
      /* f = +-2: the multiplicity is the number of trailing zero bits.  */
      pwr = mpz_scan1 (src, 0);
      mpz_div_2exp (dest, src, pwr);
      if (pwr & (fn < 0)) /*((pwr % 2 == 1) && (SIZ (f) < 0))*/
	mpz_neg (dest, dest);
    }
  else
    { /* f != +-2 */
      mpz_t x, rem;

      mpz_init (rem);
      mpz_init (x);

      pwr = 0;
      mpz_tdiv_qr (x, rem, src, f);
      if (SIZ (rem) == 0)
	{
	  mpz_t fpow[GMP_LIMB_BITS];		/* Really MP_SIZE_T_BITS */
	  int p;

#if WANT_ORIGINAL_DEST
	  mp_ptr dp;
	  dp = PTR (dest);
#endif
      /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0).  It is an
	 upper bound of the result we're seeking.  We could also shift down the
	 operands so that they become odd, to make intermediate values
	 smaller.  */
	  mpz_init_set (fpow[0], f);
	  mpz_swap (dest, x);

	  p = 1;
      /* Divide by f, f^2 ... f^(2^k) until we get a remainder for f^(2^k).  */
	  while (ABSIZ (dest) >= 2 * ABSIZ (fpow[p - 1]) - 1)
	    {
	      mpz_init (fpow[p]);
	      mpz_mul (fpow[p], fpow[p - 1], fpow[p - 1]);
	      mpz_tdiv_qr (x, rem, dest, fpow[p]);
	      if (SIZ (rem) != 0) {
		mpz_clear (fpow[p]);
		break;
	      }
	      mpz_swap (dest, x);
	      p++;
	    }

	  /* All of f^1, f^2, ..., f^(2^(p-1)) divided evenly, which
	     accounts for multiplicity 1 + 2 + ... + 2^(p-1).  */
	  pwr = ((mp_bitcnt_t)1 << p) - 1;

      /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give
	 a zero remainder.  */
	  while (--p >= 0)
	    {
	      mpz_tdiv_qr (x, rem, dest, fpow[p]);
	      if (SIZ (rem) == 0)
		{
		  pwr += (mp_bitcnt_t)1 << p;
		  mpz_swap (dest, x);
		}
	      mpz_clear (fpow[p]);
	    }

#if WANT_ORIGINAL_DEST
	  /* The swaps above may have left dest's original limb buffer
	     inside x; if so, swap it back and copy the value so the
	     caller keeps its original allocation.  */
	  if (PTR (x) == dp) {
	    mpz_swap (dest, x);
	    mpz_set (dest, x);
	  }
#endif
	}
      else
	mpz_set (dest, src);

      mpz_clear (x);
      mpz_clear (rem);
    }

  return pwr;
}
diff --git a/mpz/roinit_n.c b/mpz/roinit_n.c
new file mode 100644
index 0000000..9125466
--- /dev/null
+++ b/mpz/roinit_n.c
@@ -0,0 +1,43 @@
+/* mpz_roinit_n -- Initialize mpz with read-only limb array.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+mpz_srcptr
+mpz_roinit_n (mpz_ptr x, mp_srcptr xp, mp_size_t xs)  /* view xp[0..|xs|-1] as an mpz; sign of xs gives the sign */
+{
+  mp_size_t xn = ABS(xs);
+  MPN_NORMALIZE (xp, xn);  /* strip high zero limbs so SIZ is canonical */
+
+  ALLOC (x) = 0;  /* ALLOC==0: x borrows xp and owns no storage (read-only init) */
+  SIZ (x) = xs < 0 ? -xn : xn;
+  PTR (x) = (mp_ptr) xp;  /* const cast away; callers must never let GMP write through x */
+  return x;
+}
diff --git a/mpz/root.c b/mpz/root.c
new file mode 100644
index 0000000..7c8d368
--- /dev/null
+++ b/mpz/root.c
@@ -0,0 +1,90 @@
+/* mpz_root(root, u, nth) --  Set ROOT to floor(U^(1/nth)).
+   Return an indication if the result is exact.
+
+Copyright 1999-2003, 2005, 2012, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>		/* for NULL */
+#include "gmp-impl.h"
+
+int
+mpz_root (mpz_ptr root, mpz_srcptr u, unsigned long int nth)  /* root = floor(u^(1/nth)); returns nonzero iff exact */
+{
+  mp_ptr rootp, up;
+  mp_size_t us, un, rootn, remn;
+  TMP_DECL;
+
+  us = SIZ(u);
+
+  /* even roots of negatives provoke an exception */
+  if (UNLIKELY (us < 0 && (nth & 1) == 0))
+    SQRT_OF_NEGATIVE;
+
+  if (UNLIKELY (nth <= 1))
+    {
+      /* root extraction interpreted as c^(1/nth) means a zeroth root should
+	 provoke a divide by zero, do this even if c==0 */
+      if (UNLIKELY (nth == 0))
+	DIVIDE_BY_ZERO;
+      /* nth == 1 */
+      if (root != NULL && u != root)
+	mpz_set (root, u);
+      return 1;			/* exact result */
+    }
+
+  if (us == 0)
+    {
+      if (root != NULL)
+	SIZ(root) = 0;
+      return 1;			/* exact result */
+    }
+
+  un = ABS (us);
+  rootn = (un - 1) / nth + 1;  /* ceil(un/nth) limbs suffice for the root */
+
+  TMP_MARK;
+
+  /* FIXME: Perhaps disallow root == NULL */
+  if (root != NULL && u != root)
+    rootp = MPZ_NEWALLOC (root, rootn);
+  else
+    rootp = TMP_ALLOC_LIMBS (rootn);  /* aliased (u==root) or discarded result: compute in scratch */
+
+  up = PTR(u);
+  remn = mpn_rootrem (rootp, NULL, up, un, (mp_limb_t) nth);  /* remn==0 iff u is a perfect nth power */
+
+  if (root != NULL)
+    {
+      SIZ(root) = us >= 0 ? rootn : -rootn;
+      if (u == root)
+	MPN_COPY (up, rootp, rootn);  /* in-place case: copy scratch result back over u's limbs */
+    }
+
+  TMP_FREE;
+  return remn == 0;
+}
diff --git a/mpz/rootrem.c b/mpz/rootrem.c
new file mode 100644
index 0000000..67953db
--- /dev/null
+++ b/mpz/rootrem.c
@@ -0,0 +1,100 @@
+/* mpz_rootrem(root, rem, u, nth) --  Set ROOT to trunc(U^(1/nth)) and
+   set REM to the remainder.
+
+Copyright 1999-2003, 2005, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>		/* for NULL */
+#include "gmp-impl.h"
+
+void
+mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)  /* root = trunc(u^(1/nth)), rem = u - root^nth */
+{
+  mp_ptr rootp, up, remp;
+  mp_size_t us, un, rootn, remn;
+  TMP_DECL;
+
+  us = SIZ(u);
+
+  /* even roots of negatives provoke an exception */
+  if (UNLIKELY (us < 0 && (nth & 1) == 0))
+    SQRT_OF_NEGATIVE;
+
+  /* root extraction interpreted as c^(1/nth) means a zeroth root should
+     provoke a divide by zero, do this even if c==0 */
+  if (UNLIKELY (nth == 0))
+    DIVIDE_BY_ZERO;
+
+  if (us == 0)
+    {
+      if (root != NULL)
+	SIZ(root) = 0;
+      SIZ(rem) = 0;  /* rem is used unconditionally: unlike root, it must not be NULL */
+      return;
+    }
+
+  un = ABS (us);
+  rootn = (un - 1) / nth + 1;  /* ceil(un/nth) limbs suffice for the root */
+
+  TMP_MARK;
+
+  /* FIXME: Perhaps disallow root == NULL */
+  if (root != NULL && u != root)
+    rootp = MPZ_NEWALLOC (root, rootn);
+  else
+    rootp = TMP_ALLOC_LIMBS (rootn);  /* aliased (u==root) or discarded result: compute in scratch */
+
+  if (u != rem)
+    remp = MPZ_NEWALLOC (rem, un);  /* remainder can be as large as u */
+  else
+    remp = TMP_ALLOC_LIMBS (un);  /* u==rem aliases: compute in scratch, copy back below */
+
+  up = PTR(u);
+
+  if (nth == 1)
+    {
+      MPN_COPY (rootp, up, un);  /* first root is u itself, remainder zero */
+      remn = 0;
+    }
+  else
+    {
+      remn = mpn_rootrem (rootp, remp, up, un, (mp_limb_t) nth);  /* remn==0 iff u is a perfect nth power */
+    }
+
+  if (root != NULL)
+    {
+      SIZ(root) = us >= 0 ? rootn : -rootn;
+      if (u == root)
+	MPN_COPY (up, rootp, rootn);
+    }
+
+  if (u == rem)
+    MPN_COPY (up, remp, remn);
+  SIZ(rem) = us >= 0 ? remn : -remn;  /* remainder carries the sign of u */
+  TMP_FREE;
+}
diff --git a/mpz/rrandomb.c b/mpz/rrandomb.c
new file mode 100644
index 0000000..ae0c9e8
--- /dev/null
+++ b/mpz/rrandomb.c
@@ -0,0 +1,102 @@
+/* mpz_rrandomb -- Generate a positive random mpz_t of specified bit size, with
+   long runs of consecutive ones and zeros in the binary representation.
+   Meant for testing of other MP routines.
+
+Copyright 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+static void gmp_rrandomb (mp_ptr, gmp_randstate_ptr, mp_bitcnt_t);
+
+void
+mpz_rrandomb (mpz_ptr x, gmp_randstate_ptr rstate, mp_bitcnt_t nbits)  /* x = random nbits-bit value with long 0/1 runs */
+{
+  mp_size_t nl;
+  mp_ptr xp;
+
+  nl = BITS_TO_LIMBS (nbits);
+  if (nbits != 0)
+    {
+      xp = MPZ_NEWALLOC (x, nl);
+      gmp_rrandomb (xp, rstate, nbits);
+    }
+
+  SIZ(x) = nl;  /* always non-negative; assumes BITS_TO_LIMBS(0)==0 so nbits==0 yields x==0 — TODO confirm */
+}
+
+/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
+   Thus, we get the same random number sequence in the common cases.
+   FIXME: We should always generate the same random number sequence!  */
+#if GMP_NUMB_BITS < 32
+#define BITS_PER_RANDCALL GMP_NUMB_BITS
+#else
+#define BITS_PER_RANDCALL 32
+#endif
+
+static void
+gmp_rrandomb (mp_ptr rp, gmp_randstate_ptr rstate, mp_bitcnt_t nbits)  /* fill rp with nbits of run-structured random bits; nbits must be > 0 */
+{
+  mp_bitcnt_t bi;
+  mp_limb_t ranm;		/* buffer for random bits */
+  unsigned cap_chunksize, chunksize;
+  mp_size_t i;
+
+  /* Set entire result to 111..1  */
+  i = BITS_TO_LIMBS (nbits) - 1;
+  rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;  /* top limb keeps only nbits%GMP_NUMB_BITS bits (shift is 0 on a limb boundary) */
+  for (i = i - 1; i >= 0; i--)
+    rp[i] = GMP_NUMB_MAX;
+
+  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+  cap_chunksize = nbits / (ranm % 4 + 1);  /* max run length: randomly nbits/1 .. nbits/4 */
+  cap_chunksize += cap_chunksize == 0; /* make it at least 1 */
+
+  bi = nbits;  /* bi counts down from the top bit; runs are carved off below it */
+
+  for (;;)
+    {
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      if (bi == 0)
+	break;			/* low chunk is ...1 */
+
+      rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;  /* NOTE(review): marks the 1->0 run boundary at bit bi; the incr below completes the borrow */
+
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);
+
+      if (bi == 0)
+	break;			/* low chunk is ...0 */
+    }
+}
diff --git a/mpz/scan0.c b/mpz/scan0.c
new file mode 100644
index 0000000..4b0f1ef
--- /dev/null
+++ b/mpz/scan0.c
@@ -0,0 +1,129 @@
+/* mpz_scan0 -- search for a 0 bit.
+
+Copyright 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* mpn_scan0 can't be used for the u>0 search since there might not be a 0
+   bit before the end of the data.  mpn_scan1 could be used for the inverted
+   search under u<0, but usually the search won't go very far so it seems
+   reasonable to inline that code.  */
+
+mp_bitcnt_t
+mpz_scan0 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW  /* index of first 0 bit >= starting_bit, twos-complement view for u<0; ~0 if none */
+{
+  mp_srcptr      u_ptr = PTR(u);
+  mp_size_t      size = SIZ(u);
+  mp_size_t      abs_size = ABS(size);
+  mp_srcptr      u_end = u_ptr + abs_size;  /* one past the last limb (contrast mpz_scan1's inclusive u_end) */
+  mp_size_t      starting_limb = starting_bit / GMP_NUMB_BITS;
+  mp_srcptr      p = u_ptr + starting_limb;
+  mp_limb_t      limb;
+  int            cnt;
+
+  /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for
+     u<0.  Notice this test picks up all cases of u==0 too. */
+  if (starting_limb >= abs_size)
+    return (size >= 0 ? starting_bit : ~(mp_bitcnt_t) 0);
+
+  limb = *p;
+
+  if (size >= 0)
+    {
+      /* Mask to 1 all bits before starting_bit, thus ignoring them. */
+      limb |= (CNST_LIMB(1) << (starting_bit % GMP_NUMB_BITS)) - 1;
+
+      /* Search for a limb which isn't all ones.  If the end is reached then
+	 the zero bit immediately past the end is returned.  */
+      while (limb == GMP_NUMB_MAX)
+	{
+	  p++;
+	  if (p == u_end)
+	    return (mp_bitcnt_t) abs_size * GMP_NUMB_BITS;
+	  limb = *p;
+	}
+
+      /* Now seek low 1 bit. */
+      limb = ~limb;  /* invert so count_trailing_zeros below finds the 0 bit */
+    }
+  else
+    {
+      mp_srcptr  q;
+
+      /* If there's a non-zero limb before ours then we're in the ones
+	 complement region.  Search from *(p-1) downwards since that might
+	 give better cache locality, and since a non-zero in the middle of a
+	 number is perhaps a touch more likely than at the end.  */
+      q = p;
+      while (q != u_ptr)
+	{
+	  q--;
+	  if (*q != 0)
+	    goto inverted;
+	}
+
+      /* Adjust so ~limb implied by searching for 1 bit below becomes -limb.
+	 If limb==0 here then this isn't the beginning of twos complement
+	 inversion, but that doesn't matter because limb==0 is a zero bit
+	 immediately (-1 is all ones for below).  */
+      limb--;
+
+    inverted:
+      /* Now seeking a 1 bit. */
+
+      /* Mask to 0 all bits before starting_bit, thus ignoring them. */
+      limb &= (MP_LIMB_T_MAX << (starting_bit % GMP_NUMB_BITS));
+
+      if (limb == 0)
+	{
+	  /* If the high limb is zero after masking, then no 1 bits past
+	     starting_bit.  */
+	  p++;
+	  if (p == u_end)
+	    return ~(mp_bitcnt_t) 0;
+
+	  /* Search further for a non-zero limb.  The high limb is non-zero,
+	     if nothing else.  */
+	  for (;;)
+	    {
+	      limb = *p;
+	      if (limb != 0)
+		break;
+	      p++;
+	      ASSERT (p < u_end);
+	    }
+	}
+    }
+
+  ASSERT (limb != 0);
+  count_trailing_zeros (cnt, limb);
+  return (mp_bitcnt_t) (p - u_ptr) * GMP_NUMB_BITS + cnt;
+}
diff --git a/mpz/scan1.c b/mpz/scan1.c
new file mode 100644
index 0000000..d096147
--- /dev/null
+++ b/mpz/scan1.c
@@ -0,0 +1,123 @@
+/* mpz_scan1 -- search for a 1 bit.
+
+Copyright 2000-2002, 2004, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* mpn_scan0 can't be used for the inverted u<0 search since there might not
+   be a 0 bit before the end of the data.  mpn_scan1 could be used under u>0
+   (except when in the high limb), but usually the search won't go very far
+   so it seems reasonable to inline that code.  */
+
+mp_bitcnt_t
+mpz_scan1 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW  /* index of first 1 bit >= starting_bit, twos-complement view for u<0; ~0 if none */
+{
+  mp_srcptr      u_ptr = PTR(u);
+  mp_size_t      size = SIZ(u);
+  mp_size_t      abs_size = ABS(size);
+  mp_srcptr      u_end = u_ptr + abs_size - 1;  /* points AT the last limb (inclusive), unlike mpz_scan0's one-past-the-end */
+  mp_size_t      starting_limb = starting_bit / GMP_NUMB_BITS;
+  mp_srcptr      p = u_ptr + starting_limb;
+  mp_limb_t      limb;
+  int            cnt;
+
+  /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit for u<0.
+     Notice this test picks up any u==0 too. */
+  if (starting_limb >= abs_size)
+    return (size >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
+
+  /* This is an important case, where sign is not relevant! */
+  if (starting_bit == 0)
+    goto short_cut;
+
+  limb = *p;
+
+  if (size >= 0)
+    {
+      /* Mask to 0 all bits before starting_bit, thus ignoring them. */
+      limb &= (MP_LIMB_T_MAX << (starting_bit % GMP_NUMB_BITS));
+
+      if (limb == 0)
+	{
+	  /* If it's the high limb which is zero after masking, then there's
+	     no 1 bits after starting_bit.  */
+	  if (p == u_end)
+	    return ~(mp_bitcnt_t) 0;
+
+	  /* Otherwise search further for a non-zero limb.  The high limb is
+	     non-zero, if nothing else.  */
+	search_nonzero:
+	  do
+	    {
+	      ASSERT (p != u_end);
+	      p++;
+	    short_cut:
+	      limb = *p;  /* entry point for starting_bit==0: no masking needed */
+	    }
+	  while (limb == 0);
+	}
+    }
+  else
+    {
+      /* If there's a non-zero limb before ours then we're in the ones
+	 complement region.  */
+      if (starting_limb == 0 || mpn_zero_p (u_ptr, starting_limb)) {
+	if (limb == 0)
+	  /* Seeking for the first non-zero bit, it is the same for u and -u. */
+	  goto search_nonzero;
+
+	/* Adjust so ~limb implied by searching for 0 bit becomes -limb.  */
+	limb--;
+      }
+
+      /* Now seeking a 0 bit. */
+
+      /* Mask to 1 all bits before starting_bit, thus ignoring them. */
+      limb |= (CNST_LIMB(1) << (starting_bit % GMP_NUMB_BITS)) - 1;
+
+      /* Search for a limb which is not all ones.  If the end is reached
+	 then the zero immediately past the end is the result.  */
+      while (limb == GMP_NUMB_MAX)
+	{
+	  if (p == u_end)
+	    return (mp_bitcnt_t) abs_size * GMP_NUMB_BITS;
+	  p++;
+	  limb = *p;
+	}
+
+      /* Now seeking low 1 bit. */
+      limb = ~limb;  /* invert so count_trailing_zeros below finds the 0 bit */
+    }
+
+  ASSERT (limb != 0);
+  count_trailing_zeros (cnt, limb);
+  return (mp_bitcnt_t) (p - u_ptr) * GMP_NUMB_BITS + cnt;
+}
diff --git a/mpz/set.c b/mpz/set.c
new file mode 100644
index 0000000..7789af3
--- /dev/null
+++ b/mpz/set.c
@@ -0,0 +1,49 @@
+/* mpz_set (dest_integer, src_integer) -- Assign DEST_INTEGER from SRC_INTEGER.
+
+Copyright 1991, 1993-1995, 2000, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+void
+mpz_set (mpz_ptr w, mpz_srcptr u)  /* w = u */
+{
+  mp_ptr wp, up;
+  mp_size_t usize, size;
+
+  usize = SIZ(u);
+  size = ABS (usize);
+
+  wp = MPZ_NEWALLOC (w, size);  /* may reallocate w's limbs */
+
+  up = PTR(u);  /* NOTE(review): read after the realloc above, which looks deliberate for the w==u aliasing case — confirm MPZ_NEWALLOC semantics */
+
+  MPN_COPY (wp, up, size);
+  SIZ(w) = usize;
+}
diff --git a/mpz/set_d.c b/mpz/set_d.c
new file mode 100644
index 0000000..8cbda11
--- /dev/null
+++ b/mpz/set_d.c
@@ -0,0 +1,113 @@
+/* mpz_set_d(integer, val) -- Assign INTEGER with a double value VAL.
+
+Copyright 1995, 1996, 2000-2003, 2006, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp-impl.h"
+
+
+/* We used to have a special case for d < MP_BASE_AS_DOUBLE, just casting
+   double -> limb.  Unfortunately gcc 3.3 on powerpc970-apple-darwin6.8.5
+   got this wrong.  (It assumed __fixunsdfdi returned its result in a single
+   64-bit register, where instead that function followed the calling
+   conventions and gave the result in two parts r3 and r4.)  Hence the use
+   of __gmp_extract_double in all cases.  */
+
+void
+mpz_set_d (mpz_ptr r, double d)  /* r = trunc(d); NaN/Inf raise an invalid-operation exception */
+{
+  int negative;
+  mp_limb_t tp[LIMBS_PER_DOUBLE];
+  mp_ptr rp;
+  mp_size_t rn;
+
+  DOUBLE_NAN_INF_ACTION (d,
+			 __gmp_invalid_operation (),
+			 __gmp_invalid_operation ());
+
+  negative = d < 0;
+  d = ABS (d);
+
+  rn = __gmp_extract_double (tp, d);  /* limb count of the integer part; mantissa limbs left in tp */
+
+  if (rn <= 0)
+    rn = 0;  /* presumably |d| < 1 truncates to zero — confirm __gmp_extract_double's contract for rn<0 */
+
+  rp = MPZ_NEWALLOC (r, rn);
+
+  switch (rn)
+    {
+    default:
+      MPN_ZERO (rp, rn - LIMBS_PER_DOUBLE);  /* rn > LIMBS_PER_DOUBLE: low limbs are zero, mantissa fills the top */
+      rp += rn - LIMBS_PER_DOUBLE;
+      /* fall through */
+#if LIMBS_PER_DOUBLE == 2
+    case 2:
+      rp[1] = tp[1], rp[0] = tp[0];
+      break;
+    case 1:
+      rp[0] = tp[1];  /* fewer result limbs than tp holds: take the high end of tp */
+      break;
+#endif
+#if LIMBS_PER_DOUBLE == 3
+    case 3:
+      rp[2] = tp[2], rp[1] = tp[1], rp[0] = tp[0];
+      break;
+    case 2:
+      rp[1] = tp[2], rp[0] = tp[1];
+      break;
+    case 1:
+      rp[0] = tp[2];
+      break;
+#endif
+#if LIMBS_PER_DOUBLE == 4
+    case 4:
+      rp[3] = tp[3], rp[2] = tp[2], rp[1] = tp[1], rp[0] = tp[0];
+      break;
+    case 3:
+      rp[2] = tp[3], rp[1] = tp[2], rp[0] = tp[1];
+      break;
+    case 2:
+      rp[1] = tp[3], rp[0] = tp[2];
+      break;
+    case 1:
+      rp[0] = tp[3];
+      break;
+#endif
+    case 0:
+      break;
+    }
+
+  SIZ(r) = negative ? -rn : rn;
+}
diff --git a/mpz/set_f.c b/mpz/set_f.c
new file mode 100644
index 0000000..b3ecc0b
--- /dev/null
+++ b/mpz/set_f.c
@@ -0,0 +1,71 @@
+/* mpz_set_f (dest_integer, src_float) -- Assign DEST_INTEGER from SRC_FLOAT.
+
+Copyright 1996, 2001, 2012, 2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+void
+mpz_set_f (mpz_ptr w, mpf_srcptr u)  /* w = trunc(u) */
+{
+  mp_ptr    wp, up;
+  mp_size_t size;
+  mp_exp_t  exp;
+
+  /* abs(u)<1 truncates to zero */
+  exp = EXP (u);
+  if (exp <= 0)
+    {
+      SIZ(w) = 0;
+      return;
+    }
+
+  wp = MPZ_NEWALLOC (w, exp);  /* mpf exponent counts limbs above the radix point, so exp limbs hold the integer part */
+  up = PTR(u);
+
+  size = SIZ (u);
+  SIZ(w) = (size >= 0 ? exp : -exp);  /* result sign follows u's sign */
+  size = ABS (size);
+
+  if (exp > size)
+    {
+      /* pad with low zeros to get a total "exp" many limbs */
+      mp_size_t  zeros = exp - size;
+      MPN_ZERO (wp, zeros);
+      wp += zeros;
+    }
+  else
+    {
+      /* exp<=size, truncate to the high "exp" many limbs */
+      up += (size - exp);
+      size = exp;
+    }
+
+  MPN_COPY (wp, up, size);
+}
diff --git a/mpz/set_q.c b/mpz/set_q.c
new file mode 100644
index 0000000..2280247
--- /dev/null
+++ b/mpz/set_q.c
@@ -0,0 +1,34 @@
+/* mpz_set_q (dest_integer, src_rational) -- Assign DEST_INTEGER from
+   SRC_rational.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_set_q 1
+
+#include "gmp-impl.h"
diff --git a/mpz/set_si.c b/mpz/set_si.c
new file mode 100644
index 0000000..973aef8
--- /dev/null
+++ b/mpz/set_si.c
@@ -0,0 +1,55 @@
+/* mpz_set_si(dest,val) -- Assign DEST with a small value VAL.
+
+Copyright 1991, 1993-1995, 2000-2002, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+mpz_set_si (mpz_ptr dest, signed long int val)  /* dest = val */
+{
+  mp_size_t size;
+  mp_limb_t vl;
+
+  vl = (mp_limb_t) ABS_CAST (unsigned long int, val);  /* |val| without UB on LONG_MIN */
+
+  MPZ_NEWALLOC (dest, 1)[0] = vl & GMP_NUMB_MASK;  /* low limb; nail bits masked off */
+  size = vl != 0;  /* 0 or 1 limbs */
+
+#if GMP_NAIL_BITS != 0
+  if (vl > GMP_NUMB_MAX)  /* nails: |val| may need a second limb */
+    {
+      MPZ_REALLOC (dest, 2);
+      PTR (dest)[1] = vl >> GMP_NUMB_BITS;
+      size = 2;
+    }
+#endif
+
+  SIZ (dest) = val >= 0 ? size : -size;
+}
diff --git a/mpz/set_str.c b/mpz/set_str.c
new file mode 100644
index 0000000..26c102b
--- /dev/null
+++ b/mpz/set_str.c
@@ -0,0 +1,144 @@
+/* mpz_set_str(mp_dest, string, base) -- Convert the \0-terminated
+   string STRING in base BASE to multiple precision integer in
+   MP_DEST.  Allow white space in the string.  If BASE == 0 determine
+   the base in the C standard way, i.e.  0xhh...h means base 16,
+   0oo...o means base 8, otherwise assume base 10.
+
+Copyright 1991, 1993, 1994, 1996-1998, 2000-2003, 2005, 2011-2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <string.h>
+#include <ctype.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define digit_value_tab __gmp_digit_value_tab
+
+int
+mpz_set_str (mpz_ptr x, const char *str, int base)  /* parse str into x; returns 0 on success, -1 on bad base/digits */
+{
+  size_t str_size;
+  char *s, *begs;
+  size_t i;
+  mp_size_t xsize;
+  int c;
+  int negative;
+  const unsigned char *digit_value;
+  TMP_DECL;
+
+  digit_value = digit_value_tab;
+  if (base > 36)
+    {
+      /* For bases > 36, use the collating sequence
+	 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
+      digit_value += 208;  /* offset of the case-sensitive table within __gmp_digit_value_tab — per table layout, confirm against gen-bases output */
+      if (UNLIKELY (base > 62))
+	return -1;		/* too large base */
+    }
+
+  /* Skip whitespace.  */
+  do
+    c = (unsigned char) *str++;
+  while (isspace (c));
+
+  negative = 0;
+  if (c == '-')
+    {
+      negative = 1;
+      c = (unsigned char) *str++;
+    }
+
+  if (digit_value[c] >= (base == 0 ? 10 : base))  /* base 0 detection below starts from a decimal digit */
+    return -1;			/* error if no valid digits */
+
+  /* If BASE is 0, try to find out the base by looking at the initial
+     characters.  */
+  if (base == 0)
+    {
+      base = 10;
+      if (c == '0')
+	{
+	  base = 8;
+	  c = (unsigned char) *str++;
+	  if (c == 'x' || c == 'X')
+	    {
+	      base = 16;
+	      c = (unsigned char) *str++;
+	    }
+	  else if (c == 'b' || c == 'B')
+	    {
+	      base = 2;
+	      c = (unsigned char) *str++;
+	    }
+	}
+    }
+
+  /* Skip leading zeros and white space.  */
+  while (c == '0' || isspace (c))
+    c = (unsigned char) *str++;
+  /* Make sure the string does not become empty, mpn_set_str would fail.  */
+  if (c == 0)
+    {
+      SIZ (x) = 0;
+      return 0;
+    }
+
+  TMP_MARK;
+  str_size = strlen (str - 1);  /* str has been post-incremented past c, so str-1 is the current character */
+  s = begs = (char *) TMP_ALLOC (str_size + 1);
+
+  /* Remove spaces from the string and convert the result from ASCII to a
+     byte array.  */
+  for (i = 0; i < str_size; i++)
+    {
+      if (!isspace (c))
+	{
+	  int dig = digit_value[c];
+	  if (UNLIKELY (dig >= base))
+	    {
+	      TMP_FREE;
+	      return -1;
+	    }
+	  *s++ = dig;
+	}
+      c = (unsigned char) *str++;
+    }
+
+  str_size = s - begs;  /* digit count after stripping embedded whitespace */
+
+  LIMBS_PER_DIGIT_IN_BASE (xsize, str_size, base);
+  MPZ_NEWALLOC (x, xsize);  /* upper-bound allocation; mpn_set_str returns the exact size */
+
+  /* Convert the byte array in base BASE to our bignum format.  */
+  xsize = mpn_set_str (PTR (x), (unsigned char *) begs, str_size, base);
+  SIZ (x) = negative ? -xsize : xsize;
+
+  TMP_FREE;
+  return 0;
+}
diff --git a/mpz/set_ui.c b/mpz/set_ui.c
new file mode 100644
index 0000000..12e95d1
--- /dev/null
+++ b/mpz/set_ui.c
@@ -0,0 +1,52 @@
+/* mpz_set_ui(integer, val) -- Assign INTEGER with a small value VAL.
+
+Copyright 1991, 1993-1995, 2001, 2002, 2004, 2012, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set DEST to the small value VAL.
+   The low limb is written unconditionally; the resulting size is 0 for
+   VAL == 0 and 1 otherwise.  When an unsigned long is wider than one
+   limb (nails / small-limb builds), a second limb takes the high bits.  */
+void
+mpz_set_ui (mpz_ptr dest, unsigned long int val)
+{
+  mp_size_t size;
+
+  /* Store the low limb first; harmless even when VAL == 0.  */
+  MPZ_NEWALLOC (dest, 1)[0] = val & GMP_NUMB_MASK;
+  size = val != 0;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (val > GMP_NUMB_MAX)
+    {
+      /* VAL does not fit in a single limb: store the high part too.  */
+      MPZ_REALLOC (dest, 2);
+      PTR (dest)[1] = val >> GMP_NUMB_BITS;
+      size = 2;
+    }
+#endif
+
+  SIZ (dest) = size;
+}
diff --git a/mpz/setbit.c b/mpz/setbit.c
new file mode 100644
index 0000000..228a564
--- /dev/null
+++ b/mpz/setbit.c
@@ -0,0 +1,104 @@
+/* mpz_setbit -- set a specified bit.
+
+Copyright 1991, 1993-1995, 1997, 1999, 2001, 2002, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set bit BIT_IDX of D.  D is interpreted as a two's complement number,
+   so for a negative D the operation is carried out on the conceptual
+   two's complement image, simulated on the stored magnitude below.  */
+void
+mpz_setbit (mpz_ptr d, mp_bitcnt_t bit_idx)
+{
+  mp_size_t dsize = SIZ (d);
+  mp_ptr dp = PTR (d);
+  mp_size_t limb_idx;
+  mp_limb_t mask;
+
+  limb_idx = bit_idx / GMP_NUMB_BITS;
+  mask = CNST_LIMB(1) << (bit_idx % GMP_NUMB_BITS);
+  if (dsize >= 0)
+    {
+      if (limb_idx < dsize)
+	{
+	  dp[limb_idx] |= mask;
+	}
+      else
+	{
+	  /* Ugh.  The bit should be set outside of the end of the
+	     number.  We have to increase the size of the number.  */
+	  dp = MPZ_REALLOC (d, limb_idx + 1);
+	  SIZ (d) = limb_idx + 1;
+	  MPN_ZERO (dp + dsize, limb_idx - dsize);
+	  dp[limb_idx] = mask;
+	}
+    }
+  else
+    {
+      /* Simulate two's complement arithmetic, i.e. simulate
+	 1. Set OP = ~(OP - 1) [with infinitely many leading ones].
+	 2. Set the bit.
+	 3. Set OP = ~OP + 1.  */
+
+      dsize = -dsize;
+
+      /* When limb_idx >= dsize there is nothing to do: the simulated
+	 two's complement form already has ones at and above the top of
+	 the stored magnitude (hence no else branch here).  */
+      if (limb_idx < dsize)
+	{
+	  mp_size_t zero_bound;
+	  /* No index upper bound on this loop, we're sure there's a non-zero limb
+	     sooner or later.  */
+	  zero_bound = 0;
+	  while (dp[zero_bound] == 0)
+	    zero_bound++;
+
+	  if (limb_idx > zero_bound)
+	    {
+	      mp_limb_t	 dlimb;
+	      /* The borrow of the "- 1" stops at limb ZERO_BOUND, so in
+		 this limb bit b of the value is the complement of bit b
+		 of the magnitude: set it by clearing it here.  */
+	      dlimb = dp[limb_idx] & ~mask;
+	      dp[limb_idx] = dlimb;
+
+	      if (UNLIKELY ((dlimb == 0) + limb_idx == dsize)) /* dsize == limb_idx + 1 */
+		{
+		  /* high limb became zero, must normalize */
+		  MPN_NORMALIZE (dp, limb_idx);
+		  SIZ (d) = -limb_idx;
+		}
+	    }
+	  else if (limb_idx == zero_bound)
+	    {
+	      /* This limb absorbs the borrow of the "- 1": form OP - 1
+		 here, clear the bit, then add the borrow back in.  */
+	      dp[limb_idx] = ((dp[limb_idx] - 1) & ~mask) + 1;
+	      ASSERT (dp[limb_idx] != 0);
+	    }
+	  else
+	    {
+	      /* Below the borrow the limbs of OP - 1 are all ones, so
+		 the bit is currently clear in the value; setting it adds
+		 2^bit_idx, i.e. subtracts MASK from the magnitude.  */
+	      MPN_DECR_U (dp + limb_idx, dsize - limb_idx, mask);
+	      /* The subtraction may shrink the number by one limb.  */
+	      dsize -= dp[dsize - 1] == 0;
+	      SIZ (d) = -dsize;
+	    }
+	}
+    }
+}
diff --git a/mpz/size.c b/mpz/size.c
new file mode 100644
index 0000000..b8aa59e
--- /dev/null
+++ b/mpz/size.c
@@ -0,0 +1,34 @@
+/* mpz_size(x) -- return the number of limbs currently used by the
+   value of integer X.
+
+Copyright 1991, 1993-1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_size 1
+
+#include "gmp-impl.h"
diff --git a/mpz/sizeinbase.c b/mpz/sizeinbase.c
new file mode 100644
index 0000000..7a1bd01
--- /dev/null
+++ b/mpz/sizeinbase.c
@@ -0,0 +1,42 @@
+/* mpz_sizeinbase(x, base) -- return an approximation to the number of
+   characters the integer X would have printed in base BASE.  The
+   approximation is never too small.
+
+Copyright 1991, 1993-1995, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Return the digit count of |X| written in BASE, possibly overestimated
+   (never too small, per the header above); the work is delegated to the
+   MPN_SIZEINBASE macro on the absolute value of X.  */
+size_t
+mpz_sizeinbase (mpz_srcptr x, int base) __GMP_NOTHROW
+{
+  size_t  result;
+  MPN_SIZEINBASE (result, PTR(x), ABSIZ(x), base);
+  return result;
+}
diff --git a/mpz/sqrt.c b/mpz/sqrt.c
new file mode 100644
index 0000000..74d2f75
--- /dev/null
+++ b/mpz/sqrt.c
@@ -0,0 +1,76 @@
+/* mpz_sqrt(root, u) --  Set ROOT to floor(sqrt(U)).
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2012, 2015 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+
+/* Set ROOT to floor(sqrt(OP)).  Raises SQRT_OF_NEGATIVE for OP < 0.
+   The remainder argument of mpn_sqrtrem is passed as NULL since only
+   the root is wanted.  */
+void
+mpz_sqrt (mpz_ptr root, mpz_srcptr op)
+{
+  mp_size_t op_size, root_size;
+  mp_ptr root_ptr, op_ptr;
+
+  op_size = SIZ (op);
+  if (UNLIKELY (op_size <= 0))
+    {
+      if (UNLIKELY (op_size < 0))
+	SQRT_OF_NEGATIVE;
+      SIZ(root) = 0;
+      return;
+    }
+
+  /* The size of the root is accurate after this simple calculation.  */
+  root_size = (op_size + 1) / 2;
+  SIZ (root) = root_size;
+
+  op_ptr = PTR (op);
+
+  if (root == op)
+    {
+      /* Allocate temp space for the root, which we then copy to the
+	 shared OP/ROOT variable.  */
+      TMP_DECL;
+      TMP_MARK;
+
+      root_ptr = TMP_ALLOC_LIMBS (root_size);
+      mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+
+      MPN_COPY (op_ptr, root_ptr, root_size);
+
+      TMP_FREE;
+    }
+  else
+    {
+      root_ptr = MPZ_NEWALLOC (root, root_size);
+
+      mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+    }
+}
diff --git a/mpz/sqrtrem.c b/mpz/sqrtrem.c
new file mode 100644
index 0000000..a580d95
--- /dev/null
+++ b/mpz/sqrtrem.c
@@ -0,0 +1,85 @@
+/* mpz_sqrtrem(root,rem,x) -- Set ROOT to floor(sqrt(X)) and REM
+   to the remainder, i.e. X - ROOT**2.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2011, 2012, 2015 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set ROOT to floor(sqrt(OP)) and REM to OP - ROOT^2.  Raises
+   SQRT_OF_NEGATIVE for OP < 0.  mpn_sqrtrem returns the limb count of
+   the remainder it wrote.  The statement order below is deliberate, so
+   that the aliasing cases ROOT == OP, REM == OP and ROOT == REM all
+   behave as the comments describe.  */
+void
+mpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op)
+{
+  mp_size_t op_size, root_size, rem_size;
+  mp_ptr root_ptr, op_ptr, rem_ptr;
+
+  op_size = SIZ (op);
+  if (UNLIKELY (op_size <= 0))
+    {
+      if (UNLIKELY (op_size < 0))
+	SQRT_OF_NEGATIVE;
+      SIZ(root) = 0;
+      SIZ(rem) = 0;
+      return;
+    }
+
+  /* No-op if rem == op */
+  rem_ptr = MPZ_NEWALLOC (rem, op_size);
+
+  /* The size of the root is accurate after this simple calculation.  */
+  root_size = (op_size + 1) / 2;
+  SIZ (root) = root_size;
+
+  op_ptr = PTR (op);
+
+  if (root == op)
+    {
+      /* Allocate temp space for the root, which we then copy to the
+	 shared OP/ROOT variable.  */
+      TMP_DECL;
+      TMP_MARK;
+
+      root_ptr = TMP_ALLOC_LIMBS (root_size);
+      rem_size = mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size);
+
+      if (rem != root)	/* Don't overwrite remainder */
+	MPN_COPY (op_ptr, root_ptr, root_size);
+
+      TMP_FREE;
+    }
+  else
+    {
+      root_ptr = MPZ_NEWALLOC (root, root_size);
+
+      rem_size = mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size);
+    }
+
+  /* Write remainder size last, to make this function give only the square root
+     remainder, when passed ROOT == REM.  */
+  SIZ (rem) = rem_size;
+}
diff --git a/mpz/stronglucas.c b/mpz/stronglucas.c
new file mode 100644
index 0000000..0bf1ce0
--- /dev/null
+++ b/mpz/stronglucas.c
@@ -0,0 +1,214 @@
+/* mpz_stronglucas(n, t1, t2) -- An implementation of the strong Lucas
+   primality test on n, using parameters as suggested by the BPSW test.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2018, 2020 Free Software Foundation, Inc.
+
+Contributed by Marco Bodrato.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Returns an approximation of the square root of x.
+ * It gives:
+ *   limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
+ * or
+ *   x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
+ */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+  int s;
+
+  ASSERT (x > 2);
+  count_leading_zeros (s, x);
+  /* s = floor(bit_length(x) / 2), so 2^s is near sqrt(x).  */
+  s = (GMP_LIMB_BITS - s) >> 1;
+  /* One arithmetic-mean step (a + x/a)/2 with a = 2^s:
+     2^(s-1) + floor(x / 2^(s+1)); the shift is split in two so the
+     total shift count stays strictly below GMP_LIMB_BITS.  */
+  return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s));
+}
+
+/* Jacobi symbol (B/A) for a single-limb odd A > 1 and a positive odd B
+   (the asserts encode these preconditions).  B is first reduced mod A;
+   a zero residue (A divides B) yields 0.  */
+static int
+mpz_oddjacobi_ui (mpz_t b, mp_limb_t a)
+{
+  mp_limb_t  b_rem;
+  int        result_bit1;
+
+  ASSERT (a & 1);
+  ASSERT (a > 1);
+  ASSERT (SIZ (b) > 0);
+  ASSERT ((*PTR (b) & 1) == 1);
+
+  result_bit1 = 0;
+  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, b_rem, PTR (b), SIZ (b), a);
+  if (UNLIKELY (b_rem == 0))
+    return 0;
+  else
+    return mpn_jacobi_base (b_rem, a, result_bit1);
+}
+
+
+/* Performs strong Lucas' test on x, with parameters suggested */
+/* for the BPSW test. Qk and V are passed to recycle variables. */
+/* Requires GCD (x,6) = 1.*/
+/* Returns 0 when x is proved composite, 1 when x is a strong Lucas
+   probable prime for the selected (D, Q).  */
+int
+mpz_stronglucas (mpz_srcptr x, mpz_ptr V, mpz_ptr Qk)
+{
+  mp_bitcnt_t b0;
+  mpz_t n;
+  mp_limb_t D; /* The absolute value is stored. */
+  mp_limb_t g;
+  long Q;
+  mpz_t T1, T2;
+
+  /* Test on the absolute value. */
+  mpz_roinit_n (n, PTR (x), ABSIZ (x));
+
+  ASSERT (mpz_odd_p (n));
+  /* ASSERT (mpz_gcd_ui (NULL, n, 6) == 1);	*/
+#if GMP_NUMB_BITS % 16 == 0
+  /* (2^12 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1)	*/
+  g = mpn_mod_34lsub1 (PTR (n), SIZ (n));
+  /* (2^12 - 1) = 3^2 * 5 * 7 * 13		*/
+  ASSERT (g % 3 != 0 && g % 5 != 0 && g % 7 != 0);
+  if ((g % 5 & 2) != 0)
+    /* (5/n) = -1, iff n = 2 or 3 (mod 5)	*/
+    /* D = 5; Q = -1 */
+    return mpn_strongfibo (PTR (n), SIZ (n), PTR (V));
+  else if (! POW2_P (g % 7))
+    /* (-7/n) = -1, iff n = 3,5 or 6 (mod 7)	*/
+    D = 7; /* Q = 2 */
+    /* (9/n) = -1, never: 9 = 3^2	*/
+  else if (mpz_oddjacobi_ui (n, 11) == -1)
+    /* (-11/n) = (n/11)	*/
+    D = 11; /* Q = 3 */
+  else if ((((g % 13 - (g % 13 >> 3)) & 7) > 4) ||
+	   (((g % 13 - (g % 13 >> 3)) & 7) == 2))
+    /* (13/n) = -1, iff n = 2,5,6,7,8 or 11 (mod 13)	*/
+    D = 13; /* Q = -3 */
+  else if (g % 3 == 2)
+    /* (-15/n) = (n/15) = (n/5)*(n/3)	*/
+    /* Here, (n/5) = 1, and		*/
+    /* (n/3) = -1, iff n = 2 (mod 3)	*/
+    D = 15; /* Q = 4 */
+#if GMP_NUMB_BITS % 32 == 0
+  /* (2^24 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1)	*/
+  /* (2^24 - 1) = (2^12 - 1) * 17 * 241		*/
+  else if (! POW2_P (g % 17) && ! POW2_P (17 - g % 17))
+    /* (17/n) = -1, iff n != +-1,+-2,+-4,+-8 (mod 17)	*/
+    D = 17; /* Q = -4 */
+#endif
+#else
+  if (mpz_oddjacobi_ui (n, 5) == -1)
+    return mpn_strongfibo (PTR (n), SIZ (n), PTR (V));
+#endif
+  else
+  {
+    mp_limb_t maxD;
+    int jac;
+
+    /* n is odd, to possibly be a square, n % 8 = 1 is needed. */
+    if (((*PTR (n) & 6) == 0) && UNLIKELY (mpz_perfect_square_p (n)))
+      return 0; /* A square is composite. */
+
+    /* Check Ds up to square root (in case, n is prime)
+       or avoid overflows */
+    if (SIZ (n) == 1)
+      maxD = limb_apprsqrt (* PTR (n));
+    else if (BITS_PER_ULONG >= GMP_NUMB_BITS && SIZ (n) == 2)
+      mpn_sqrtrem (&maxD, (mp_ptr) NULL, PTR (n), 2);
+    else
+      maxD = GMP_NUMB_MAX;
+    maxD = MIN (maxD, ULONG_MAX);
+
+    unsigned Ddiff = 2;
+#if GMP_NUMB_BITS % 16 == 0
+    const unsigned D2 = 6;
+#if GMP_NUMB_BITS % 32 == 0
+    D = 19;
+    Ddiff = 4;
+#else
+    D = 17;
+#endif
+#else
+    const unsigned D2 = 4;
+    D = 7;
+#endif
+
+    /* Search a D such that (D/n) = -1 in the sequence 5,-7,9,-11,..	*/
+    /* For those Ds we have (D/n) = (n/|D|)	*/
+    /* FIXME: Should we loop only on prime Ds?	*/
+    /* The only interesting composite D is 15, because 3 is not tested.	*/
+    for (;;)
+      {
+	jac = mpz_oddjacobi_ui (n, D);
+	if (jac != 1)
+	  break;
+	if (UNLIKELY (D >= maxD))
+	  return 1;
+	D += Ddiff;
+	Ddiff = D2 - Ddiff;
+      }
+
+    /* (D/n) == 0 means D divides n: composite (presumably small n,
+       where n itself could equal a tested D, are screened out by the
+       caller -- TODO confirm against callers).  */
+    if (UNLIKELY (jac == 0))
+      return 0;
+  }
+
+  /* D= P^2 - 4Q; P = 1; Q = (1-D)/4 */
+  /* |D| = 3 (mod 4), i.e. bit 1 set, encodes a negative D here.  */
+  Q = (D & 2) ? (D >> 2) + 1 : -(long) (D >> 2);
+  /* ASSERT (mpz_si_kronecker ((D & 2) ? NEG_CAST (long, D) : D, n) == -1); */
+
+  /* n-(D/n) = n+1 = d*2^{b0}, with d = (n>>b0) | 1 */
+  b0 = mpz_scan0 (n, 0);
+
+  mpz_init (T1);
+  mpz_init (T2);
+
+  /* If Ud != 0 && Vd != 0 */
+  if (mpz_lucas_mod (V, Qk, Q, b0, n, T1, T2) == 0)
+    if (LIKELY (--b0 != 0))
+      for (;;)
+	{
+	  /* V_{2k} <- V_k ^ 2 - 2Q^k */
+	  mpz_mul (T2, V, V);
+	  mpz_submul_ui (T2, Qk, 2);
+	  mpz_tdiv_r (V, T2, n);
+	  if (SIZ (V) == 0 || UNLIKELY (--b0 == 0))
+	    break;
+	  /* Q^{2k} = (Q^k)^2 */
+	  mpz_mul (T2, Qk, Qk);
+	  mpz_tdiv_r (Qk, T2, n);
+	}
+
+  mpz_clear (T1);
+  mpz_clear (T2);
+
+  /* b0 was driven to 0 only when no V_{d*2^i} vanished: composite.  */
+  return (b0 != 0);
+}
diff --git a/mpz/sub.c b/mpz/sub.c
new file mode 100644
index 0000000..7cb022e
--- /dev/null
+++ b/mpz/sub.c
@@ -0,0 +1,33 @@
+/* mpz_sub -- subtract integers.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_sub
+#include "aors.h"
diff --git a/mpz/sub_ui.c b/mpz/sub_ui.c
new file mode 100644
index 0000000..3ce23d3
--- /dev/null
+++ b/mpz/sub_ui.c
@@ -0,0 +1,33 @@
+/* mpz_sub_ui -- Subtract an mpz_t and an unsigned one-word integer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_sub_ui
+#include "aors_ui.h"
diff --git a/mpz/swap.c b/mpz/swap.c
new file mode 100644
index 0000000..255fac0
--- /dev/null
+++ b/mpz/swap.c
@@ -0,0 +1,39 @@
+/* mpz_swap (dest_integer, src_integer) -- Swap U and V.
+
+Copyright 1997, 1998, 2001, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Swap U and V by exchanging their three fields (allocation count,
+   size and limb pointer) -- constant time, no limb data is copied.  */
+void
+mpz_swap (mpz_ptr u, mpz_ptr v) __GMP_NOTHROW
+{
+  MP_SIZE_T_SWAP (ALLOC(u), ALLOC(v));
+  MP_SIZE_T_SWAP (SIZ(u), SIZ(v));
+  MP_PTR_SWAP (PTR(v), PTR(u));
+}
diff --git a/mpz/tdiv_q.c b/mpz/tdiv_q.c
new file mode 100644
index 0000000..8c5001d
--- /dev/null
+++ b/mpz/tdiv_q.c
@@ -0,0 +1,87 @@
+/* mpz_tdiv_q -- divide two integers and produce a quotient.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2010, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Set QUOT to trunc(NUM / DEN).  The sign of the quotient is positive
+   iff NUM and DEN have the same sign.  Raises DIVIDE_BY_ZERO when
+   DEN == 0.  */
+void
+mpz_tdiv_q (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
+{
+  mp_size_t ql;
+  mp_size_t ns, ds, nl, dl;
+  mp_ptr np, dp, qp, tp;
+  TMP_DECL;
+
+  ns = SIZ (num);
+  ds = SIZ (den);
+  nl = ABS (ns);
+  dl = ABS (ds);
+  /* Upper bound on the quotient limb count; the exact count is either
+     ql or ql - 1 and is fixed up after the division.  */
+  ql = nl - dl + 1;
+
+  if (UNLIKELY (dl == 0))
+    DIVIDE_BY_ZERO;
+
+  /* |NUM| < |DEN|: the truncated quotient is zero.  */
+  if (ql <= 0)
+    {
+      SIZ (quot) = 0;
+      return;
+    }
+
+  qp = MPZ_REALLOC (quot, ql);
+
+  TMP_MARK;
+  dp = PTR (den);
+
+  /* Copy denominator to temporary space if it overlaps with the quotient.  */
+  if (dp == qp)
+    {
+      mp_ptr tp;	/* NB: shadows the outer scratch pointer TP.  */
+      tp = TMP_ALLOC_LIMBS (dl);
+      MPN_COPY (tp, dp, dl);
+      dp = tp;
+    }
+
+  /* Scratch area for mpn_div_q; when the numerator aliases the quotient
+     it is copied here first, overlapping dividend and scratch.  */
+  tp = TMP_ALLOC_LIMBS (nl + 1);
+  np = PTR (num);
+  /* Copy numerator to temporary space if it overlaps with the quotient.  */
+  if (np == qp)
+    {
+      MPN_COPY (tp, np, nl);
+      /* Overlap dividend and scratch.  */
+      np = tp;
+    }
+  mpn_div_q (qp, np, nl, dp, dl, tp);
+
+  /* Drop a zero high limb, if any.  */
+  ql -=  qp[ql - 1] == 0;
+
+  SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql;
+  TMP_FREE;
+}
diff --git a/mpz/tdiv_q_2exp.c b/mpz/tdiv_q_2exp.c
new file mode 100644
index 0000000..c6b9bd0
--- /dev/null
+++ b/mpz/tdiv_q_2exp.c
@@ -0,0 +1,67 @@
+/* mpz_tdiv_q_2exp -- Divide an integer by 2**CNT.  Round the quotient
+   towards zero.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set R to trunc(U / 2^CNT), i.e. shift the absolute value of U right
+   by CNT bits and reattach U's sign (rounding towards zero).  */
+void
+mpz_tdiv_q_2exp (mpz_ptr r, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  mp_size_t un, rn;
+  mp_size_t limb_cnt;
+  mp_ptr rp;
+  mp_srcptr up;
+
+  un = SIZ(u);
+  limb_cnt = cnt / GMP_NUMB_BITS;
+  rn = ABS (un) - limb_cnt;
+
+  /* The shift consumes all limbs: result is zero.  */
+  if (rn <= 0)
+    rn = 0;
+  else
+    {
+      rp = MPZ_REALLOC (r, rn);
+      up = PTR(u) + limb_cnt;
+
+      /* Remaining sub-limb shift amount.  */
+      cnt %= GMP_NUMB_BITS;
+      if (cnt != 0)
+	{
+	  mpn_rshift (rp, up, rn, cnt);
+	  /* The shift may zero the high limb.  */
+	  rn -= rp[rn - 1] == 0;
+	}
+      else
+	{
+	  /* Whole-limb shift: plain (low-to-high) copy suffices.  */
+	  MPN_COPY_INCR (rp, up, rn);
+	}
+    }
+
+  SIZ(r) = un >= 0 ? rn : -rn;
+}
diff --git a/mpz/tdiv_q_ui.c b/mpz/tdiv_q_ui.c
new file mode 100644
index 0000000..a11c51e
--- /dev/null
+++ b/mpz/tdiv_q_ui.c
@@ -0,0 +1,83 @@
+/* mpz_tdiv_q_ui(quot, dividend, divisor_limb)
+   -- Divide DIVIDEND by DIVISOR_LIMB and store the result in QUOT.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set QUOT to trunc(DIVIDEND / DIVISOR) and return the absolute value
+   of the remainder (no sign is applied to the returned value; the
+   quotient takes the dividend's sign).  Raises DIVIDE_BY_ZERO when
+   DIVISOR == 0.  */
+unsigned long int
+mpz_tdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  qp = MPZ_REALLOC (quot, nn);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* The divisor needs two limbs; divide with mpn_tdiv_qr and
+	 reassemble the two remainder limbs into an unsigned long.  */
+      mp_limb_t dp[2], rp[2];
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  SIZ(quot) = 0;
+	  rl = np[0];
+	  return rl;
+	}
+
+      dp[0] = divisor & GMP_NUMB_MASK;
+      dp[1] = divisor >> GMP_NUMB_BITS;
+      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+      /* qn = nn - 2 + 1 limbs, minus up to two zero high limbs.  */
+      qn = nn - 2 + 1; qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      /* Single-limb divisor: at most one zero high limb to drop.  */
+      qn = nn - (qp[nn - 1] == 0);
+    }
+
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/tdiv_qr.c b/mpz/tdiv_qr.c
new file mode 100644
index 0000000..ad82945
--- /dev/null
+++ b/mpz/tdiv_qr.c
@@ -0,0 +1,111 @@
+/* mpz_tdiv_qr(quot,rem,dividend,divisor) -- Set QUOT to DIVIDEND/DIVISOR,
+   and REM to DIVIDEND mod DIVISOR.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2005, 2011, 2012, 2021 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Set QUOT to trunc(NUM / DEN) and REM to NUM - QUOT * DEN.  The
+   quotient's sign follows NUM xor DEN, the remainder's sign follows
+   NUM.  Raises DIVIDE_BY_ZERO when DEN == 0.  Handles every aliasing
+   combination of the four operands via the temporary copies below.  */
+void
+mpz_tdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
+{
+  mp_size_t ql, n0;
+  mp_size_t ns, ds, nl, dl;
+  mp_ptr np, dp, qp, rp;
+  TMP_DECL;
+
+  ns = SIZ (num);
+  ds = SIZ (den);
+  nl = ABS (ns);
+  dl = ABS (ds);
+  /* Upper bound on the quotient limb count, corrected afterwards.  */
+  ql = nl - dl + 1;
+
+  if (UNLIKELY (dl == 0))
+    DIVIDE_BY_ZERO;
+
+  rp = MPZ_REALLOC (rem, dl);
+
+  /* |NUM| < |DEN|: quotient 0, remainder is NUM itself.  */
+  if (ql <= 0)
+    {
+      if (num != rem)
+	{
+	  np = PTR (num);
+	  MPN_COPY (rp, np, nl);
+	  SIZ (rem) = SIZ (num);
+	}
+      /* This needs to follow the assignment to rem, in case the
+	 numerator and quotient are the same.  */
+      SIZ (quot) = 0;
+      return;
+    }
+
+  qp = MPZ_REALLOC (quot, ql);
+
+  TMP_MARK;
+  np = PTR (num);
+  dp = PTR (den);
+
+  /* FIXME: We should think about how to handle the temporary allocation.
+     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+     allocate temp space.  */
+
+  /* Copy denominator to temporary space if it overlaps with the quotient
+     or remainder.  */
+  if (dp == rp || dp == qp)
+    {
+      mp_ptr tp;
+      tp = TMP_ALLOC_LIMBS (dl);
+      MPN_COPY (tp, dp, dl);
+      dp = tp;
+    }
+  /* Copy numerator to temporary space if it overlaps with the quotient or
+     remainder.  */
+  if (np == rp || np == qp)
+    {
+      mp_ptr tp;
+      tp = TMP_ALLOC_LIMBS (nl);
+      MPN_COPY (tp, np, nl);
+      np = tp;
+    }
+
+  /* Strip low zero limbs from the denominator: when DEN = D * B^n0
+     (B the limb base), the low n0 remainder limbs are just the low n0
+     numerator limbs, and the division proceeds on the shifted operands.  */
+  for (n0 = 0; *dp == 0; ++dp)
+    {
+      rp [n0++] = *np++;
+      --nl;
+    }
+  mpn_tdiv_qr (qp, rp + n0, 0L, np, nl, dp, dl - n0);
+
+  /* Drop a zero high quotient limb, if any, and normalize the remainder.  */
+  ql -=  qp[ql - 1] == 0;
+  MPN_NORMALIZE (rp, dl);
+
+  SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql;
+  SIZ (rem) = ns >= 0 ? dl : -dl;
+  TMP_FREE;
+}
diff --git a/mpz/tdiv_qr_ui.c b/mpz/tdiv_qr_ui.c
new file mode 100644
index 0000000..4e1c9fa
--- /dev/null
+++ b/mpz/tdiv_qr_ui.c
@@ -0,0 +1,100 @@
+/* mpz_tdiv_qr_ui(quot,rem,dividend,short_divisor) --
+   Set QUOT to DIVIDEND / SHORT_DIVISOR
+   and REM to DIVIDEND mod SHORT_DIVISOR.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2012, 2015
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Truncating division by an unsigned long: set QUOT = trunc(DIVIDEND /
+   DIVISOR) and REM to the remainder, whose sign follows DIVIDEND.  Returns
+   the absolute value of the remainder as an unsigned long.  */
+unsigned long int
+mpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  qp = MPZ_REALLOC (quot, nn);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* The divisor needs two limbs; fall back to the general mpn division.  */
+      mp_limb_t dp[2];
+      mp_ptr rp;
+      mp_size_t rn;
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  SIZ(quot) = 0;
+	  rl = np[0];
+	  SIZ(rem) = ns >= 0 ? 1 : -1;
+	  MPZ_NEWALLOC (rem, 1)[0] = rl;
+	  return rl;
+	}
+
+      rp = MPZ_REALLOC (rem, 2);
+
+      dp[0] = divisor & GMP_NUMB_MASK;
+      dp[1] = divisor >> GMP_NUMB_BITS;
+      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+      /* Strip up to two high zero limbs from the quotient, and up to two
+	 from the two-limb remainder.  */
+      qn = nn - 2 + 1; qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+      rn = 2 - (rp[1] == 0);  rn -= (rp[rn - 1] == 0);
+      SIZ(rem) = ns >= 0 ? rn : -rn;
+    }
+  else
+#endif
+    {
+      /* Single-limb divisor: mpn_divrem_1 returns the remainder limb.  */
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+	SIZ(rem) = 0;
+      else
+	{
+	  SIZ(rem) = ns >= 0 ? 1 : -1;
+	  MPZ_NEWALLOC (rem, 1)[0] = rl;
+	}
+      qn = nn - (qp[nn - 1] == 0);
+    }
+
+  SIZ(quot) = ns >= 0 ? qn : -qn;	/* quotient sign follows the dividend */
+  return rl;
+}
diff --git a/mpz/tdiv_r.c b/mpz/tdiv_r.c
new file mode 100644
index 0000000..b0c38d6
--- /dev/null
+++ b/mpz/tdiv_r.c
@@ -0,0 +1,102 @@
+/* mpz_tdiv_r(rem, dividend, divisor) -- Set REM to DIVIDEND mod DIVISOR.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2005, 2012, 2021 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Set REM = NUM mod DEN for truncating division; the remainder takes the
+   sign of NUM.  Like mpz_tdiv_qr but the quotient is computed into scratch
+   space and discarded.  */
+void
+mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
+{
+  mp_size_t ql, n0;
+  mp_size_t ns, nl, dl;
+  mp_ptr np, dp, qp, rp;
+  TMP_DECL;
+
+  ns = SIZ (num);
+  nl = ABS (ns);
+  dl = ABSIZ (den);
+  ql = nl - dl + 1;		/* upper bound on the quotient limb count */
+
+  if (UNLIKELY (dl == 0))
+    DIVIDE_BY_ZERO;
+
+  if (ql <= 0)
+    {
+      /* |num| < |den|: the remainder is num itself.  */
+      if (num != rem)
+	{
+	  SIZ (rem) = ns;
+	  rp = MPZ_NEWALLOC (rem, nl);
+	  np = PTR (num);
+	  MPN_COPY (rp, np, nl);
+	}
+      return;
+    }
+
+  rp = MPZ_REALLOC (rem, dl);
+
+  TMP_MARK;
+  qp = TMP_ALLOC_LIMBS (ql);	/* quotient is discarded; keep it in scratch */
+  np = PTR (num);
+  dp = PTR (den);
+
+  /* FIXME: We should think about how to handle the temporary allocation.
+     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+     allocate temp space.  */
+
+  /* Copy denominator to temporary space if it overlaps with the remainder.  */
+  if (dp == rp)
+    {
+      mp_ptr tp;
+      tp = TMP_ALLOC_LIMBS (dl);
+      MPN_COPY (tp, dp, dl);
+      dp = tp;
+    }
+  /* Copy numerator to temporary space if it overlaps with the remainder.  */
+  if (np == rp)
+    {
+      mp_ptr tp;
+      tp = TMP_ALLOC_LIMBS (nl);
+      MPN_COPY (tp, np, nl);
+      np = tp;
+    }
+
+  /* Strip low zero limbs from the denominator; the corresponding low limbs
+     of the numerator pass straight through into the remainder.  */
+  for (n0 = 0; *dp == 0; ++dp)
+    {
+      rp [n0++] = *np++;
+      --nl;
+    }
+  mpn_tdiv_qr (qp, rp + n0, 0L, np, nl, dp, dl - n0);
+
+  MPN_NORMALIZE (rp, dl);
+
+  SIZ (rem) = ns >= 0 ? dl : -dl;	/* remainder sign follows num */
+  TMP_FREE;
+}
diff --git a/mpz/tdiv_r_2exp.c b/mpz/tdiv_r_2exp.c
new file mode 100644
index 0000000..96a81f7
--- /dev/null
+++ b/mpz/tdiv_r_2exp.c
@@ -0,0 +1,76 @@
+/* mpz_tdiv_r_2exp -- Divide an integer by 2**CNT and produce a remainder.
+
+Copyright 1991, 1993-1995, 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set RES = IN mod 2^CNT for truncating division, i.e. keep the CNT low
+   bits of |IN| and give the result the sign of IN.  */
+void
+mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt)
+{
+  mp_size_t in_size = ABSIZ (in);
+  mp_size_t res_size;
+  mp_size_t limb_cnt = cnt / GMP_NUMB_BITS;	/* whole limbs kept */
+  mp_srcptr in_ptr = PTR (in);
+
+  if (in_size > limb_cnt)
+    {
+      /* The input operand is (probably) greater than 2**CNT.  */
+      mp_limb_t x;
+
+      /* Mask off the partial high limb: keep only cnt % GMP_NUMB_BITS bits.  */
+      x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % GMP_NUMB_BITS) - 1);
+      if (x != 0)
+	{
+	  res_size = limb_cnt + 1;
+	  MPZ_REALLOC (res, res_size);
+
+	  PTR (res)[limb_cnt] = x;
+	}
+      else
+	{
+	  /* Partial limb is zero; drop any high zero limbs below it too.  */
+	  MPN_NORMALIZE (in_ptr, limb_cnt);
+
+	  MPZ_REALLOC (res, limb_cnt);
+
+	  res_size = limb_cnt;
+	}
+    }
+  else
+    {
+      /* The input operand is smaller than 2**CNT.  We perform a no-op,
+	 apart from that we might need to copy IN to RES.  */
+      limb_cnt = in_size;
+      MPZ_REALLOC (res, limb_cnt);
+
+      res_size = limb_cnt;
+    }
+
+  /* MPZ_REALLOC may move the data, so re-read PTR(res) here.  */
+  if (res != in)
+    MPN_COPY (PTR (res), PTR (in), limb_cnt);
+  SIZ (res) = SIZ (in) >= 0 ? res_size : -res_size;
+}
diff --git a/mpz/tdiv_r_ui.c b/mpz/tdiv_r_ui.c
new file mode 100644
index 0000000..f3da8ef
--- /dev/null
+++ b/mpz/tdiv_r_ui.c
@@ -0,0 +1,96 @@
+/* mpz_tdiv_r_ui(rem, dividend, divisor_limb)
+   -- Set REM to DIVIDEND mod DIVISOR_LIMB.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2005, 2012,
+2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set REM = DIVIDEND mod DIVISOR for truncating division; the remainder
+   takes the sign of DIVIDEND.  Returns the absolute value of the remainder
+   as an unsigned long.  */
+unsigned long int
+mpz_tdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* The divisor needs two limbs; fall back to the general mpn division,
+	 discarding the quotient computed into scratch space.  */
+      mp_limb_t dp[2];
+      mp_ptr rp, qp;
+      mp_size_t rn;
+      TMP_DECL;
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  rl = np[0];
+	  SIZ(rem) = ns >= 0 ? 1 : -1;
+	  MPZ_NEWALLOC (rem, 1)[0] = rl;
+	  return rl;
+	}
+
+      rp = MPZ_NEWALLOC (rem, 2);
+
+      TMP_MARK;
+      dp[0] = divisor & GMP_NUMB_MASK;
+      dp[1] = divisor >> GMP_NUMB_BITS;
+      qp = TMP_ALLOC_LIMBS (nn - 2 + 1);
+      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+      TMP_FREE;
+      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+      /* Strip up to two high zero limbs from the two-limb remainder.  */
+      rn = 2 - (rp[1] == 0);  rn -= (rp[rn - 1] == 0);
+      SIZ(rem) = ns >= 0 ? rn : -rn;
+    }
+  else
+#endif
+    {
+      /* Single-limb divisor: no quotient needed, use mpn_mod_1.  */
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+	SIZ(rem) = 0;
+      else
+	{
+	  SIZ(rem) = ns >= 0 ? 1 : -1;
+	  MPZ_NEWALLOC (rem, 1)[0] = rl;
+	}
+    }
+
+  return rl;
+}
diff --git a/mpz/tdiv_ui.c b/mpz/tdiv_ui.c
new file mode 100644
index 0000000..4618599
--- /dev/null
+++ b/mpz/tdiv_ui.c
@@ -0,0 +1,84 @@
+/* mpz_tdiv_ui(dividend, divisor_limb) -- Return DIVIDEND mod DIVISOR_LIMB.
+
+Copyright 1991, 1993, 1994, 1996-1998, 2001, 2002, 2004, 2005, 2012 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Return |DIVIDEND| mod DIVISOR as an unsigned long, without storing the
+   remainder in an mpz.  Note the sign of DIVIDEND does not affect the
+   returned value.  */
+unsigned long int
+mpz_tdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  if (UNLIKELY (divisor == 0))
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* The divisor needs two limbs; use the general mpn division with a
+	 scratch quotient and a two-limb stack remainder.  */
+      mp_limb_t dp[2], rp[2];
+      mp_ptr qp;
+      mp_size_t rn;
+      TMP_DECL;
+
+      if (nn == 1)		/* tdiv_qr requirements; tested above for 0 */
+	{
+	  rl = np[0];
+	  return rl;
+	}
+
+      TMP_MARK;
+      dp[0] = divisor & GMP_NUMB_MASK;
+      dp[1] = divisor >> GMP_NUMB_BITS;
+      qp = TMP_ALLOC_LIMBS (nn - 2 + 1);
+      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+      TMP_FREE;
+      /* Recombine the two remainder limbs into one unsigned long.  */
+      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+      rn = 2 - (rp[1] == 0);  rn -= (rp[rn - 1] == 0);
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+    }
+
+  return rl;
+}
diff --git a/mpz/tstbit.c b/mpz/tstbit.c
new file mode 100644
index 0000000..48725d4
--- /dev/null
+++ b/mpz/tstbit.c
@@ -0,0 +1,80 @@
+/* mpz_tstbit -- test a specified bit.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* For negatives the effective twos complement is achieved by negating the
+   limb tested, either with a ones or twos complement.  Twos complement
+   ("-") is used if there's only zero limbs below the one being tested.
+   Ones complement ("~") is used if there's a non-zero below.  Note that "-"
+   is correct even if the limb examined is 0 (and the true beginning of twos
+   complement is further up).
+
+   Testing the limbs below p is unavoidable on negatives, but will usually
+   need to examine only *(p-1).  The search is done from *(p-1) down to
+   *u_ptr, since that might give better cache locality, and because a
+   non-zero limb is perhaps a touch more likely in the middle of a number
+   than at the low end.
+
+   Bits past the end of available data simply follow sign of u.  Notice that
+   the limb_index >= abs_size test covers u=0 too.  */
+
+/* Return bit BIT_INDEX of U (0 or 1), treating negative U as an infinite
+   twos-complement value; see the comment above for the complementing
+   strategy.  */
+int
+mpz_tstbit (mpz_srcptr u, mp_bitcnt_t bit_index) __GMP_NOTHROW
+{
+  mp_srcptr      u_ptr      = PTR(u);
+  mp_size_t      size       = SIZ(u);
+  unsigned       abs_size   = ABS(size);
+  mp_size_t      limb_index = bit_index / GMP_NUMB_BITS;
+  mp_srcptr      p          = u_ptr + limb_index;
+  mp_limb_t      limb;
+
+  /* Bits past the stored limbs follow the sign; covers u == 0 too.  */
+  if (limb_index >= abs_size)
+    return (size < 0);
+
+  limb = *p;
+  if (size < 0)
+    {
+      limb = -limb;     /* twos complement */
+
+      /* Scan downward for a non-zero limb below the tested one.  */
+      while (p != u_ptr)
+	{
+	  p--;
+	  if (*p != 0)
+	    {
+	      limb--;	/* make it a ones complement instead */
+	      break;
+	    }
+	}
+    }
+
+  return (limb >> (bit_index % GMP_NUMB_BITS)) & 1;
+}
diff --git a/mpz/ui_pow_ui.c b/mpz/ui_pow_ui.c
new file mode 100644
index 0000000..87f2d3a
--- /dev/null
+++ b/mpz/ui_pow_ui.c
@@ -0,0 +1,58 @@
+/* mpz_ui_pow_ui -- ulong raised to ulong.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Set R = B^E where both base and exponent are unsigned longs.  The actual
+   powering is done by mpz_n_pow_ui on the base expressed as limbs.  */
+void
+mpz_ui_pow_ui (mpz_ptr r, unsigned long b, unsigned long e)
+{
+#if GMP_NAIL_BITS != 0
+  /* With nails, b may not fit in one limb; split it into two.  */
+  if (b > GMP_NUMB_MAX)
+    {
+      mp_limb_t bb[2];
+      bb[0] = b & GMP_NUMB_MASK;
+      bb[1] = b >> GMP_NUMB_BITS;
+      mpz_n_pow_ui (r, bb, (mp_size_t) 2, e);
+    }
+  else
+#endif
+    {
+#ifdef _LONG_LONG_LIMB
+      /* i386 gcc 2.95.3 doesn't recognise blimb can be eliminated when
+	 mp_limb_t is an unsigned long, so only use a separate blimb when
+	 necessary.  */
+      mp_limb_t  blimb = b;
+      /* Size (b != 0) makes b == 0 a zero-limb operand.  */
+      mpz_n_pow_ui (r, &blimb, (mp_size_t) (b != 0), e);
+#else
+      mpz_n_pow_ui (r, &b,     (mp_size_t) (b != 0), e);
+#endif
+    }
+}
diff --git a/mpz/ui_sub.c b/mpz/ui_sub.c
new file mode 100644
index 0000000..1d0edb9
--- /dev/null
+++ b/mpz/ui_sub.c
@@ -0,0 +1,90 @@
+/* mpz_ui_sub -- Subtract an unsigned one-word integer and an mpz_t.
+
+Copyright 2002, 2004, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set W = UVAL - V, where UVAL is an unsigned long and V an mpz.  */
+void
+mpz_ui_sub (mpz_ptr w, unsigned long int uval, mpz_srcptr v)
+{
+  mp_ptr vp, wp;
+  mp_size_t vn, wn;
+  mp_limb_t cy;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (uval > GMP_NUMB_MAX)
+    {
+      /* uval needs two limbs: build a stack-backed temporary mpz for it and
+	 use the general subtraction.  */
+      mpz_t u;
+      mp_limb_t ul[2];
+      PTR(u) = ul;
+      ul[0] = uval & GMP_NUMB_MASK;
+      ul[1] = uval >> GMP_NUMB_BITS;
+      SIZ(u) = 2;
+      mpz_sub (w, u, v);
+      return;
+    }
+#endif
+
+  vn = SIZ(v);
+
+  if (vn > 1)
+    {
+      /* v > uval (it has more limbs), so the result uval - v is negative.  */
+      wp = MPZ_REALLOC (w, vn);
+      vp = PTR(v);
+      mpn_sub_1 (wp, vp, vn, (mp_limb_t) uval);
+      wn = -(vn - (wp[vn - 1] == 0));	/* negate, dropping a high zero limb */
+    }
+  else if (vn >= 0)
+    {
+      /* v fits in one limb (or is zero); compare magnitudes directly.
+	 The mask -(mp_limb_t) vn gives vp0 == 0 when vn == 0.  */
+      mp_limb_t vp0;
+      vp0 = PTR (v)[0] & - (mp_limb_t) vn;
+      wp = MPZ_NEWALLOC (w, 1);
+      if (uval >= vp0)
+	{
+	  wp[0] = uval - vp0;
+	  wn = wp[0] != 0;	/* 0 when the difference is zero */
+	}
+      else
+	{
+	  wp[0] = vp0 - uval;
+	  wn = -1;
+	}
+    }
+  else /* (vn < 0) */
+    {
+      /* v is negative: uval - v = uval + |v|, always positive.  */
+      vn = -vn;
+      wp = MPZ_REALLOC (w, vn + 1);
+      vp = PTR(v);
+      cy = mpn_add_1 (wp, vp, vn, (mp_limb_t) uval);
+      wp[vn] = cy;
+      wn = vn + (cy != 0);
+    }
+
+  SIZ(w) = wn;
+}
diff --git a/mpz/urandomb.c b/mpz/urandomb.c
new file mode 100644
index 0000000..5c6ad77
--- /dev/null
+++ b/mpz/urandomb.c
@@ -0,0 +1,47 @@
+/* mpz_urandomb (rop, state, n) -- Generate a uniform pseudorandom
+   integer in the range 0 to 2^N - 1, inclusive, using STATE as the
+   random state previously initialized by a call to gmp_randinit().
+
+Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set ROP to a uniformly distributed random integer in [0, 2^NBITS - 1],
+   drawing bits from RSTATE via _gmp_rand.  */
+void
+mpz_urandomb (mpz_ptr rop, gmp_randstate_ptr rstate, mp_bitcnt_t nbits)
+{
+  mp_ptr rp;
+  mp_size_t size;
+
+  size = BITS_TO_LIMBS (nbits);
+  rp = MPZ_NEWALLOC (rop, size);
+
+  _gmp_rand (rp, rstate, nbits);
+  /* High limbs may come out zero; normalize before setting the size.  */
+  MPN_NORMALIZE (rp, size);
+  SIZ (rop) = size;
+}
diff --git a/mpz/urandomm.c b/mpz/urandomm.c
new file mode 100644
index 0000000..4e6b0ba
--- /dev/null
+++ b/mpz/urandomm.c
@@ -0,0 +1,98 @@
+/* mpz_urandomm (rop, state, n) -- Generate a uniform pseudorandom
+   integer in the range 0 to N-1, using STATE as the random state
+   previously initialized by a call to gmp_randinit().
+
+Copyright 2000, 2002, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h" /* for count_leading_zeros */
+
+
+#define MAX_URANDOMM_ITER  80
+
+/* Set ROP to a uniformly distributed random integer in [0, N-1], using
+   rejection sampling on nbits-wide candidates; after MAX_URANDOMM_ITER
+   failed draws, fall back to subtracting N once (candidate < 2N there,
+   so one subtraction reduces it into range).  */
+void
+mpz_urandomm (mpz_ptr rop, gmp_randstate_ptr rstate, mpz_srcptr n)
+{
+  mp_ptr rp, np;
+  mp_size_t nbits, size;
+  mp_limb_t nh;
+  int count;
+  int pow2;
+  int cmp;
+  TMP_DECL;
+
+  size = ABSIZ (n);
+  if (UNLIKELY (size == 0))
+    DIVIDE_BY_ZERO;
+
+  np = PTR (n);
+  nh = np[size - 1];
+
+  /* Detect whether n is a power of 2.  */
+  pow2 = POW2_P (nh) && (size == 1 || mpn_zero_p (np, size - 1));
+
+  /* For a power of 2 draw one bit fewer, so every draw is already < n.  */
+  count_leading_zeros (count, nh);
+  nbits = size * GMP_NUMB_BITS - (count - GMP_NAIL_BITS) - pow2;
+  if (nbits == 0)		/* nbits == 0 means that n was == 1.  */
+    {
+      SIZ (rop) = 0;
+      return;
+    }
+
+  TMP_MARK;
+  /* Copy n aside if rop aliases it, since rop is reallocated below.  */
+  if (rop == n)
+    {
+      mp_ptr tp;
+      tp = TMP_ALLOC_LIMBS (size);
+      MPN_COPY (tp, np, size);
+      np = tp;
+    }
+
+  /* Here the allocated size can be one too much if n is a power of
+     (2^GMP_NUMB_BITS) but it's convenient for using mpn_cmp below.  */
+  rp = MPZ_NEWALLOC (rop, size);
+  /* Clear last limb to prevent the case in which size is one too much.  */
+  rp[size - 1] = 0;
+
+  count = MAX_URANDOMM_ITER;	/* Set iteration count limit.  */
+  do
+    {
+      _gmp_rand (rp, rstate, nbits);
+      MPN_CMP (cmp, rp, np, size);
+    }
+  while (cmp >= 0 && --count != 0);
+
+  if (count == 0)
+    /* Too many iterations; return result mod n == result - n */
+    mpn_sub_n (rp, rp, np, size);
+
+  MPN_NORMALIZE (rp, size);
+  SIZ (rop) = size;
+  TMP_FREE;
+}
diff --git a/mpz/xor.c b/mpz/xor.c
new file mode 100644
index 0000000..5ec657a
--- /dev/null
+++ b/mpz/xor.c
@@ -0,0 +1,146 @@
+/* mpz_xor -- Logical xor.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005, 2012,
+2015-2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Set RES = OP1 ^ OP2 (bitwise exclusive or), treating negative operands
+   as infinite twos-complement values.  */
+void
+mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+{
+  mp_srcptr op1_ptr, op2_ptr;
+  mp_size_t op1_size, op2_size;
+  mp_ptr res_ptr;
+  mp_size_t res_size;
+
+  op1_size = SIZ(op1);
+  op2_size = SIZ(op2);
+
+  /* Arrange op1_size >= op2_size; in particular a negative operand, if
+     any, ends up as op2.  */
+  if (op1_size < op2_size)
+    {
+      MPZ_SRCPTR_SWAP (op1, op2);
+      MP_SIZE_T_SWAP (op1_size, op2_size);
+    }
+
+  op1_ptr = PTR(op1);
+  res_ptr = PTR(res);
+
+  if (op2_size >= 0)
+    {
+      /* Both operands non-negative: plain limb-wise xor, with op1's high
+	 limbs copied through unchanged.  */
+      if (res_ptr != op1_ptr)
+	{
+	  res_ptr = MPZ_REALLOC (res, op1_size);
+	  MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+		    op1_size - op2_size);
+	}
+      if (LIKELY (op2_size != 0))
+	mpn_xor_n (res_ptr, op1_ptr, PTR(op2), op2_size);
+      res_size = op1_size;
+
+      MPN_NORMALIZE (res_ptr, res_size);
+      SIZ(res) = res_size;
+    }
+  else
+    {
+      mp_ptr opx;
+      TMP_DECL;
+
+      op2_size = -op2_size;
+      TMP_MARK;
+      if (op1_size < 0)
+	{
+	  mp_ptr opy;
+
+	  /* Both operands are negative, the result will be positive.
+	      (-OP1) ^ (-OP2) =
+	     = ~(OP1 - 1) ^ ~(OP2 - 1) =
+	     = (OP1 - 1) ^ (OP2 - 1)  */
+
+	  op1_size = -op1_size;
+
+	  /* Possible optimization: Decrease mpn_sub precision,
+	     as we won't use the entire res of both.  */
+	  TMP_ALLOC_LIMBS_2 (opx, op1_size, opy, op2_size);
+	  mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
+	  op1_ptr = opx;
+
+	  mpn_sub_1 (opy, PTR(op2), op2_size, (mp_limb_t) 1);
+	  op2_ptr = opy;
+
+	  /* After the swap above op2_size >= op1_size holds here.  */
+	  res_ptr = MPZ_NEWALLOC (res, op2_size);
+	  /* Don't re-read OP1_PTR and OP2_PTR.  They point to temporary
+	     space--never to the space PTR(res) used to point to before
+	     reallocation.  */
+
+	  MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+		    op2_size - op1_size);
+	  mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
+	  TMP_FREE;
+	  res_size = op2_size;
+
+	  MPN_NORMALIZE (res_ptr, res_size);
+	  SIZ(res) = res_size;
+	}
+      else
+	{
+	  /* Operand 2 negative, so will be the result.
+	     -(OP1 ^ (-OP2)) = -(OP1 ^ ~(OP2 - 1)) =
+	     = ~(OP1 ^ ~(OP2 - 1)) + 1 =
+	     = (OP1 ^ (OP2 - 1)) + 1      */
+
+	  res_size = MAX (op1_size, op2_size);
+	  /* One extra limb for the final + 1 carrying out.  */
+	  res_ptr = MPZ_REALLOC (res, res_size + 1);
+	  op1_ptr = PTR(op1);	/* re-read: realloc may have moved res==op1 */
+
+	  opx = TMP_ALLOC_LIMBS (op2_size);
+	  mpn_sub_1 (opx, PTR(op2), op2_size, (mp_limb_t) 1);
+	  op2_ptr = opx;
+
+	  if (res_size == op1_size)
+	    {
+	      MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
+	      mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
+	    }
+	  else
+	    {
+	      MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
+	      if (LIKELY (op1_size != 0))
+		mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
+	    }
+	  TMP_FREE;
+
+	  /* Add the final 1, letting a carry extend the size by one limb.  */
+	  res_ptr[res_size] = 0;
+	  MPN_INCR_U (res_ptr, res_size + 1, (mp_limb_t) 1);
+	  res_size += res_ptr[res_size];
+
+	  MPN_NORMALIZE_NOT_ZERO (res_ptr, res_size);
+	  SIZ(res) = -res_size;
+	}
+    }
+}
diff --git a/nextprime.c b/nextprime.c
new file mode 100644
index 0000000..e8e60dd
--- /dev/null
+++ b/nextprime.c
@@ -0,0 +1,166 @@
+/* gmp_nextprime -- generate small primes reasonably efficiently for internal
+   GMP needs.
+
+   Contributed to the GNU project by Torbjorn Granlund.  Miscellaneous
+   improvements by Martin Boij.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/*
+  Optimisation ideas:
+
+  1. Unroll the sieving loops.  Should reach 1 write/cycle.  That would be a 2x
+     improvement.
+
+  2. Separate sieving with primes p < SIEVESIZE and p >= SIEVESIZE.  The latter
+     will need at most one write, and thus not need any inner loop.
+
+  3. For primes p >= SIEVESIZE, i.e., typically the majority of primes, we
+     perform more than one division per sieving write.  That might dominate the
+     entire run time for the nextprime function.  An incrementally initialised
+     remainder table of Pi(65536) = 6542 16-bit entries could replace that
+     division.
+*/
+
+#include "gmp-impl.h"
+#include <string.h>		/* for memset */
+
+
+/* Return the next prime from the sieve PS, advancing its state.  The
+   sieve area S covers the odd numbers S0, S0+2, ..., S0+2*(SIEVESIZE-1);
+   an entry of 0 marks a prime, 1 marks a composite.  */
+unsigned long int
+gmp_nextprime (gmp_primesieve_t *ps)
+{
+  unsigned long p, d, pi;
+  unsigned char *sp;
+  /* Gaps between successive integers coprime to 2, 3, 5 and 7, starting
+     from 11; the pattern repeats with period 48.  */
+  static unsigned char addtab[] =
+    { 2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,8,6,4,6,2,4,6,2,6,6,4,
+      2,4,6,2,6,4,2,4,2,10,2,10 };
+  unsigned char *addp = addtab;
+  unsigned long ai;
+
+  /* Look for already sieved primes.  A sentinel at the end of the sieving
+     area allows us to use a very simple loop here.  */
+  d = ps->d;
+  sp = ps->s + d;
+  while (*sp != 0)
+    sp++;
+  if (sp != ps->s + SIEVESIZE)
+    {
+      d = sp - ps->s;
+      ps->d = d + 1;
+      return ps->s0 + 2 * d;	/* index d represents the odd number s0+2d */
+    }
+
+  /* Handle the number 2 separately.  */
+  if (ps->s0 < 3)
+    {
+      ps->s0 = 3 - 2 * SIEVESIZE; /* Tricky */
+      return 2;
+    }
+
+  /* Exhausted computed primes.  Resieve, then call ourselves recursively.  */
+
+#if 0
+  for (sp = ps->s; sp < ps->s + SIEVESIZE; sp++)
+    *sp = 0;
+#else
+  memset (ps->s, 0, SIEVESIZE);
+#endif
+
+  ps->s0 += 2 * SIEVESIZE;	/* advance the window */
+
+  /* Update sqrt_s0 as needed.  */
+  while ((ps->sqrt_s0 + 1) * (ps->sqrt_s0 + 1) <= ps->s0 + 2 * SIEVESIZE - 1)
+    ps->sqrt_s0++;
+
+  /* Mark multiples of 3, 5 and 7 by hand.  For each prime p, pi becomes
+     the index of the first odd multiple of p (above p itself) inside the
+     new window.  */
+  pi = ((ps->s0 + 3) / 2) % 3;
+  if (pi > 0)
+    pi = 3 - pi;
+  if (ps->s0 + 2 * pi <= 3)	/* don't mark p itself as composite */
+    pi += 3;
+  sp = ps->s + pi;
+  while (sp < ps->s + SIEVESIZE)
+    {
+      *sp = 1, sp += 3;
+    }
+
+  pi = ((ps->s0 + 5) / 2) % 5;
+  if (pi > 0)
+    pi = 5 - pi;
+  if (ps->s0 + 2 * pi <= 5)
+    pi += 5;
+  sp = ps->s + pi;
+  while (sp < ps->s + SIEVESIZE)
+    {
+      *sp = 1, sp += 5;
+    }
+
+  pi = ((ps->s0 + 7) / 2) % 7;
+  if (pi > 0)
+    pi = 7 - pi;
+  if (ps->s0 + 2 * pi <= 7)
+    pi += 7;
+  sp = ps->s + pi;
+  while (sp < ps->s + SIEVESIZE)
+    {
+      *sp = 1, sp += 7;
+    }
+
+  /* Sieve with the remaining primes up to sqrt_s0, stepping through the
+     wheel-of-210 candidates via addtab.  */
+  p = 11;
+  ai = 0;
+  while (p <= ps->sqrt_s0)
+    {
+      pi = ((ps->s0 + p) / 2) % p;
+      if (pi > 0)
+	pi = p - pi;
+      if (ps->s0 + 2 * pi <= p)
+	  pi += p;
+      sp = ps->s + pi;
+      while (sp < ps->s + SIEVESIZE)
+	{
+	  *sp = 1, sp += p;
+	}
+      p += addp[ai];
+      ai = (ai + 1) % 48;
+    }
+  ps->d = 0;
+  return gmp_nextprime (ps);	/* recursion depth is 1: window is now full */
+}
+
+/* Initialize sieve PS so the first gmp_nextprime call resieves from the
+   start (d == SIEVESIZE marks the window as exhausted).  */
+void
+gmp_init_primesieve (gmp_primesieve_t *ps)
+{
+  ps->s0 = 0;
+  ps->sqrt_s0 = 0;
+  ps->d = SIEVESIZE;
+  ps->s[SIEVESIZE] = 0;		/* sentinel */
+}
diff --git a/patches/exceptions.diff b/patches/exceptions.diff
new file mode 100644
index 0000000..0e6a4f9
--- /dev/null
+++ b/patches/exceptions.diff
@@ -0,0 +1,340 @@
+Make GMP compile in a noexcept environment
+
+GMP throws exceptions to indicate errors like failed allocation or negative
+factorials. Change these to print a terse error message and raise SIGABRT.
+
+Ideally, these would `LOG(FATAL)`. However, giving GMP an Abseil dependency
+breaks Haskell binaries. (The problematic dependency chain appears to be
+Haskell -> GMP -> Abseil -> libunwind.) Instead, just log using `std::cerr` and
+call `std::abort`; the abnormal termination will trigger a stack trace in
+google3 binaries anyway.
+
+This is an updated version of cl/50915228.
+
+--- a/gmpxx.h
++++ b/gmpxx.h
+@@ -39,7 +39,12 @@ see https://www.gnu.org/licenses/.  */
+ #include <utility>
+ #include <algorithm>  /* swap */
+ #include <string>
++#ifdef __EXCEPTIONS
+ #include <stdexcept>
++#else
++#include <cstdlib>
++#include <iostream>
++#endif
+ #include <cfloat>
+ #include <gmp.h>
+ 
+@@ -1240,7 +1245,14 @@ struct __gmp_fac_function
+   static void eval(mpz_ptr z, signed long l)
+   {
+     if (l < 0)
++#ifdef __EXCEPTIONS
+       throw std::domain_error ("factorial(negative)");
++#else
++      {
++        std::cerr << "std::domain_error: factorial(negative)\n";
++        std::abort();
++      }
++#endif
+     eval(z, static_cast<unsigned long>(l));
+   }
+   static void eval(mpz_ptr z, mpz_srcptr w)
+@@ -1248,9 +1260,23 @@ struct __gmp_fac_function
+     if (!mpz_fits_ulong_p(w))
+       {
+ 	if (mpz_sgn(w) < 0)
++#ifdef __EXCEPTIONS
+ 	  throw std::domain_error ("factorial(negative)");
++#else
++         {
++	    std::cerr << "std::domain_error: factorial(negative)\n";
++           std::abort();
++         }
++#endif
+ 	else
++#ifdef __EXCEPTIONS
+ 	  throw std::bad_alloc(); // or std::overflow_error ("factorial")?
++#else
++         {
++	    std::cerr << "std::bad_alloc\n";
++           std::abort();
++         }
++#endif
+       }
+     eval(z, mpz_get_ui(w));
+   }
+@@ -1264,7 +1290,14 @@ struct __gmp_primorial_function
+   static void eval(mpz_ptr z, signed long l)
+   {
+     if (l < 0)
++#ifdef __EXCEPTIONS
+       throw std::domain_error ("primorial(negative)");
++#else
++      {
++        std::cerr << "std::domain_error: primorial(negative)\n";
++        std::abort();
++      }
++#endif
+     eval(z, static_cast<unsigned long>(l));
+   }
+   static void eval(mpz_ptr z, mpz_srcptr w)
+@@ -1272,9 +1305,23 @@ struct __gmp_primorial_function
+     if (!mpz_fits_ulong_p(w))
+       {
+ 	if (mpz_sgn(w) < 0)
++#ifdef __EXCEPTIONS
+ 	  throw std::domain_error ("primorial(negative)");
++#else
++         {
++	    std::cerr << "std::domain_error: primorial(negative)\n";
++           std::abort();
++         }
++#endif
+ 	else
++#ifdef __EXCEPTIONS
+ 	  throw std::bad_alloc(); // or std::overflow_error ("primorial")?
++#else
++         {
++	    std::cerr << "std::bad_alloc\n";
++           std::abort();
++         }
++#endif
+       }
+     eval(z, mpz_get_ui(w));
+   }
+@@ -1299,7 +1346,14 @@ struct __gmp_fib_function
+   static void eval(mpz_ptr z, mpz_srcptr w)
+   {
+     if (!mpz_fits_slong_p(w))
++#ifdef __EXCEPTIONS
+       throw std::bad_alloc(); // or std::overflow_error ("fibonacci")?
++#else
++      {
++        std::cerr << "std::bad_alloc\n";
++        std::abort();
++      }
++#endif
+     eval(z, mpz_get_si(w));
+   }
+   static void eval(mpz_ptr z, double d)
+@@ -1650,7 +1704,14 @@ public:
+     if (mpz_init_set_str (mp, s, base) != 0)
+       {
+         mpz_clear (mp);
++#ifdef __EXCEPTIONS
+         throw std::invalid_argument ("mpz_set_str");
++#else
++        {
++          std::cerr << "std::invalid_argument: mpz_set_str\n";
++          std::abort();
++        }
++#endif
+       }
+   }
+   explicit __gmp_expr(const std::string &s, int base = 0)
+@@ -1658,7 +1719,14 @@ public:
+     if (mpz_init_set_str(mp, s.c_str(), base) != 0)
+       {
+         mpz_clear (mp);
++#ifdef __EXCEPTIONS
+         throw std::invalid_argument ("mpz_set_str");
++#else
++        {
++          std::cerr << "std::invalid_argument: mpz_set_str\n";
++          std::abort();
++        }
++#endif
+       }
+   }
+ 
+@@ -1685,13 +1753,27 @@ public:
+   __gmp_expr & operator=(const char *s)
+   {
+     if (mpz_set_str (mp, s, 0) != 0)
++#ifdef __EXCEPTIONS
+       throw std::invalid_argument ("mpz_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpz_set_str\n";
++        std::abort();
++      }
++#endif
+     return *this;
+   }
+   __gmp_expr & operator=(const std::string &s)
+   {
+     if (mpz_set_str(mp, s.c_str(), 0) != 0)
++#ifdef __EXCEPTIONS
+       throw std::invalid_argument ("mpz_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpz_set_str\n";
++        std::abort();
++      }
++#endif
+     return *this;
+   }
+ 
+@@ -1828,7 +1910,14 @@ public:
+     else if (mpq_set_str(mp, s, base) != 0)
+       {
+         mpq_clear (mp);
++#ifdef __EXCEPTIONS
+         throw std::invalid_argument ("mpq_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpq_set_str\n";
++        std::abort();
++      }
++#endif
+       }
+   }
+   explicit __gmp_expr(const std::string &s, int base = 0)
+@@ -1837,7 +1926,14 @@ public:
+     if (mpq_set_str (mp, s.c_str(), base) != 0)
+       {
+         mpq_clear (mp);
++#ifdef __EXCEPTIONS
+         throw std::invalid_argument ("mpq_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpq_set_str\n";
++        std::abort();
++      }
++#endif
+       }
+   }
+   explicit __gmp_expr(mpq_srcptr q)
+@@ -1874,13 +1970,27 @@ public:
+   __gmp_expr & operator=(const char *s)
+   {
+     if (mpq_set_str (mp, s, 0) != 0)
++#ifdef __EXCEPTIONS
+       throw std::invalid_argument ("mpq_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpq_set_str\n";
++        std::abort();
++      }
++#endif
+     return *this;
+   }
+   __gmp_expr & operator=(const std::string &s)
+   {
+     if (mpq_set_str(mp, s.c_str(), 0) != 0)
++#ifdef __EXCEPTIONS
+       throw std::invalid_argument ("mpq_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpq_set_str\n";
++        std::abort();
++      }
++#endif
+     return *this;
+   }
+ 
+@@ -2038,7 +2148,14 @@ public:
+     if (mpf_init_set_str (mp, s, 0) != 0)
+       {
+         mpf_clear (mp);
++#ifdef __EXCEPTIONS
+         throw std::invalid_argument ("mpf_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpf_set_str\n";
++        std::abort();
++      }
++#endif
+       }
+   }
+   __gmp_expr(const char *s, mp_bitcnt_t prec, int base = 0)
+@@ -2047,7 +2164,14 @@ public:
+     if (mpf_set_str(mp, s, base) != 0)
+       {
+         mpf_clear (mp);
++#ifdef __EXCEPTIONS
+         throw std::invalid_argument ("mpf_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpf_set_str\n";
++        std::abort();
++      }
++#endif
+       }
+   }
+   explicit __gmp_expr(const std::string &s)
+@@ -2055,7 +2179,14 @@ public:
+     if (mpf_init_set_str(mp, s.c_str(), 0) != 0)
+       {
+         mpf_clear (mp);
++#ifdef __EXCEPTIONS
+         throw std::invalid_argument ("mpf_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpf_set_str\n";
++        std::abort();
++      }
++#endif
+       }
+   }
+   __gmp_expr(const std::string &s, mp_bitcnt_t prec, int base = 0)
+@@ -2064,7 +2195,14 @@ public:
+     if (mpf_set_str(mp, s.c_str(), base) != 0)
+       {
+         mpf_clear (mp);
++#ifdef __EXCEPTIONS
+         throw std::invalid_argument ("mpf_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpf_set_str\n";
++        std::abort();
++      }
++#endif
+       }
+   }
+ 
+@@ -2093,13 +2231,27 @@ public:
+   __gmp_expr & operator=(const char *s)
+   {
+     if (mpf_set_str (mp, s, 0) != 0)
++#ifdef __EXCEPTIONS
+       throw std::invalid_argument ("mpf_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpf_set_str\n";
++        std::abort();
++      }
++#endif
+     return *this;
+   }
+   __gmp_expr & operator=(const std::string &s)
+   {
+     if (mpf_set_str(mp, s.c_str(), 0) != 0)
++#ifdef __EXCEPTIONS
+       throw std::invalid_argument ("mpf_set_str");
++#else
++      {
++        std::cerr << "std::invalid_argument: mpf_set_str\n";
++        std::abort();
++      }
++#endif
+     return *this;
+   }
+ 
+@@ -3505,7 +3657,14 @@ public:
+ 		mp_bitcnt_t size)
+   {
+     if (f (state, size) == 0)
++#ifdef __EXCEPTIONS
+       throw std::length_error ("gmp_randinit_lc_2exp_size");
++#else
++      {
++        std::cerr << "std::length_error: gmp_randinit_lc_2exp_size\n";
++        std::abort();
++      }
++#endif
+   }
+ 
+   ~gmp_randclass() { gmp_randclear(state); }
diff --git a/patches/overflow.diff b/patches/overflow.diff
new file mode 100644
index 0000000..960bf54
--- /dev/null
+++ b/patches/overflow.diff
@@ -0,0 +1,22 @@
+Fix incorrect overflow test, that itself overflowed.
+
+"(unsigned long)(x * y)" will multiply ints x and y into a new int, then
+convert the int into an unsigned long. Replace it with
+"(unsigned long)(x) * (unsigned long)(y)" which performs the multiplication in a
+larger space.
+
+Converted from cl/147088677.
+
+--- a/mpn/generic/get_d.c
++++ b/mpn/generic/get_d.c
+@@ -151,8 +151,8 @@ mpn_get_d (mp_srcptr up, mp_size_t size,
+   /* Adjust exp to a radix point just above {up,size}, guarding against
+      overflow.  After this exp can of course be reduced to anywhere within
+      the {up,size} region without underflow.  */
+-  if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size)
+-		> ((unsigned long) LONG_MAX - exp)))
++  if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS) * (unsigned long) (size)
++		> (LONG_MAX - (unsigned long) exp)))
+     {
+ #if _GMP_IEEE_FLOATS
+       goto ieee_infinity;
diff --git a/patches/series b/patches/series
new file mode 100644
index 0000000..161b008
--- /dev/null
+++ b/patches/series
@@ -0,0 +1,2 @@
+exceptions.diff
+overflow.diff
diff --git a/primesieve.c b/primesieve.c
new file mode 100644
index 0000000..567fab0
--- /dev/null
+++ b/primesieve.c
@@ -0,0 +1,321 @@
+/* primesieve (BIT_ARRAY, N) -- Fills the BIT_ARRAY with a mask for primes up to N.
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010-2012, 2015, 2016, 2021, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#if 0
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+/* n_fto_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+static mp_limb_t
+n_fto_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+/* n_cto_bit (n) = ((n-2)&(-CNST_LIMB(2)))/3U */
+static mp_limb_t
+n_cto_bit (mp_limb_t n) { return (n|1)/3U-1; }
+
+#if 0
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_fto_bit(n) / GMP_LIMB_BITS + 1; }
+#endif
+
+#define SET_OFF1(m1, m2, M1, M2, off, BITS)		\
+  if (off) {						\
+    if (off < GMP_LIMB_BITS) {				\
+      m1 = (M1 >> off) | (M2 << (GMP_LIMB_BITS - off));	\
+      if (off <= BITS - GMP_LIMB_BITS) {		\
+	m2 = M1 << (BITS - GMP_LIMB_BITS - off)		\
+	  | M2 >> off;					\
+      } else {						\
+	m1 |= M1 << (BITS - off);			\
+	m2 = M1 >> (off + GMP_LIMB_BITS - BITS);	\
+      }							\
+    } else {						\
+      m1 = M1 << (BITS - off)				\
+	| M2 >> (off - GMP_LIMB_BITS);			\
+      m2 = M2 << (BITS - off)				\
+	| M1 >> (off + GMP_LIMB_BITS - BITS);		\
+    }							\
+  } else {						\
+    m1 = M1; m2 = M2;					\
+  }
+
+#define SET_OFF2(m1, m2, m3, M1, M2, M3, off, BITS)	\
+  if (off) {						\
+    if (off <= GMP_LIMB_BITS) {				\
+      m1 = M2 << (GMP_LIMB_BITS - off);			\
+      m2 = M3 << (GMP_LIMB_BITS - off);			\
+      if (off != GMP_LIMB_BITS) {			\
+	m1 |= (M1 >> off);				\
+	m2 |= (M2 >> off);				\
+      }							\
+      if (off <= BITS - 2 * GMP_LIMB_BITS) {		\
+	m3 = M1 << (BITS - 2 * GMP_LIMB_BITS - off)	\
+	  | M3 >> off;					\
+      } else {						\
+	m2 |= M1 << (BITS - GMP_LIMB_BITS - off);	\
+	m3 = M1 >> (off + 2 * GMP_LIMB_BITS - BITS);	\
+      }							\
+    } else if (off < 2 *GMP_LIMB_BITS) {		\
+      m1 = M2 >> (off - GMP_LIMB_BITS)			\
+	| M3 << (2 * GMP_LIMB_BITS - off);		\
+      if (off <= BITS - GMP_LIMB_BITS) {		\
+	m2 = M3 >> (off - GMP_LIMB_BITS)		\
+	  | M1 << (BITS - GMP_LIMB_BITS - off);		\
+	m3 = M2 << (BITS - GMP_LIMB_BITS - off);	\
+	if (off != BITS - GMP_LIMB_BITS) {		\
+	  m3 |= M1 >> (off + 2 * GMP_LIMB_BITS - BITS);	\
+	}						\
+      } else {						\
+	m1 |= M1 << (BITS - off);			\
+	m2 = M2 << (BITS - off)				\
+	  | M1 >> (GMP_LIMB_BITS - BITS + off);		\
+	m3 = M2 >> (GMP_LIMB_BITS - BITS + off);	\
+      }							\
+    } else {						\
+      m1 = M1 << (BITS - off)				\
+	| M3 >> (off - 2 * GMP_LIMB_BITS);		\
+      m2 = M2 << (BITS - off)				\
+	| M1 >> (off + GMP_LIMB_BITS - BITS);		\
+      m3 = M3 << (BITS - off)				\
+	| M2 >> (off + GMP_LIMB_BITS - BITS);		\
+    }							\
+  } else {						\
+    m1 = M1; m2 = M2; m3 = M3;				\
+  }
+
+#define ROTATE1(m1, m2, BITS)			\
+  do {						\
+    mp_limb_t __tmp;				\
+    __tmp = m1 >> (2 * GMP_LIMB_BITS - BITS);	\
+    m1 = (m1 << (BITS - GMP_LIMB_BITS)) | m2;	\
+    m2 = __tmp;					\
+  } while (0)
+
+#define ROTATE2(m1, m2, m3, BITS)		\
+  do {						\
+    mp_limb_t __tmp;				\
+    __tmp = m2 >> (3 * GMP_LIMB_BITS - BITS);	\
+    m2 = m2 << (BITS - GMP_LIMB_BITS * 2)	\
+      | m1 >> (3 * GMP_LIMB_BITS - BITS);	\
+    m1 = m1 << (BITS - GMP_LIMB_BITS * 2) | m3;	\
+    m3 = __tmp;					\
+  } while (0)
+
+static mp_limb_t
+fill_bitpattern (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset)
+{
+#ifdef SIEVE_2MSK2
+  mp_limb_t m11, m12, m21, m22, m23;
+
+  { /* correctly handle offset == 0... */
+    mp_limb_t off1 = offset % (11 * 5 * 2);
+    SET_OFF1 (m11, m12, SIEVE_MASK1, SIEVE_MASKT, off1, 11 * 5 * 2);
+    offset %= 13 * 7 * 2;
+    SET_OFF2 (m21, m22, m23, SIEVE_2MSK1, SIEVE_2MSK2, SIEVE_2MSKT, offset, 13 * 7 * 2);
+  }
+  /* THINK: Consider handling odd values of 'limbs' outside the loop,
+     to have a single exit condition. */
+  do {
+    bit_array[0] = m11 | m21;
+    if (--limbs == 0)
+      break;
+    ROTATE1 (m11, m12, 11 * 5 * 2);
+    bit_array[1] = m11 | m22;
+    bit_array += 2;
+    ROTATE1 (m11, m12, 11 * 5 * 2);
+    ROTATE2 (m21, m22, m23, 13 * 7 * 2);
+  } while (--limbs != 0);
+  return n_cto_bit (13 + 1);
+#else
+#ifdef SIEVE_MASK2
+  mp_limb_t mask, mask2, tail;
+
+  { /* correctly handle offset == 0... */
+    offset %= 7 * 5 * 2;
+    SET_OFF2 (mask, mask2, tail, SIEVE_MASK1, SIEVE_MASK2, SIEVE_MASKT, offset, 7 * 5 * 2);
+  }
+  /* THINK: Consider handling odd values of 'limbs' outside the loop,
+     to have a single exit condition. */
+  do {
+    bit_array[0] = mask;
+    if (--limbs == 0)
+      break;
+    bit_array[1] = mask2;
+    bit_array += 2;
+    ROTATE2 (mask, mask2, tail, 7 * 5 * 2);
+  } while (--limbs != 0);
+  return n_cto_bit (7 + 1);
+#else
+  MPN_FILL (bit_array, limbs, CNST_LIMB(0));
+  return 0;
+#endif
+#endif
+}
+
+static void
+block_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset,
+	       mp_srcptr sieve)
+{
+  mp_size_t bits, off = offset;
+  mp_limb_t mask, i;
+
+  ASSERT (limbs > 0);
+
+  bits = limbs * GMP_LIMB_BITS - 1;
+
+  i = fill_bitpattern (bit_array, limbs, offset);
+
+  ASSERT (i < GMP_LIMB_BITS);
+
+  mask = CNST_LIMB(1) << i;
+  do {
+    ++i;
+    if ((*sieve & mask) == 0)
+      {
+	mp_size_t step, lindex;
+	mp_limb_t lmask;
+	unsigned  maskrot;
+
+	step = id_to_n(i);
+
+/*	lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
+	lindex = i*(step+1)-1+(-(i&1)&(i+1));
+/*	lindex = i*(step+1+(i&1))-1+(i&1); */
+	if (lindex > bits + off)
+	  break;
+
+	step <<= 1;
+	maskrot = step % GMP_LIMB_BITS;
+
+	if (lindex < off)
+	  lindex += step * ((off - lindex - 1) / step + 1);
+
+	lindex -= off;
+
+	lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+	for ( ; lindex <= bits; lindex += step) {
+	  bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+	  lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+	};
+
+/*	lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
+	lindex = i*(i*3+6)+(i&1);
+
+	if (lindex < off)
+	  lindex += step * ((off - lindex - 1) / step + 1);
+
+	lindex -= off;
+
+	lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+	for ( ; lindex <= bits; lindex += step) {
+	  bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+	  lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+	};
+      }
+      mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
+      sieve += mask & 1;
+  } while (1);
+}
+
+#define BLOCK_SIZE 2048
+
+/* Fills bit_array with the characteristic function of composite
+   numbers up to the parameter n. I.e. a bit set to "1" represents a
+   composite, a "0" represents a prime.
+
+   The primesieve_size(n) limbs pointed to by bit_array are
+   overwritten. The returned value counts prime integers in the
+   interval [4, n]. Note that n > 4.
+
+   Even numbers and multiples of 3 are excluded "a priori", only
+   numbers equivalent to +/- 1 mod 6 have their bit in the array.
+
+   Once sieved, if the bit b is ZERO it represent a prime, the
+   represented prime is bit_to_n(b), if the LSbit is bit 0, or
+   id_to_n(b), if you call "1" the first bit.
+ */
+
+mp_limb_t
+gmp_primesieve (mp_ptr bit_array, mp_limb_t n)
+{
+  mp_size_t size;
+  mp_limb_t bits;
+  static mp_limb_t presieved[] = {PRIMESIEVE_INIT_TABLE};
+
+  ASSERT (n > 4);
+
+  bits = n_fto_bit(n);
+  size = bits / GMP_LIMB_BITS + 1;
+
+  for (mp_size_t j = 0, lim = MIN (size, PRIMESIEVE_NUMBEROF_TABLE);
+       j < lim; ++j)
+    bit_array [j] = presieved [j]; /* memcopy? */
+
+  if (size > PRIMESIEVE_NUMBEROF_TABLE) {
+    mp_size_t off;
+    off = size > 2 * BLOCK_SIZE ? BLOCK_SIZE + (size % BLOCK_SIZE) : size;
+    block_resieve (bit_array + PRIMESIEVE_NUMBEROF_TABLE,
+		   off - PRIMESIEVE_NUMBEROF_TABLE,
+		   GMP_LIMB_BITS * PRIMESIEVE_NUMBEROF_TABLE, bit_array);
+    for (; off < size; off += BLOCK_SIZE)
+      block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array);
+  }
+
+  if ((bits + 1) % GMP_LIMB_BITS != 0)
+    bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
+
+  return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size);
+}
+
+#undef BLOCK_SIZE
+#undef SIEVE_MASK1
+#undef SIEVE_MASK2
+#undef SIEVE_MASKT
+#undef SIEVE_2MSK1
+#undef SIEVE_2MSK2
+#undef SIEVE_2MSKT
+#undef SET_OFF1
+#undef SET_OFF2
+#undef ROTATE1
+#undef ROTATE2
diff --git a/printf/asprintf.c b/printf/asprintf.c
new file mode 100644
index 0000000..da87b75
--- /dev/null
+++ b/printf/asprintf.c
@@ -0,0 +1,47 @@
+/* gmp_asprintf -- formatted output to an allocated space.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+
+#include "gmp-impl.h"
+
+
+int
+gmp_asprintf (char **result, const char *fmt, ...)
+{
+  va_list  ap;
+  int      ret;
+
+  va_start (ap, fmt);
+
+  ret = gmp_vasprintf (result, fmt, ap);
+  va_end (ap);
+  return ret;
+}
diff --git a/printf/asprntffuns.c b/printf/asprntffuns.c
new file mode 100644
index 0000000..022a80c
--- /dev/null
+++ b/printf/asprntffuns.c
@@ -0,0 +1,71 @@
+/* __gmp_asprintf_memory etc -- formatted output to allocated space.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* These routines are in a separate file so that the mpz_t, mpq_t and mpf_t
+   operator<< routines can avoid dragging vsnprintf into the link (via
+   __gmp_asprintf_format).  */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+
+
+int
+__gmp_asprintf_memory (struct gmp_asprintf_t *d, const char *str, size_t len)
+{
+  GMP_ASPRINTF_T_NEED (d, len);
+  memcpy (d->buf + d->size, str, len);
+  d->size += len;
+  return len;
+}
+
+int
+__gmp_asprintf_reps (struct gmp_asprintf_t *d, int c, int reps)
+{
+  GMP_ASPRINTF_T_NEED (d, reps);
+  memset (d->buf + d->size, c, reps);
+  d->size += reps;
+  return reps;
+}
+
+int
+__gmp_asprintf_final (struct gmp_asprintf_t *d)
+{
+  char  *buf = d->buf;
+  ASSERT (d->alloc >= d->size + 1);
+  buf[d->size] = '\0';
+  __GMP_REALLOCATE_FUNC_MAYBE_TYPE (buf, d->alloc, d->size+1, char);
+  *d->result = buf;
+  return 0;
+}
diff --git a/printf/doprnt.c b/printf/doprnt.c
new file mode 100644
index 0000000..3b86613
--- /dev/null
+++ b/printf/doprnt.c
@@ -0,0 +1,625 @@
+/* __gmp_doprnt -- printf style formatted output.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in glibc langinfo.h */
+
+#include "config.h"	/* needed for the HAVE_, could also move gmp incls */
+
+#include <stdarg.h>
+#include <ctype.h>     /* for isdigit */
+#include <stddef.h>    /* for ptrdiff_t */
+#include <string.h>
+#include <stdio.h>     /* for NULL */
+#include <stdlib.h>
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#endif
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for localeconv */
+#endif
+
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h> /* for quad_t */
+#endif
+
+#include "gmp-impl.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* Should be portable, but in any case this is only used under some ASSERTs. */
+#define va_equal(x, y)                           \
+  (memcmp (&(x), &(y), sizeof(va_list)) == 0)
+
+
+/* printf is convenient because it allows various types to be printed in one
+   fairly compact call, so having gmp_printf support the standard types as
+   well as the gmp ones is important.  This ends up meaning all the standard
+   parsing must be duplicated, to get a new routine recognising the gmp
+   extras.
+
+   With the currently favoured handling of mpz etc as Z, Q and F type
+   markers, it's not possible to use glibc register_printf_function since
+   that only accepts new conversion characters, not new types.  If Z was a
+   conversion there'd be no way to specify hex, decimal or octal, or
+   similarly with F no way to specify fixed point or scientific format.
+
+   It seems wisest to pass conversions %f, %e and %g of float, double and
+   long double over to the standard printf.  It'd be hard to be sure of
+   getting the right handling for NaNs, rounding, etc.  Integer conversions
+   %d etc and string conversions %s on the other hand could be easily enough
+   handled within gmp_doprnt, but if floats are going to libc then it's just
+   as easy to send all non-gmp types there.
+
+   "Z" was a type marker for size_t in old glibc, but there seems no need to
+   provide access to that now "z" is standard.
+
+   In GMP 4.1.1 we documented "ll" and "L" as being equivalent, but in C99
+   in fact "ll" is just for long long and "L" just for long double.
+   Apparently GLIBC allows "L" for long long though.  This doesn't affect
+   us as such, since both are passed through to the C library.  To be
+   consistent with what we said before, the two are treated equivalently
+   here, and it's left to the C library to do what it thinks with them.
+
+   Possibilities:
+
+   "b" might be nice for binary output, and could even be supported for the
+   standard C types too if desired.
+
+   POSIX style "%n$" parameter numbering would be possible, but would need
+   to be handled completely within gmp_doprnt, since the numbering will be
+   all different once the format string it cut into pieces.
+
+   Some options for mpq formatting would be good.  Perhaps a non-zero
+   precision field could give a width for the denominator and mean always
+   put a "/".  A form "n+p/q" might interesting too, though perhaps that's
+   better left to applications.
+
+   Right now there's no way for an application to know whether types like
+   intmax_t are supported here.  If configure is doing its job and the same
+   compiler is used for gmp as for the application then there shouldn't be
+   any problem, but perhaps gmp.h should have some preprocessor symbols to
+   say what libgmp can do.  */
+
+
+
+/* If a gmp format is the very first thing or there are two gmp formats with
+   nothing in between then we'll reach here with this_fmt == last_fmt and we
+   can do nothing in that case.
+
+   last_ap is always replaced after a FLUSH, so it doesn't matter if va_list
+   is a call-by-reference and the funs->format routine modifies it.  */
+
+#define FLUSH()                                         \
+  do {                                                  \
+    if (this_fmt == last_fmt)                           \
+      {                                                 \
+	TRACE (printf ("nothing to flush\n"));          \
+	ASSERT (va_equal (this_ap, last_ap));           \
+      }                                                 \
+    else                                                \
+      {                                                 \
+	ASSERT (*this_fmt == '%');                      \
+	*this_fmt = '\0';                               \
+	TRACE (printf ("flush \"%s\"\n", last_fmt));    \
+	DOPRNT_FORMAT (last_fmt, last_ap);              \
+      }                                                 \
+  } while (0)
+
+
+/* Parse up the given format string and do the appropriate output using the
+   given "funs" routines.  The data parameter is passed through to those
+   routines.  */
+
+int
+__gmp_doprnt (const struct doprnt_funs_t *funs, void *data,
+	      const char *orig_fmt, va_list orig_ap)
+{
+  va_list  ap, this_ap, last_ap;
+  size_t   alloc_fmt_size, orig_fmt_size;
+  char     *fmt, *alloc_fmt, *last_fmt, *this_fmt, *gmp_str;
+  int      retval = 0;
+  int      type, fchar, *value, seen_precision;
+  struct doprnt_params_t param;
+
+  TRACE (printf ("gmp_doprnt \"%s\"\n", orig_fmt));
+
+  /* Don't modify orig_ap, if va_list is actually an array and hence call by
+     reference.  It could be argued that it'd be more efficient to leave the
+     caller to make a copy if it cared, but doing so here is going to be a
+     very small part of the total work, and we may as well keep applications
+     out of trouble.  */
+  va_copy (ap, orig_ap);
+
+  /* The format string is chopped up into pieces to be passed to
+     funs->format.  Unfortunately that means it has to be copied so each
+     piece can be null-terminated.  We're not going to be very fast here, so
+     use __gmp_allocate_func rather than TMP_ALLOC, to avoid overflowing the
+     stack if a long output string is given.  */
+  alloc_fmt_size = orig_fmt_size = strlen (orig_fmt) + 1;
+#if _LONG_LONG_LIMB
+  /* for a long long limb we change %Mx to %llx, so could need an extra 1
+     char for every 3 existing */
+  alloc_fmt_size += alloc_fmt_size / 3;
+#endif
+  alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
+  fmt = alloc_fmt;
+  memcpy (fmt, orig_fmt, orig_fmt_size);
+
+  /* last_fmt and last_ap are just after the last output, and hence where
+     the next output will begin, when that's done */
+  last_fmt = fmt;
+  va_copy (last_ap, ap);
+
+  for (;;)
+    {
+      TRACE (printf ("next: \"%s\"\n", fmt));
+
+      fmt = strchr (fmt, '%');
+      if (fmt == NULL)
+	break;
+
+      /* this_fmt and this_ap are the current '%' sequence being considered */
+      this_fmt = fmt;
+      va_copy (this_ap, ap);
+      fmt++; /* skip the '%' */
+
+      TRACE (printf ("considering\n");
+	     printf ("  last: \"%s\"\n", last_fmt);
+	     printf ("  this: \"%s\"\n", this_fmt));
+
+      type = '\0';
+      value = &param.width;
+
+      param.base = 10;
+      param.conv = 0;
+      param.expfmt = "e%c%02ld";
+      param.exptimes4 = 0;
+      param.fill = ' ';
+      param.justify = DOPRNT_JUSTIFY_RIGHT;
+      param.prec = 6;
+      param.showbase = DOPRNT_SHOWBASE_NO;
+      param.showpoint = 0;
+      param.showtrailing = 1;
+      param.sign = '\0';
+      param.width = 0;
+      seen_precision = 0;
+
+      /* This loop parses a single % sequence.  "break" from the switch
+	 means continue with this %, "goto next" means the conversion
+	 character has been seen and a new % should be sought.  */
+      for (;;)
+	{
+	  fchar = *fmt++;
+	  if (fchar == '\0')
+	    break;
+
+	  switch (fchar) {
+
+	  case 'a':
+	    /* %a behaves like %e, but defaults to all significant digits,
+	       and there's no leading zeros on the exponent (which is in
+	       fact bit-based) */
+	    param.base = 16;
+	    param.expfmt = "p%c%ld";
+	    goto conv_a;
+	  case 'A':
+	    param.base = -16;
+	    param.expfmt = "P%c%ld";
+	  conv_a:
+	    param.conv = DOPRNT_CONV_SCIENTIFIC;
+	    param.exptimes4 = 1;
+	    if (! seen_precision)
+	      param.prec = -1;  /* default to all digits */
+	    param.showbase = DOPRNT_SHOWBASE_YES;
+	    param.showtrailing = 1;
+	    goto floating_a;
+
+	  case 'c':
+	    /* Let's assume wchar_t will be promoted to "int" in the call,
+	       the same as char will be. */
+	    (void) va_arg (ap, int);
+	    goto next;
+
+	  case 'd':
+	  case 'i':
+	  case 'u':
+	  integer:
+	    TRACE (printf ("integer, base=%d\n", param.base));
+	    if (! seen_precision)
+	      param.prec = -1;
+	    switch (type) {
+	    case 'j':
+	      /* Let's assume uintmax_t is the same size as intmax_t. */
+#if HAVE_INTMAX_T
+	      (void) va_arg (ap, intmax_t);
+#else
+	      ASSERT_FAIL (intmax_t not available);
+#endif
+	      break;
+	    case 'l':
+	      (void) va_arg (ap, long);
+	      break;
+	    case 'L':
+#if HAVE_LONG_LONG
+	      (void) va_arg (ap, long long);
+#else
+	      ASSERT_FAIL (long long not available);
+#endif
+	      break;
+	    case 'N':
+	      {
+		mp_ptr     xp;
+		mp_size_t  xsize, abs_xsize;
+		mpz_t      z;
+		FLUSH ();
+		xp = va_arg (ap, mp_ptr);
+		PTR(z) = xp;
+		xsize = (int) va_arg (ap, mp_size_t);
+		abs_xsize = ABS (xsize);
+		MPN_NORMALIZE (xp, abs_xsize);
+		SIZ(z) = (xsize >= 0 ? abs_xsize : -abs_xsize);
+		ASSERT_CODE (ALLOC(z) = abs_xsize);
+		gmp_str = mpz_get_str (NULL, param.base, z);
+		goto gmp_integer;
+	      }
+	      /* break; */
+	    case 'q':
+	      /* quad_t is probably the same as long long, but let's treat
+		 it separately just to be sure.  Also let's assume u_quad_t
+		 will be the same size as quad_t.  */
+#if HAVE_QUAD_T
+	      (void) va_arg (ap, quad_t);
+#else
+	      ASSERT_FAIL (quad_t not available);
+#endif
+	      break;
+	    case 'Q':
+	      FLUSH ();
+	      gmp_str = mpq_get_str (NULL, param.base, va_arg(ap, mpq_srcptr));
+	      goto gmp_integer;
+	    case 't':
+#if HAVE_PTRDIFF_T
+	      (void) va_arg (ap, ptrdiff_t);
+#else
+	      ASSERT_FAIL (ptrdiff_t not available);
+#endif
+	      break;
+	    case 'z':
+	      (void) va_arg (ap, size_t);
+	      break;
+	    case 'Z':
+	      {
+		int   ret;
+		FLUSH ();
+		gmp_str = mpz_get_str (NULL, param.base,
+				       va_arg (ap, mpz_srcptr));
+	      gmp_integer:
+		ret = __gmp_doprnt_integer (funs, data, &param, gmp_str);
+		 __GMP_FREE_FUNC_TYPE (gmp_str, strlen(gmp_str)+1, char);
+		DOPRNT_ACCUMULATE (ret);
+		va_copy (last_ap, ap);
+		last_fmt = fmt;
+	      }
+	      break;
+	    default:
+	      /* default is an "int", and this includes h=short and hh=char
+		 since they're promoted to int in a function call */
+	      (void) va_arg (ap, int);
+	      break;
+	    }
+	    goto next;
+
+	  case 'E':
+	    param.base = -10;
+	    param.expfmt = "E%c%02ld";
+	    /*FALLTHRU*/
+	  case 'e':
+	    param.conv = DOPRNT_CONV_SCIENTIFIC;
+	  floating:
+	    if (param.showbase == DOPRNT_SHOWBASE_NONZERO)
+	      {
+		/* # in %e, %f and %g */
+		param.showpoint = 1;
+		param.showtrailing = 1;
+	      }
+	  floating_a:
+	    switch (type) {
+	    case 'F':
+	      FLUSH ();
+	      DOPRNT_ACCUMULATE (__gmp_doprnt_mpf (funs, data, &param,
+						   GMP_DECIMAL_POINT,
+						   va_arg (ap, mpf_srcptr)));
+	      va_copy (last_ap, ap);
+	      last_fmt = fmt;
+	      break;
+	    case 'L':
+#if HAVE_LONG_DOUBLE
+	      (void) va_arg (ap, long double);
+#else
+	      ASSERT_FAIL (long double not available);
+#endif
+	      break;
+	    default:
+	      (void) va_arg (ap, double);
+	      break;
+	    }
+	    goto next;
+
+	  case 'f':
+	    param.conv = DOPRNT_CONV_FIXED;
+	    goto floating;
+
+	  case 'F': /* mpf_t     */
+	  case 'j': /* intmax_t  */
+	  case 'L': /* long long */
+	  case 'N': /* mpn       */
+	  case 'q': /* quad_t    */
+	  case 'Q': /* mpq_t     */
+	  case 't': /* ptrdiff_t */
+	  case 'z': /* size_t    */
+	  case 'Z': /* mpz_t     */
+	  set_type:
+	    type = fchar;
+	    break;
+
+	  case 'G':
+	    param.base = -10;
+	    param.expfmt = "E%c%02ld";
+	    /*FALLTHRU*/
+	  case 'g':
+	    param.conv = DOPRNT_CONV_GENERAL;
+	    param.showtrailing = 0;
+	    goto floating;
+
+	  case 'h':
+	    if (type != 'h')
+	      goto set_type;
+	    type = 'H';   /* internal code for "hh" */
+	    break;
+
+	  case 'l':
+	    if (type != 'l')
+	      goto set_type;
+	    type = 'L';   /* "ll" means "L" */
+	    break;
+
+	  case 'm':
+	    /* glibc strerror(errno), no argument */
+	    goto next;
+
+	  case 'M': /* mp_limb_t */
+	    /* mung format string to l or ll and let plain printf handle it */
+#if _LONG_LONG_LIMB
+	    memmove (fmt+1, fmt, strlen (fmt)+1);
+	    fmt[-1] = 'l';
+	    fmt[0] = 'l';
+	    fmt++;
+	    type = 'L';
+#else
+	    fmt[-1] = 'l';
+	    type = 'l';
+#endif
+	    break;
+
+	  case 'n':
+	    {
+	      void  *p;
+	      FLUSH ();
+	      p = va_arg (ap, void *);
+	      switch (type) {
+	      case '\0': * (int       *) p = retval; break;
+	      case 'F':  mpf_set_si ((mpf_ptr) p, (long) retval); break;
+	      case 'H':  * (char      *) p = retval; break;
+	      case 'h':  * (short     *) p = retval; break;
+#if HAVE_INTMAX_T
+	      case 'j':  * (intmax_t  *) p = retval; break;
+#else
+	      case 'j':  ASSERT_FAIL (intmax_t not available); break;
+#endif
+	      case 'l':  * (long      *) p = retval; break;
+#if HAVE_QUAD_T && HAVE_LONG_LONG
+	      case 'q':
+		ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
+		/*FALLTHRU*/
+#else
+	      case 'q':  ASSERT_FAIL (quad_t not available); break;
+#endif
+#if HAVE_LONG_LONG
+	      case 'L':  * (long long *) p = retval; break;
+#else
+	      case 'L':  ASSERT_FAIL (long long not available); break;
+#endif
+	      case 'N':
+		{
+		  mp_size_t  n;
+		  n = va_arg (ap, mp_size_t);
+		  n = ABS (n);
+		  if (n != 0)
+		    {
+		      * (mp_ptr) p = retval;
+		      MPN_ZERO ((mp_ptr) p + 1, n - 1);
+		    }
+		}
+		break;
+	      case 'Q':  mpq_set_si ((mpq_ptr) p, (long) retval, 1L); break;
+#if HAVE_PTRDIFF_T
+	      case 't':  * (ptrdiff_t *) p = retval; break;
+#else
+	      case 't':  ASSERT_FAIL (ptrdiff_t not available); break;
+#endif
+	      case 'z':  * (size_t    *) p = retval; break;
+	      case 'Z':  mpz_set_si ((mpz_ptr) p, (long) retval); break;
+	      }
+	    }
+	    va_copy (last_ap, ap);
+	    last_fmt = fmt;
+	    goto next;
+
+	  case 'o':
+	    param.base = 8;
+	    goto integer;
+
+	  case 'p':
+	  case 's':
+	    /* "void *" will be good enough for "char *" or "wchar_t *", no
+	       need for separate code.  */
+	    (void) va_arg (ap, const void *);
+	    goto next;
+
+	  case 'x':
+	    param.base = 16;
+	    goto integer;
+	  case 'X':
+	    param.base = -16;
+	    goto integer;
+
+	  case '%':
+	    goto next;
+
+	  case '#':
+	    param.showbase = DOPRNT_SHOWBASE_NONZERO;
+	    break;
+
+	  case '\'':
+	    /* glibc digit grouping, just pass it through, no support for it
+	       on gmp types */
+	    break;
+
+	  case '+':
+	  case ' ':
+	    param.sign = fchar;
+	    break;
+
+	  case '-':
+	    param.justify = DOPRNT_JUSTIFY_LEFT;
+	    break;
+	  case '.':
+	    seen_precision = 1;
+	    param.prec = -1; /* "." alone means all necessary digits */
+	    value = &param.prec;
+	    break;
+
+	  case '*':
+	    {
+	      int n = va_arg (ap, int);
+
+	      if (value == &param.width)
+		{
+		  /* negative width means left justify */
+		  if (n < 0)
+		    {
+		      param.justify = DOPRNT_JUSTIFY_LEFT;
+		      n = -n;
+		    }
+		  param.width = n;
+		}
+	      else
+		{
+		  /* don't allow negative precision */
+		  param.prec = MAX (0, n);
+		}
+	    }
+	    break;
+
+	  case '0':
+	    if (value == &param.width)
+	      {
+		/* in width field, set fill */
+		param.fill = '0';
+
+		/* for right justify, put the fill after any minus sign */
+		if (param.justify == DOPRNT_JUSTIFY_RIGHT)
+		  param.justify = DOPRNT_JUSTIFY_INTERNAL;
+	      }
+	    else
+	      {
+		/* in precision field, set value */
+		*value = 0;
+	      }
+	    break;
+
+	  case '1': case '2': case '3': case '4': case '5':
+	  case '6': case '7': case '8': case '9':
+	    /* process all digits to form a value */
+	    {
+	      int  n = 0;
+	      do {
+		n = n * 10 + (fchar-'0');
+		fchar = *fmt++;
+	      } while (isascii (fchar) && isdigit (fchar));
+	      fmt--; /* unget the non-digit */
+	      *value = n;
+	    }
+	    break;
+
+	  default:
+	    /* something invalid */
+	    ASSERT (0);
+	    goto next;
+	  }
+	}
+
+    next:
+      /* Stop parsing the current "%" format, look for a new one. */
+      ;
+    }
+
+  TRACE (printf ("remainder: \"%s\"\n", last_fmt));
+  if (*last_fmt != '\0')
+    DOPRNT_FORMAT (last_fmt, last_ap);
+
+  if (funs->final != NULL)
+    if ((*funs->final) (data) == -1)
+      goto error;
+
+ done:
+  __GMP_FREE_FUNC_TYPE (alloc_fmt, alloc_fmt_size, char);
+  return retval;
+
+ error:
+  retval = -1;
+  goto done;
+}
diff --git a/printf/doprntf.c b/printf/doprntf.c
new file mode 100644
index 0000000..2a7e0d2
--- /dev/null
+++ b/printf/doprntf.c
@@ -0,0 +1,389 @@
+/* __gmp_doprnt_mpf -- mpf formatted output.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* The separate of __gmp_doprnt_float_digits and __gmp_doprnt_float is so
+   some C++ can do the mpf_get_str and release it in case of an exception */
+
+#define DIGIT_VALUE(c)                  \
+  (isdigit (c)   ? (c) - '0'            \
+   : islower (c) ? (c) - 'a' + 10       \
+   :               (c) - 'A' + 10)
+
+/* Format the mpf F according to the parsed printf parameters P, writing the
+   result through the FUNS output functions with their DATA argument.  POINT
+   is the radix-point string to emit between integer and fraction parts.
+   Returns -1 on error (see the "error" label); otherwise the value
+   accumulated in retval by the DOPRNT_* macros (by the conventions of the
+   callers this is the output length -- confirm against gmp-impl.h).  The
+   digit string obtained from mpf_get_str is heap-allocated and is released
+   at "done" via __GMP_FREE_FUNC_TYPE.  */
+int
+__gmp_doprnt_mpf (const struct doprnt_funs_t *funs,
+		  void *data,
+		  const struct doprnt_params_t *p,
+		  const char *point,
+		  mpf_srcptr f)
+{
+  int         prec, ndigits, free_size, len, newlen, justify, justlen, explen;
+  int         showbaselen, sign, signlen, intlen, intzeros, pointlen;
+  int         fraczeros, fraclen, preczeros;
+  char        *s, *free_ptr;
+  mp_exp_t    exp;
+  char        exponent[GMP_LIMB_BITS + 10];
+  const char  *showbase;
+  int         retval = 0;
+
+  TRACE (printf ("__gmp_doprnt_float\n");
+	 printf ("  conv=%d prec=%d\n", p->conv, p->prec));
+
+  /* Decide how many digits to request from mpf_get_str (0 means all). */
+  prec = p->prec;
+  if (prec <= -1)
+    {
+      /* all digits */
+      ndigits = 0;
+
+      /* arrange the fixed/scientific decision on a "prec" implied by how
+	 many significant digits there are */
+      if (p->conv == DOPRNT_CONV_GENERAL)
+	MPF_SIGNIFICANT_DIGITS (prec, PREC(f), ABS(p->base));
+    }
+  else
+    {
+      switch (p->conv) {
+      case DOPRNT_CONV_FIXED:
+	/* Precision is digits after the radix point.  Try not to generate
+	   too many more than will actually be required.  If f>=1 then
+	   overestimate the integer part, and add prec.  If f<1 then
+	   underestimate the zeros between the radix point and the first
+	   digit and subtract that from prec.  In either case add 2 so the
+	   round to nearest can be applied accurately.  Finally, we add 1 to
+	   handle the case of 1-eps where EXP(f) = 0 but mpf_get_str returns
+	   exp as 1.  */
+	ndigits = prec + 2 + 1
+	  + EXP(f) * (mp_bases[ABS(p->base)].chars_per_limb + (EXP(f)>=0));
+	ndigits = MAX (ndigits, 1);
+	break;
+
+      case DOPRNT_CONV_SCIENTIFIC:
+	/* precision is digits after the radix point, and there's one digit
+	   before */
+	ndigits = prec + 1;
+	break;
+
+      default:
+	ASSERT (0);
+	/*FALLTHRU*/
+
+      case DOPRNT_CONV_GENERAL:
+	/* precision is total digits, but be sure to ask mpf_get_str for at
+	   least 1, not 0 */
+	ndigits = MAX (prec, 1);
+	break;
+      }
+    }
+  TRACE (printf ("  ndigits %d\n", ndigits));
+
+  /* Convert to a digit string; "exp" from mpf_get_str is the count of
+     digits before the radix point (s itself contains no point, and may
+     carry a leading '-' which is split off just below).  */
+  s = mpf_get_str (NULL, &exp, p->base, ndigits, f);
+  len = strlen (s);
+  free_ptr = s;
+  free_size = len + 1;
+  TRACE (printf ("  s   %s\n", s);
+	 printf ("  exp %ld\n", exp);
+	 printf ("  len %d\n", len));
+
+  /* For fixed mode check the ndigits formed above was in fact enough for
+     the integer part plus p->prec after the radix point. */
+  ASSERT ((p->conv == DOPRNT_CONV_FIXED && p->prec > -1)
+	  ? ndigits >= MAX (1, exp + p->prec + 2) : 1);
+
+  sign = p->sign;
+  if (s[0] == '-')
+    {
+      sign = s[0];
+      s++, len--;
+    }
+  signlen = (sign != '\0');
+  TRACE (printf ("  sign %c  signlen %d\n", sign, signlen));
+
+  switch (p->conv) {
+  case DOPRNT_CONV_FIXED:
+    if (prec <= -1)
+      prec = MAX (0, len-exp);   /* retain all digits */
+
+    /* Truncate if necessary so fraction will be at most prec digits. */
+    ASSERT (prec >= 0);
+    newlen = exp + prec;
+    if (newlen < 0)
+      {
+	/* first non-zero digit is below target prec, and at least one zero
+	   digit in between, so print zero */
+	len = 0;
+	exp = 0;
+      }
+    else if (len <= newlen)
+      {
+	/* already got few enough digits */
+      }
+    else
+      {
+	/* discard excess digits and round to nearest */
+
+	const char  *num_to_text = (p->base >= 0
+				    ? "0123456789abcdefghijklmnopqrstuvwxyz"
+				    : "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+	int  base = ABS(p->base);
+	int  n;
+
+	ASSERT (base <= 36);
+
+	len = newlen;
+	n = DIGIT_VALUE (s[len]);
+	TRACE (printf ("  rounding with %d\n", n));
+	if (n >= (base + 1) / 2)
+	  {
+	    /* propagate a carry */
+	    for (;;)
+	      {
+		if (len == 0)
+		  {
+		    s[0] = '1';
+		    len = 1;
+		    exp++;
+		    break;
+		  }
+		n = DIGIT_VALUE (s[len-1]);
+		ASSERT (n >= 0 && n < base);
+		n++;
+		if (n != base)
+		  {
+		    TRACE (printf ("  storing now %d\n", n));
+		    s[len-1] = num_to_text[n];
+		    break;
+		  }
+		len--;
+	      }
+	  }
+	else
+	  {
+	    /* truncate only, strip any trailing zeros now exposed */
+	    while (len > 0 && s[len-1] == '0')
+	      len--;
+	  }
+
+	/* Can have newlen==0, in which case the truncate was just to check
+	   for a carry turning it into "1".  If we're left with len==0 then
+	   adjust exp to match.  */
+	if (len == 0)
+	  exp = 0;
+      }
+
+  fixed:
+    ASSERT (len == 0 ? exp == 0 : 1);
+    if (exp <= 0)
+      {
+	TRACE (printf ("  fixed 0.000sss\n"));
+	intlen = 0;
+	intzeros = 1;
+	fraczeros = -exp;
+	fraclen = len;
+      }
+    else
+      {
+	TRACE (printf ("  fixed sss.sss or sss000\n"));
+	intlen = MIN (len, exp);
+	intzeros = exp - intlen;
+	fraczeros = 0;
+	fraclen = len - intlen;
+      }
+    explen = 0;
+    break;
+
+  case DOPRNT_CONV_SCIENTIFIC:
+    {
+      long int expval;
+      char  expsign;
+
+      if (prec <= -1)
+	prec = MAX (0, len-1);   /* retain all digits */
+
+    scientific:
+      TRACE (printf ("  scientific s.sss\n"));
+
+      intlen = MIN (1, len);
+      intzeros = (intlen == 0 ? 1 : 0);
+      fraczeros = 0;
+      fraclen = len - intlen;
+
+      expval = (exp-intlen);
+      if (p->exptimes4)
+	expval <<= 2;
+
+      /* Split out the sign since %o or %x in expfmt give negatives as twos
+	 complement, not with a sign. */
+      expsign = (expval >= 0 ? '+' : '-');
+      expval = ABS (expval);
+
+#if HAVE_VSNPRINTF
+      explen = snprintf (exponent, sizeof(exponent),
+			 p->expfmt, expsign, expval);
+      /* test for < sizeof-1 since a glibc 2.0.x return of sizeof-1 might
+	 mean truncation */
+      ASSERT (explen >= 0 && explen < sizeof(exponent)-1);
+#else
+      sprintf (exponent, p->expfmt, expsign, expval);
+      explen = strlen (exponent);
+      ASSERT (explen < sizeof(exponent));
+#endif
+      TRACE (printf ("  expfmt %s gives %s\n", p->expfmt, exponent));
+    }
+    break;
+
+  default:
+    ASSERT (0);
+    /*FALLTHRU*/  /* to stop variables looking uninitialized */
+
+  case DOPRNT_CONV_GENERAL:
+    /* The exponent for "scientific" will be exp-1, choose scientific if
+       this is < -4 or >= prec (and minimum 1 for prec).  For f==0 will have
+       exp==0 and get the desired "fixed".  This rule follows glibc.  For
+       fixed there's no need to truncate, the desired ndigits will already
+       be as required.  */
+    if (exp-1 < -4 || exp-1 >= MAX (1, prec))
+      goto scientific;
+    else
+      goto fixed;
+  }
+
+  TRACE (printf ("  intlen %d intzeros %d fraczeros %d fraclen %d\n",
+		 intlen, intzeros, fraczeros, fraclen));
+  ASSERT (p->prec <= -1
+	  ? intlen + fraclen == strlen (s)
+	  : intlen + fraclen <= strlen (s));
+
+  if (p->showtrailing)
+    {
+      /* Pad to requested precision with trailing zeros, for general this is
+	 all digits, for fixed and scientific just the fraction.  */
+      preczeros = prec - (fraczeros + fraclen
+			  + (p->conv == DOPRNT_CONV_GENERAL
+			     ? intlen + intzeros : 0));
+      preczeros = MAX (0, preczeros);
+    }
+  else
+    preczeros = 0;
+  TRACE (printf ("  prec=%d showtrailing=%d, pad with preczeros %d\n",
+		 prec, p->showtrailing, preczeros));
+
+  /* radix point if needed, or if forced */
+  pointlen = ((fraczeros + fraclen + preczeros) != 0 || p->showpoint != 0)
+    ? strlen (point) : 0;
+  TRACE (printf ("  point |%s|  pointlen %d\n", point, pointlen));
+
+  /* Notice the test for a non-zero value is done after any truncation for
+     DOPRNT_CONV_FIXED. */
+  showbase = NULL;
+  showbaselen = 0;
+  switch (p->showbase) {
+  default:
+    ASSERT (0);
+    /*FALLTHRU*/
+  case DOPRNT_SHOWBASE_NO:
+    break;
+  case DOPRNT_SHOWBASE_NONZERO:
+    if (intlen == 0 && fraclen == 0)
+      break;
+    /*FALLTHRU*/
+  case DOPRNT_SHOWBASE_YES:
+    switch (p->base) {
+    case 16:  showbase = "0x"; showbaselen = 2; break;
+    case -16: showbase = "0X"; showbaselen = 2; break;
+    case 8:   showbase = "0";  showbaselen = 1; break;
+    }
+    break;
+  }
+  TRACE (printf ("  showbase %s showbaselen %d\n",
+		 showbase == NULL ? "" : showbase, showbaselen));
+
+  /* left over field width */
+  justlen = p->width - (signlen + showbaselen + intlen + intzeros + pointlen
+			+ fraczeros + fraclen + preczeros + explen);
+  TRACE (printf ("  justlen %d fill 0x%X\n", justlen, p->fill));
+
+  justify = p->justify;
+  if (justlen <= 0) /* no justifying if exceed width */
+    justify = DOPRNT_JUSTIFY_NONE;
+
+  TRACE (printf ("  justify type %d  intlen %d pointlen %d fraclen %d\n",
+		 justify, intlen, pointlen, fraclen));
+
+  /* Emit the assembled pieces in printf order: right padding, sign, base
+     indicator, internal padding, integer digits, radix point, fraction,
+     precision zeros, exponent, then left padding.  */
+  if (justify == DOPRNT_JUSTIFY_RIGHT)         /* pad for right */
+    DOPRNT_REPS (p->fill, justlen);
+
+  if (signlen)                                 /* sign */
+    DOPRNT_REPS (sign, 1);
+
+  DOPRNT_MEMORY_MAYBE (showbase, showbaselen); /* base */
+
+  if (justify == DOPRNT_JUSTIFY_INTERNAL)      /* pad for internal */
+    DOPRNT_REPS (p->fill, justlen);
+
+  DOPRNT_MEMORY (s, intlen);                   /* integer */
+  DOPRNT_REPS_MAYBE ('0', intzeros);
+
+  DOPRNT_MEMORY_MAYBE (point, pointlen);       /* point */
+
+  DOPRNT_REPS_MAYBE ('0', fraczeros);          /* frac */
+  DOPRNT_MEMORY_MAYBE (s+intlen, fraclen);
+
+  DOPRNT_REPS_MAYBE ('0', preczeros);          /* prec */
+
+  DOPRNT_MEMORY_MAYBE (exponent, explen);      /* exp */
+
+  if (justify == DOPRNT_JUSTIFY_LEFT)          /* pad for left */
+    DOPRNT_REPS (p->fill, justlen);
+
+ done:
+  __GMP_FREE_FUNC_TYPE (free_ptr, free_size, char);
+  return retval;
+
+ error:
+  retval = -1;
+  goto done;
+}
diff --git a/printf/doprnti.c b/printf/doprnti.c
new file mode 100644
index 0000000..61ff643
--- /dev/null
+++ b/printf/doprnti.c
@@ -0,0 +1,136 @@
+/* __gmp_doprnt_integer -- integer style formatted output.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+
+
+/* Output the ascii number string S according to the parameters P, through
+   the FUNS output functions with their DATA argument.  S is as produced by
+   mpz_get_str or mpq_get_str; an mpq gives "num/den", hence the strchr for
+   '/' and the separate showbase handling for the denominator.  Returns -1
+   on error (see the "error" label), otherwise the value accumulated by the
+   DOPRNT_* macros.  */
+int
+__gmp_doprnt_integer (const struct doprnt_funs_t *funs,
+		      void *data,
+		      const struct doprnt_params_t *p,
+		      const char *s)
+{
+  int         retval = 0;
+  int         slen, justlen, showbaselen, sign, signlen, slashlen, zeros;
+  int         justify, den_showbaselen;
+  const char  *slash, *showbase;
+
+  /* '+' or ' ' if wanted, and don't already have '-' */
+  sign = p->sign;
+  if (s[0] == '-')
+    {
+      sign = s[0];
+      s++;
+    }
+  signlen = (sign != '\0');
+
+  /* if the precision was explicitly 0, print nothing for a 0 value */
+  if (*s == '0' && p->prec == 0)
+    s++;
+
+  slen = strlen (s);
+  slash = strchr (s, '/');
+
+  showbase = NULL;
+  showbaselen = 0;
+
+  if (p->showbase != DOPRNT_SHOWBASE_NO)
+    {
+      switch (p->base) {
+      case 16:  showbase = "0x"; showbaselen = 2; break;
+      case -16: showbase = "0X"; showbaselen = 2; break;
+      case 8:   showbase = "0";  showbaselen = 1; break;
+      }
+    }
+
+  /* The denominator gets the same base indicator as the numerator, unless
+     there is no denominator, or it's suppressed for a "0" denominator
+     under DOPRNT_SHOWBASE_NONZERO.  */
+  den_showbaselen = showbaselen;
+  if (slash == NULL
+      || (p->showbase == DOPRNT_SHOWBASE_NONZERO && slash[1] == '0'))
+    den_showbaselen = 0;
+
+  if (p->showbase == DOPRNT_SHOWBASE_NONZERO && s[0] == '0')
+    showbaselen = 0;
+
+  /* the influence of p->prec on mpq is currently undefined */
+  zeros = MAX (0, p->prec - slen);
+
+  /* space left over after actual output length */
+  justlen = p->width
+    - (strlen(s) + signlen + showbaselen + den_showbaselen + zeros);
+
+  justify = p->justify;
+  if (justlen <= 0) /* no justifying if exceed width */
+    justify = DOPRNT_JUSTIFY_NONE;
+
+  if (justify == DOPRNT_JUSTIFY_RIGHT)             /* pad right */
+    DOPRNT_REPS (p->fill, justlen);
+
+  DOPRNT_REPS_MAYBE (sign, signlen);               /* sign */
+
+  DOPRNT_MEMORY_MAYBE (showbase, showbaselen);     /* base */
+
+  DOPRNT_REPS_MAYBE ('0', zeros);                  /* zeros */
+
+  if (justify == DOPRNT_JUSTIFY_INTERNAL)          /* pad internal */
+    DOPRNT_REPS (p->fill, justlen);
+
+  /* if there's a showbase on the denominator, then print the numerator
+     separately so it can be inserted */
+  if (den_showbaselen != 0)
+    {
+      ASSERT (slash != NULL);
+      slashlen = slash+1 - s;
+      DOPRNT_MEMORY (s, slashlen);                 /* numerator and slash */
+      slen -= slashlen;
+      s += slashlen;
+      DOPRNT_MEMORY (showbase, den_showbaselen);
+    }
+
+  DOPRNT_MEMORY (s, slen);                         /* number, or denominator */
+
+  if (justify == DOPRNT_JUSTIFY_LEFT)              /* pad left */
+    DOPRNT_REPS (p->fill, justlen);
+
+ done:
+  return retval;
+
+ error:
+  retval = -1;
+  goto done;
+}
diff --git a/printf/fprintf.c b/printf/fprintf.c
new file mode 100644
index 0000000..0008b3b
--- /dev/null
+++ b/printf/fprintf.c
@@ -0,0 +1,48 @@
+/* gmp_fprintf -- formatted output.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+/* Formatted output to the stdio stream FP: collect the varargs into a
+   va_list and hand the format string to the common __gmp_doprnt driver
+   with the fprintf output function table.  Returns whatever __gmp_doprnt
+   returns.  */
+int
+gmp_fprintf (FILE *fp, const char *fmt, ...)
+{
+  va_list  ap;
+  int      ret;
+
+  va_start (ap, fmt);
+
+  ret = __gmp_doprnt (&__gmp_fprintf_funs, fp, fmt, ap);
+  va_end (ap);
+  return ret;
+}
diff --git a/printf/obprintf.c b/printf/obprintf.c
new file mode 100644
index 0000000..c12d6de
--- /dev/null
+++ b/printf/obprintf.c
@@ -0,0 +1,60 @@
+/* gmp_obstack_printf -- formatted output to an obstack.
+
+Copyright 2001, 2002, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_OBSTACK_VPRINTF
+
+#include <stdarg.h>
+#include <obstack.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+
+
+/* Formatted output appended to the obstack OB, via the common __gmp_doprnt
+   driver with the obstack output function table.  Returns whatever
+   __gmp_doprnt returns.  */
+int
+gmp_obstack_printf (struct obstack *ob, const char *fmt, ...)
+{
+  va_list  ap;
+  int      ret;
+
+  va_start (ap, fmt);
+
+  /* Refuse a fmt string that lives inside the obstack's current growing
+     object, since the output is appended to that same object.  */
+  ASSERT (! MEM_OVERLAP_P (obstack_base(ob), obstack_object_size(ob),
+                           fmt, strlen(fmt)+1));
+
+  ret = __gmp_doprnt (&__gmp_obstack_printf_funs, ob, fmt, ap);
+  va_end (ap);
+  return ret;
+}
+
+#else
+typedef int __gmp_dummy_typedef;
+#endif /* HAVE_OBSTACK_VPRINTF */
diff --git a/printf/obprntffuns.c b/printf/obprntffuns.c
new file mode 100644
index 0000000..a23e4e8
--- /dev/null
+++ b/printf/obprntffuns.c
@@ -0,0 +1,73 @@
+/* __gmp_obstack_printf_funs -- support for gmp_obstack_printf and
+   gmp_obstack_vprintf.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_OBSTACK_VPRINTF
+
+#define _GNU_SOURCE   /* ask glibc <stdio.h> for obstack_vprintf */
+
+#include <stdarg.h>
+#include <stdio.h>    /* for obstack_vprintf */
+#include <string.h>
+#include <obstack.h>
+
+#include "gmp-impl.h"
+
+
+/* Append LEN bytes from PTR to the growing obstack object; returns LEN as
+   the count of characters written.  */
+static int
+gmp_obstack_memory (struct obstack *ob, const char *ptr, size_t len)
+{
+  obstack_grow (ob, ptr, len);
+  return len;
+}
+
+/* Append REPS copies of the character C: grow the object by REPS raw bytes
+   with obstack_blank, then fill those fresh bytes with memset.  Returns
+   REPS as the count of characters written.  */
+static int
+gmp_obstack_reps (struct obstack *ob, int c, int reps)
+{
+  obstack_blank (ob, reps);
+  memset ((char *) obstack_next_free(ob) - reps, c, reps);
+  return reps;
+}
+
+/* Function table plugging obstack output into the common __gmp_doprnt
+   driver: plain format pieces go straight to obstack_vprintf.  */
+const struct doprnt_funs_t  __gmp_obstack_printf_funs = {
+  (doprnt_format_t) obstack_vprintf,
+  (doprnt_memory_t) gmp_obstack_memory,
+  (doprnt_reps_t)   gmp_obstack_reps
+};
+
+#else
+typedef int __gmp_dummy_typedef;
+#endif /* HAVE_OBSTACK_VPRINTF */
diff --git a/printf/obvprintf.c b/printf/obvprintf.c
new file mode 100644
index 0000000..7563c11
--- /dev/null
+++ b/printf/obvprintf.c
@@ -0,0 +1,53 @@
+/* gmp_obstack_vprintf -- formatted output to an obstack.
+
+Copyright 2001, 2002, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_OBSTACK_VPRINTF
+
+#include <stdarg.h>
+#include <obstack.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+
+
/* Format output according to FMT/AP, appending it to the growing object
   on obstack OB.  Returns the number of characters produced (the value
   of __gmp_doprnt).  */
int
gmp_obstack_vprintf (struct obstack *ob, const char *fmt, va_list ap)
{
  /* fmt must not overlap the growing object, since it's still being read
     while output is appended  */
  ASSERT (! MEM_OVERLAP_P (obstack_base(ob), obstack_object_size(ob),
                           fmt, strlen(fmt)+1));

  return __gmp_doprnt (&__gmp_obstack_printf_funs, ob, fmt, ap);
}
+
+#else
+typedef int __gmp_dummy_typedef;
+#endif /* HAVE_OBSTACK_VPRINTF */
diff --git a/printf/printf.c b/printf/printf.c
new file mode 100644
index 0000000..4becb0b
--- /dev/null
+++ b/printf/printf.c
@@ -0,0 +1,48 @@
+/* gmp_printf -- formatted output.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+int
+gmp_printf (const char *fmt, ...)
+{
+  va_list  ap;
+  int      ret;
+
+  va_start (ap, fmt);
+
+  ret = __gmp_doprnt (&__gmp_fprintf_funs, stdout, fmt, ap);
+  va_end (ap);
+  return ret;
+}
diff --git a/printf/printffuns.c b/printf/printffuns.c
new file mode 100644
index 0000000..957381d
--- /dev/null
+++ b/printf/printffuns.c
@@ -0,0 +1,79 @@
+/* __gmp_fprintf_funs -- support for formatted output to FILEs.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+
+/* SunOS 4 stdio.h doesn't provide a prototype for this */
+#if ! HAVE_DECL_VFPRINTF
+int vfprintf (FILE *, const char *, va_list);
+#endif
+
+
/* Write LEN bytes from STR to FP.  Returns the count fwrite reports,
   which is LEN on success and fewer only on a write error.  */
static int
gmp_fprintf_memory (FILE *fp, const char *str, size_t len)
{
  size_t  nwrote;

  nwrote = fwrite (str, 1, len, fp);
  return (int) nwrote;
}
+
/* Output REPS copies of the character C to FP.  glibc putc is a function,
   at least when it's in multi-threaded mode or some such, so fwrite
   chunks from a local buffer instead of making many calls.

   Returns REPS on success, or -1 if a write fails.  */
static int
gmp_fprintf_reps (FILE *fp, int c, int reps)
{
  char  buf[256];
  int   i, piece, ret;
  ASSERT (reps >= 0);

  memset (buf, c, MIN (reps, (int) sizeof (buf)));
  for (i = reps; i > 0; i -= sizeof (buf))
    {
      piece = MIN (i, (int) sizeof (buf));
      ret = fwrite (buf, 1, piece, fp);
      /* fwrite returns size_t and reports an error with a short count,
         never -1, so testing "ret == -1" could not detect failure.  */
      if (ret != piece)
        return -1;
    }

  return reps;
}
+
/* Output function table plugged into __gmp_doprnt for FILE* output.
   Plain C format directives are delegated straight to vfprintf.  */
const struct doprnt_funs_t  __gmp_fprintf_funs = {
  (doprnt_format_t) vfprintf,
  (doprnt_memory_t) gmp_fprintf_memory,
  (doprnt_reps_t)   gmp_fprintf_reps,
};
diff --git a/printf/repl-vsnprintf.c b/printf/repl-vsnprintf.c
new file mode 100644
index 0000000..81b404d
--- /dev/null
+++ b/printf/repl-vsnprintf.c
@@ -0,0 +1,393 @@
+/* __gmp_replacement_vsnprintf -- for systems which don't have vsnprintf, or
+   only have a broken one.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#define _GNU_SOURCE    /* for strnlen prototype */
+
+#include <stdarg.h>
+#include <ctype.h>     /* for isdigit */
+#include <stddef.h>    /* for ptrdiff_t */
+#include <string.h>
+#include <stdio.h>     /* for NULL */
+#include <stdlib.h>
+
+#if HAVE_FLOAT_H
+#include <float.h>     /* for DBL_MAX_10_EXP etc */
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#endif
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h> /* for quad_t */
+#endif
+
+#include "gmp-impl.h"
+
+
+#if ! HAVE_VSNPRINTF   /* only need this file if we don't have vsnprintf */
+
/* Autoconf notes that AIX 4.3 has a broken strnlen, but fortunately it
   doesn't affect us since __gmp_replacement_vsnprintf is not required on
   that system.  */
#if ! HAVE_STRNLEN
/* Fallback strnlen: length of S, scanning at most N bytes.  */
static size_t
strnlen (const char *s, size_t n)
{
  const char  *p = s;
  const char  *limit = s + n;

  while (p < limit && *p != '\0')
    p++;
  return (size_t) (p - s);
}
#endif
+
+
+/* The approach here is to parse the fmt string, and decide how much space
+   it requires, then use vsprintf into a big enough buffer.  The space
+   calculated isn't an exact amount, but it's certainly no less than
+   required.
+
+   This code was inspired by GNU libiberty/vasprintf.c but we support more
+   datatypes, when available.
+
+   mingw32 - doesn't have vsnprintf, it seems.  Because gcc is used a full
+       set of types are available, but "long double" is just a plain IEEE
+       64-bit "double" and LDBL_MAX_EXP_10 is correspondingly defined, so we
+       avoid the big 15-bit exponent estimate.  */
+
/* Estimate (conservatively) the space the output of ORIG_FMT/ORIG_AP
   needs, then vsprintf either directly into BUF when it certainly fits,
   or into a temporary block which is copied to BUF truncated to
   BUF_SIZE.  Returns the full untruncated output length, per C99
   vsnprintf.

   NOTE(review): "ap" from va_copy below is never va_end'ed -- harmless
   on most ABIs but technically required by C99; confirm upstream.  */
int
__gmp_replacement_vsnprintf (char *buf, size_t buf_size,
			     const char *orig_fmt, va_list orig_ap)
{
  va_list     ap;
  const char  *fmt;
  size_t      total_width, integer_sizeof, floating_sizeof, len;
  char        fchar, type;
  int         width, prec, seen_prec, double_digits, long_double_digits;
  int         *value;    /* points at width or prec, switched by '.' */

  /* preserve orig_ap for use after size estimation */
  va_copy (ap, orig_ap);

  fmt = orig_fmt;
  total_width = strlen (fmt) + 1;   /* 1 extra for the '\0' */

  /* biggest integer type an argument might occupy */
  integer_sizeof = sizeof (long);
#if HAVE_LONG_LONG
  integer_sizeof = MAX (integer_sizeof, sizeof (long long));
#endif
#if HAVE_QUAD_T
  integer_sizeof = MAX (integer_sizeof, sizeof (quad_t));
#endif

  /* biggest floating type an argument might occupy */
  floating_sizeof = sizeof (double);
#if HAVE_LONG_DOUBLE
  floating_sizeof = MAX (floating_sizeof, sizeof (long double));
#endif

  /* IEEE double or VAX G floats have an 11 bit exponent, so the default is
     a maximum 308 decimal digits.  VAX D floats have only an 8 bit
     exponent, but we don't bother trying to detect that directly.  */
  double_digits = 308;
#ifdef DBL_MAX_10_EXP
  /* but in any case prefer a value the compiler says */
  double_digits = DBL_MAX_10_EXP;
#endif

  /* IEEE 128-bit quad, Intel 80-bit temporary, or VAX H floats all have 15
     bit exponents, so the default is a maximum 4932 decimal digits.  */
  long_double_digits = 4932;
  /* but if double == long double, then go with that size */
#if HAVE_LONG_DOUBLE
  if (sizeof (double) == sizeof (long double))
    long_double_digits = double_digits;
#endif
#ifdef LDBL_MAX_10_EXP
  /* but in any case prefer a value the compiler says */
  long_double_digits = LDBL_MAX_10_EXP;
#endif

  /* scan each % directive, consuming its argument from ap and adding its
     worst-case output size to total_width */
  for (;;)
    {
      fmt = strchr (fmt, '%');
      if (fmt == NULL)
	break;
      fmt++;

      type = '\0';
      width = 0;
      prec = 6;
      seen_prec = 0;
      value = &width;

      for (;;)
	{
	  fchar = *fmt++;
	  switch (fchar) {

	  case 'c':
	    /* char, already accounted for by strlen(fmt) */
	    goto next;

	  case 'd':
	  case 'i':
	  case 'o':
	  case 'x':
	  case 'X':
	  case 'u':
	    /* at most 3 digits per byte in hex, dec or octal, plus a sign */
	    total_width += 3 * integer_sizeof + 1;

	    /* consume the argument, whose size depends on the preceding
	       length modifier recorded in "type" */
	    switch (type) {
	    case 'j':
	      /* Let's assume uintmax_t is the same size as intmax_t. */
#if HAVE_INTMAX_T
	      (void) va_arg (ap, intmax_t);
#else
	      ASSERT_FAIL (intmax_t not available);
#endif
	      break;
	    case 'l':
	      (void) va_arg (ap, long);
	      break;
	    case 'L':
#if HAVE_LONG_LONG
	      (void) va_arg (ap, long long);
#else
	      ASSERT_FAIL (long long not available);
#endif
	      break;
	    case 'q':
	      /* quad_t is probably the same as long long, but let's treat
		 it separately just to be sure.  Also let's assume u_quad_t
		 will be the same size as quad_t.  */
#if HAVE_QUAD_T
	      (void) va_arg (ap, quad_t);
#else
	      ASSERT_FAIL (quad_t not available);
#endif
	      break;
	    case 't':
#if HAVE_PTRDIFF_T
	      (void) va_arg (ap, ptrdiff_t);
#else
	      ASSERT_FAIL (ptrdiff_t not available);
#endif
	      break;
	    case 'z':
	      (void) va_arg (ap, size_t);
	      break;
	    default:
	      /* default is an "int", and this includes h=short and hh=char
		 since they're promoted to int in a function call */
	      (void) va_arg (ap, int);
	      break;
	    }
	    goto next;

	  case 'E':
	  case 'e':
	  case 'G':
	  case 'g':
	    /* Requested decimals, sign, point and e, plus an overestimate
	       of exponent digits (the assumption is all the float is
	       exponent!).  */
	    total_width += prec + 3 + floating_sizeof * 3;
	    if (type == 'L')
	      {
#if HAVE_LONG_DOUBLE
		(void) va_arg (ap, long double);
#else
		ASSERT_FAIL (long double not available);
#endif
	      }
	    else
	      (void) va_arg (ap, double);
	    goto next;

	  case 'f':
	    /* Requested decimals, sign and point, and a margin for error,
	       then add the maximum digits that can be in the integer part,
	       based on the maximum exponent value. */
	    total_width += prec + 2 + 10;
	    if (type == 'L')
	      {
#if HAVE_LONG_DOUBLE
		(void) va_arg (ap, long double);
		total_width += long_double_digits;
#else
		ASSERT_FAIL (long double not available);
#endif
	      }
	    else
	      {
		(void) va_arg (ap, double);
		total_width += double_digits;
	      }
	    goto next;

	  case 'h':  /* short or char */
	  case 'j':  /* intmax_t */
	  case 'L':  /* long long or long double */
	  case 'q':  /* quad_t */
	  case 't':  /* ptrdiff_t */
	  case 'z':  /* size_t */
	  set_type:
	    type = fchar;
	    break;

	  case 'l':
	    /* long or long long */
	    if (type != 'l')
	      goto set_type;
	    type = 'L';   /* "ll" means "L" */
	    break;

	  case 'n':
	    /* bytes written, no output as such */
	    (void) va_arg (ap, void *);
	    goto next;

	  case 's':
	    /* If no precision was given, then determine the string length
	       and put it there, to be added to the total under "next".  If
	       a precision was given then that's already the maximum from
	       this field, but see whether the string is shorter than that,
	       in case the limit was very big.  */
	    {
	      const char  *s = va_arg (ap, const char *);
	      prec = (seen_prec ? strnlen (s, prec) : strlen (s));
	    }
	    goto next;

	  case 'p':
	    /* pointer, let's assume at worst it's octal with some padding */
	    (void) va_arg (ap, const void *);
	    total_width += 3 * sizeof (void *) + 16;
	    goto next;

	  case '%':
	    /* literal %, already accounted for by strlen(fmt) */
	    goto next;

	  case '#':
	    /* showbase, at most 2 for "0x" */
	    total_width += 2;
	    break;

	  case '+':
	  case ' ':
	    /* sign, already accounted for under numerics */
	    break;

	  case '-':
	    /* left justify, no effect on total width */
	    break;

	  case '.':
	    /* subsequent digits / '*' set the precision, not the width */
	    seen_prec = 1;
	    value = &prec;
	    break;

	  case '*':
	    {
	      /* negative width means left justify which can be ignored,
		 negative prec would be invalid, just use absolute value */
	      int n = va_arg (ap, int);
	      *value = ABS (n);
	    }
	    break;

	  case '0': case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	    /* process all digits to form a value */
	    {
	      int  n = 0;
	      do {
		n = n * 10 + (fchar-'0');
		fchar = *fmt++;
	      } while (isascii (fchar) && isdigit (fchar));
	      fmt--; /* unget the non-digit */
	      *value = n;
	    }
	    break;

	  default:
	    /* incomplete or invalid % sequence */
	    ASSERT (0);
	    goto next;
	  }
	}

    next:
      /* worst case both a field width and a precision add padding */
      total_width += width;
      total_width += prec;
    }

  if (total_width <= buf_size)
    {
      /* certainly fits, format straight into the caller's buffer */
      vsprintf (buf, orig_fmt, orig_ap);
      len = strlen (buf);
    }
  else
    {
      /* might not fit: format into a temporary of the estimated size,
	 then copy out a truncated, NUL-terminated prefix */
      char  *s;

      s = __GMP_ALLOCATE_FUNC_TYPE (total_width, char);
      vsprintf (s, orig_fmt, orig_ap);
      len = strlen (s);
      if (buf_size != 0)
	{
	  size_t  copylen = MIN (len, buf_size-1);
	  memcpy (buf, s, copylen);
	  buf[copylen] = '\0';
	}
      __GMP_FREE_FUNC_TYPE (s, total_width, char);
    }

  /* If total_width was somehow wrong then chances are we've already
     clobbered memory, but maybe this check will still work.  */
  ASSERT_ALWAYS (len < total_width);

  return len;
}
+
+#endif /* ! HAVE_VSNPRINTF */
diff --git a/printf/snprintf.c b/printf/snprintf.c
new file mode 100644
index 0000000..8da33f8
--- /dev/null
+++ b/printf/snprintf.c
@@ -0,0 +1,53 @@
+/* gmp_snprintf -- formatted output to an fixed size buffer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <string.h>    /* for strlen */
+
+#include "gmp-impl.h"
+
+
+int
+gmp_snprintf (char *buf, size_t size, const char *fmt, ...)
+{
+  struct gmp_snprintf_t d;
+  va_list  ap;
+  int      ret;
+
+  va_start (ap, fmt);
+  d.buf = buf;
+  d.size = size;
+
+  ASSERT (! MEM_OVERLAP_P (buf, size, fmt, strlen(fmt)+1));
+
+  ret = __gmp_doprnt (&__gmp_snprintf_funs, &d, fmt, ap);
+  va_end (ap);
+  return ret;
+}
diff --git a/printf/snprntffuns.c b/printf/snprntffuns.c
new file mode 100644
index 0000000..885c7ab
--- /dev/null
+++ b/printf/snprntffuns.c
@@ -0,0 +1,157 @@
+/* __gmp_snprintf_funs -- support for gmp_snprintf and gmp_vsnprintf.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+
+
+#if ! HAVE_VSNPRINTF
+#define vsnprintf  __gmp_replacement_vsnprintf
+#endif
+
+
+/* glibc 2.0.x vsnprintf returns either -1 or size-1 for an overflow, with
+   no indication how big the output would have been.  It's necessary to
+   re-run to determine that size.
+
+   "size-1" would mean success from a C99 vsnprintf, and the re-run is
+   unnecessary in this case, but we don't bother to try to detect what sort
+   of vsnprintf we've got.  size-1 should occur rarely in normal
+   circumstances.
+
+   vsnprintf might trash it's given ap (it does for instance in glibc 2.1.3
+   on powerpc), so copy it in case we need to use it to probe for the size
+   output that would have been produced.  Note there's no need to preserve
+   it for our callers, just for ourselves.  */
+
+static int
+gmp_snprintf_format (struct gmp_snprintf_t *d, const char *fmt,
+                     va_list orig_ap)
+{
+  int      ret;
+  size_t   step, alloc, avail;
+  va_list  ap;
+  char     *p;
+
+  ASSERT (d->size >= 0);
+
+  avail = d->size;
+  if (avail > 1)
+    {
+      va_copy (ap, orig_ap);
+      ret = vsnprintf (d->buf, avail, fmt, ap);
+      if (ret == -1)
+        return ret;
+
+      step = MIN (ret, avail-1);
+      d->size -= step;
+      d->buf += step;
+
+      if (ret != avail-1)
+        return ret;
+
+      /* probably glibc 2.0.x truncated output, probe for actual size */
+      alloc = MAX (128, ret);
+    }
+  else
+    {
+      /* no space to write anything, just probe for size */
+      alloc = 128;
+    }
+
+  do
+    {
+      alloc *= 2;
+      p = __GMP_ALLOCATE_FUNC_TYPE (alloc, char);
+      va_copy (ap, orig_ap);
+      ret = vsnprintf (p, alloc, fmt, ap);
+      __GMP_FREE_FUNC_TYPE (p, alloc, char);
+    }
+  while (ret == alloc-1);
+
+  return ret;
+}
+
+static int
+gmp_snprintf_memory (struct gmp_snprintf_t *d, const char *str, size_t len)
+{
+  size_t n;
+
+  ASSERT (d->size >= 0);
+
+  if (d->size > 1)
+    {
+      n = MIN (d->size-1, len);
+      memcpy (d->buf, str, n);
+      d->buf += n;
+      d->size -= n;
+    }
+  return len;
+}
+
+static int
+gmp_snprintf_reps (struct gmp_snprintf_t *d, int c, int reps)
+{
+  size_t n;
+
+  ASSERT (reps >= 0);
+  ASSERT (d->size >= 0);
+
+  if (d->size > 1)
+    {
+      n = MIN (d->size-1, reps);
+      memset (d->buf, c, n);
+      d->buf += n;
+      d->size -= n;
+    }
+  return reps;
+}
+
+static int
+gmp_snprintf_final (struct gmp_snprintf_t *d)
+{
+  if (d->size >= 1)
+    d->buf[0] = '\0';
+  return 0;
+}
+
/* Output function table plugged into __gmp_doprnt for gmp_snprintf and
   gmp_vsnprintf output into a bounded buffer.  */
const struct doprnt_funs_t  __gmp_snprintf_funs = {
  (doprnt_format_t) gmp_snprintf_format,
  (doprnt_memory_t) gmp_snprintf_memory,
  (doprnt_reps_t)   gmp_snprintf_reps,
  (doprnt_final_t)  gmp_snprintf_final
};
diff --git a/printf/sprintf.c b/printf/sprintf.c
new file mode 100644
index 0000000..0952a53
--- /dev/null
+++ b/printf/sprintf.c
@@ -0,0 +1,54 @@
+/* gmp_sprintf -- formatted output to an unrestricted string.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <string.h>    /* for strlen */
+
+#include "gmp-impl.h"
+
+
+int
+gmp_sprintf (char *buf, const char *fmt, ...)
+{
+#if WANT_ASSERT
+  int      fmtlen = strlen(fmt);
+#endif
+  va_list  ap;
+  int      ret;
+
+  va_start (ap, fmt);
+
+  ret = __gmp_doprnt (&__gmp_sprintf_funs, &buf, fmt, ap);
+  va_end (ap);
+
+  ASSERT (! MEM_OVERLAP_P (buf, strlen(buf)+1, fmt, fmtlen+1));
+
+  return ret;
+}
diff --git a/printf/sprintffuns.c b/printf/sprintffuns.c
new file mode 100644
index 0000000..6781f25
--- /dev/null
+++ b/printf/sprintffuns.c
@@ -0,0 +1,94 @@
+/* __gmp_sprintf_funs -- support for gmp_sprintf and gmp_vsprintf.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+
+
+/* The data parameter "bufp" points to a "char *buf" which is the next
+   character to be written, having started as the destination from the
+   application.  This is then increased each time output is produced.  */
+
+
/* Format with vsprintf directly at the current output position and
   advance past what was produced.  If vsprintf fails, "*bufp" may be
   ruined, but that doesn't matter since gmp_doprint will bail out
   immediately anyway.  */
static int
gmp_sprintf_format (char **bufp, const char *fmt, va_list ap)
{
  int  nchars;

  vsprintf (*bufp, fmt, ap);
  nchars = strlen (*bufp);
  *bufp += nchars;
  return nchars;
}
+
/* Append LEN bytes from STR at the current output position, advancing
   the position past them.  Returns LEN.  */
static int
gmp_sprintf_memory (char **bufp, const char *str, size_t len)
{
  memcpy (*bufp, str, len);
  *bufp += len;
  return (int) len;
}
+
/* Append REPS copies of the character C at the current output position,
   advancing the position past them.  Returns REPS.  */
static int
gmp_sprintf_reps (char **bufp, int c, int reps)
{
  char  *dst = *bufp;

  ASSERT (reps >= 0);
  memset (dst, c, reps);
  *bufp = dst + reps;
  return reps;
}
+
/* Terminate the string at the current output position.  */
static int
gmp_sprintf_final (char **bufp)
{
  **bufp = '\0';
  return 0;
}
+
/* Output function table plugged into __gmp_doprnt for gmp_sprintf and
   gmp_vsprintf output into an unbounded buffer.  */
const struct doprnt_funs_t  __gmp_sprintf_funs = {
  (doprnt_format_t) gmp_sprintf_format,
  (doprnt_memory_t) gmp_sprintf_memory,
  (doprnt_reps_t)   gmp_sprintf_reps,
  (doprnt_final_t)  gmp_sprintf_final
};
diff --git a/printf/vasprintf.c b/printf/vasprintf.c
new file mode 100644
index 0000000..8a29a12
--- /dev/null
+++ b/printf/vasprintf.c
@@ -0,0 +1,116 @@
+/* gmp_vasprintf -- formatted output to an allocated space.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+
+#if ! HAVE_VSNPRINTF
+#define vsnprintf  __gmp_replacement_vsnprintf
+#endif
+
+
+/* vasprintf isn't used since we prefer all GMP allocs to go through
+   __gmp_allocate_func, and in particular we don't want the -1 return from
+   vasprintf for out-of-memory, instead __gmp_allocate_func should handle
+   that.  Using vsnprintf unfortunately means we might have to re-run it if
+   our current space is insufficient.
+
+   The initial guess for the needed space is an arbitrary 256 bytes.  If
+   that (and any extra GMP_ASPRINTF_T_NEED might give) isn't enough then an
+   ISO C99 standard vsnprintf will tell us what we really need.
+
+   GLIBC 2.0.x vsnprintf returns either -1 or space-1 to indicate overflow,
+   without giving any indication how much is really needed.  In this case
+   keep trying with double the space each time.
+
+   A return of space-1 is success on a C99 vsnprintf, but we're not
+   bothering to identify which style vsnprintf we've got, so just take the
+   pessimistic option and assume it's glibc 2.0.x.
+
+   Notice the use of ret+2 for the new space in the C99 case.  This ensures
+   the next vsnprintf return value will be space-2, which is unambiguously
+   successful.  But actually GMP_ASPRINTF_T_NEED() will realloc to even
+   bigger than that ret+2.
+
+   vsnprintf might trash it's given ap, so copy it in case we need to use it
+   more than once.  See comments with gmp_snprintf_format.  */
+
+static int
+gmp_asprintf_format (struct gmp_asprintf_t *d, const char *fmt,
+                     va_list orig_ap)
+{
+  int      ret;
+  va_list  ap;
+  size_t   space = 256;
+
+  for (;;)
+    {
+      GMP_ASPRINTF_T_NEED (d, space);
+      space = d->alloc - d->size;
+      va_copy (ap, orig_ap);
+      ret = vsnprintf (d->buf + d->size, space, fmt, ap);
+      if (ret == -1)
+        {
+          ASSERT (strlen (d->buf + d->size) == space-1);
+          ret = space-1;
+        }
+
+      /* done if output fits in our space */
+      if (ret < space-1)
+        break;
+
+      if (ret == space-1)
+        space *= 2;     /* possible glibc 2.0.x, so double */
+      else
+        space = ret+2;  /* C99, so now know space required */
+    }
+
+  d->size += ret;
+  return ret;
+}
+
/* Output function table plugged into __gmp_doprnt for gmp_asprintf and
   gmp_vasprintf output into a self-growing allocated buffer.  */
const struct doprnt_funs_t  __gmp_asprintf_funs = {
  (doprnt_format_t) gmp_asprintf_format,
  (doprnt_memory_t) __gmp_asprintf_memory,
  (doprnt_reps_t)   __gmp_asprintf_reps,
  (doprnt_final_t)  __gmp_asprintf_final
};
+
+int
+gmp_vasprintf (char **result, const char *fmt, va_list ap)
+{
+  struct gmp_asprintf_t  d;
+  GMP_ASPRINTF_T_INIT (d, result);
+  return __gmp_doprnt (&__gmp_asprintf_funs, &d, fmt, ap);
+}
diff --git a/printf/vfprintf.c b/printf/vfprintf.c
new file mode 100644
index 0000000..b2d1906
--- /dev/null
+++ b/printf/vfprintf.c
@@ -0,0 +1,41 @@
+/* gmp_vfprintf -- formatted output.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+int
+gmp_vfprintf (FILE *fp, const char *fmt, va_list ap)
+{
+  return __gmp_doprnt (&__gmp_fprintf_funs, fp, fmt, ap);
+}
diff --git a/printf/vprintf.c b/printf/vprintf.c
new file mode 100644
index 0000000..60a2233
--- /dev/null
+++ b/printf/vprintf.c
@@ -0,0 +1,41 @@
+/* gmp_vprintf -- formatted output.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+/* Formatted output of FMT with arguments AP to stdout.  Identical to
+   gmp_vfprintf (fp, ...) with fp fixed to stdout; the return value is
+   propagated from __gmp_doprnt.  */
+int
+gmp_vprintf (const char *fmt, va_list ap)
+{
+  return __gmp_doprnt (&__gmp_fprintf_funs, stdout, fmt, ap);
+}
diff --git a/printf/vsnprintf.c b/printf/vsnprintf.c
new file mode 100644
index 0000000..2432f5d
--- /dev/null
+++ b/printf/vsnprintf.c
@@ -0,0 +1,47 @@
+/* gmp_vsnprintf -- formatted output to a fixed size buffer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <string.h>    /* for strlen */
+
+#include "gmp-impl.h"
+
+
+/* Format FMT/AP into BUF, writing at most SIZE bytes (including the
+   terminating NUL).  The buffer bound is enforced by the snprintf
+   function table passed to __gmp_doprnt; the return value is propagated
+   from __gmp_doprnt.  */
+int
+gmp_vsnprintf (char *buf, size_t size, const char *fmt, va_list ap)
+{
+  struct gmp_snprintf_t d;
+
+  /* The format string must not overlap the output buffer.  */
+  ASSERT (! MEM_OVERLAP_P (buf, size, fmt, strlen(fmt)+1));
+
+  d.buf = buf;
+  d.size = size;
+  return __gmp_doprnt (&__gmp_snprintf_funs, &d, fmt, ap);
+}
diff --git a/printf/vsprintf.c b/printf/vsprintf.c
new file mode 100644
index 0000000..26de193
--- /dev/null
+++ b/printf/vsprintf.c
@@ -0,0 +1,50 @@
+/* gmp_vsprintf -- formatted output to an unrestricted string.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <string.h>    /* for strlen */
+
+#include "gmp-impl.h"
+
+
+/* Format FMT/AP into BUF with no size restriction (the caller must
+   provide enough space).  The return value is propagated from
+   __gmp_doprnt.  */
+int
+gmp_vsprintf (char *buf, const char *fmt, va_list ap)
+{
+#if WANT_ASSERT
+  /* Capture the format length before any output is produced, for the
+     overlap check below.  */
+  int  fmtlen = strlen(fmt);
+#endif
+  int  ret;
+
+  ret = __gmp_doprnt (&__gmp_sprintf_funs, &buf, fmt, ap);
+
+  /* Diagnose output overlapping the format string.  NOTE(review): buf is
+     passed by address above and may have been advanced by the output
+     functions when this check runs — confirm against the sprintf funs in
+     doprnt.  */
+  ASSERT (! MEM_OVERLAP_P (buf, strlen(buf)+1, fmt, fmtlen+1));
+
+  return ret;
+}
diff --git a/rand/rand.c b/rand/rand.c
new file mode 100644
index 0000000..e594c7d
--- /dev/null
+++ b/rand/rand.c
@@ -0,0 +1,51 @@
+/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.
+
+Copyright 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+
+#include "gmp-impl.h"
+
+/* Initialize RSTATE with the algorithm selected by ALG; trailing
+   variadic arguments are algorithm specific.  For GMP_RAND_ALG_LC the
+   single extra argument is an unsigned long requested size, forwarded
+   to gmp_randinit_lc_2exp_size.  Failures are reported by setting bits
+   in the global gmp_errno rather than by a return value.  */
+void
+gmp_randinit (gmp_randstate_ptr rstate, gmp_randalg_t alg, ...)
+{
+  va_list ap;
+  va_start (ap, alg);
+
+  switch (alg) {
+  case GMP_RAND_ALG_LC:
+    /* gmp_randinit_lc_2exp_size returns 0 when no scheme of the
+       requested size exists.  */
+    if (! gmp_randinit_lc_2exp_size (rstate, va_arg (ap, unsigned long)))
+      gmp_errno |= GMP_ERROR_INVALID_ARGUMENT;
+    break;
+  default:
+    gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+    break;
+  }
+  va_end (ap);
+}
diff --git a/rand/randbui.c b/rand/randbui.c
new file mode 100644
index 0000000..de9e95b
--- /dev/null
+++ b/rand/randbui.c
@@ -0,0 +1,56 @@
+/* gmp_urandomb_ui -- random bits returned in a ulong.
+
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Currently bits>=BITS_PER_ULONG is quietly truncated to BITS_PER_ULONG,
+   maybe this should raise an exception or something.  */
+
+/* Return BITS random bits from RSTATE packed into an unsigned long.
+   Currently bits>=BITS_PER_ULONG is quietly truncated to BITS_PER_ULONG,
+   maybe this should raise an exception or something.  */
+
+unsigned long
+gmp_urandomb_ui (gmp_randstate_ptr rstate, unsigned long bits)
+{
+  mp_limb_t  a[LIMBS_PER_ULONG];
+
+  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
+     all, or if bits <= GMP_NUMB_BITS then it will store only a[0] */
+  a[0] = 0;
+#if LIMBS_PER_ULONG > 1
+  a[1] = 0;
+#endif
+
+  _gmp_rand (a, rstate, MIN (bits, BITS_PER_ULONG));
+
+#if LIMBS_PER_ULONG == 1
+  return a[0];
+#else
+  /* Reassemble a ulong from two limbs (nails case): a[1] holds the bits
+     above GMP_NUMB_BITS.  */
+  return a[0] | (a[1] << GMP_NUMB_BITS);
+#endif
+}
diff --git a/rand/randclr.c b/rand/randclr.c
new file mode 100644
index 0000000..3fa94fe
--- /dev/null
+++ b/rand/randclr.c
@@ -0,0 +1,37 @@
+/* gmp_randclear (state) -- Clear and deallocate random state STATE.
+
+Copyright 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Free all resources held by RSTATE by dispatching to the active
+   generator's clear routine through the per-algorithm function table
+   stored in the state.  */
+void
+gmp_randclear (gmp_randstate_ptr rstate)
+{
+  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randclear_fn) (rstate);
+}
diff --git a/rand/randdef.c b/rand/randdef.c
new file mode 100644
index 0000000..74d9cce
--- /dev/null
+++ b/rand/randdef.c
@@ -0,0 +1,37 @@
+/* gmp_randinit_default -- initialize a random state with a default algorithm.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* Initialize RSTATE with the library's default algorithm, which is the
+   Mersenne Twister generator.  */
+void
+gmp_randinit_default (gmp_randstate_ptr rstate)
+{
+  gmp_randinit_mt (rstate);
+}
diff --git a/rand/randiset.c b/rand/randiset.c
new file mode 100644
index 0000000..11b5b97
--- /dev/null
+++ b/rand/randiset.c
@@ -0,0 +1,38 @@
+/* gmp_randinit_set -- initialize with a copy of another gmp_randstate_t.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Initialize DST as a copy of SRC, dispatching to the source
+   generator's own copy routine through its function table so each
+   algorithm can duplicate its private state correctly.  */
+void
+gmp_randinit_set (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  (*((gmp_randfnptr_t *) RNG_FNPTR (src))->randiset_fn) (dst, src);
+}
diff --git a/rand/randlc2s.c b/rand/randlc2s.c
new file mode 100644
index 0000000..7a5fa49
--- /dev/null
+++ b/rand/randlc2s.c
@@ -0,0 +1,92 @@
+/* gmp_randinit_lc_2exp_size -- initialize a random state with a linear
+   congruential generator of a requested size.
+
+Copyright 1999-2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp-impl.h"
+
+
+/* Array of LC-schemes, ordered in increasing order of the first
+   member (the 'm2exp' value).  The end of the array is indicated with
+   an entry containing all zeros.  */
+
+/* All multipliers are in the range 0.01*m and 0.99*m, and are
+congruent to 5 (mod 8).
+They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.
+(Up to and including 196 bits, merit is >= 3.)  */
+
+/* One precomputed LC scheme: modulus exponent, multiplier, addend.  */
+struct __gmp_rand_lc_scheme_struct
+{
+  unsigned long int m2exp;	/* Modulus is 2 ^ m2exp. */
+  const char *astr;		/* Multiplier in string form (hexadecimal). */
+  unsigned long int c;		/* Addend. */
+};
+
+/* Table of schemes, parsed with base 16 in gmp_randinit_lc_2exp_size
+   below; terminated by an all-zero sentinel entry.  */
+static const struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =
+{
+  {32, "29CF535",	     1},
+  {33, "51F666D",	     1},
+  {34, "A3D73AD",	     1},
+  {35, "147E5B85",	     1},
+  {36, "28F725C5",	     1},
+  {37, "51EE3105",	     1},
+  {38, "A3DD5CDD",	     1},
+  {39, "147AF833D",	     1},
+  {40, "28F5DA175",	     1},
+  {56, "AA7D735234C0DD",  1},
+  {64, "BAECD515DAF0B49D", 1},
+  {100, "292787EBD3329AD7E7575E2FD", 1},
+  {128, "48A74F367FA7B5C8ACBB36901308FA85", 1},
+  {156, "78A7FDDDC43611B527C3F1D760F36E5D7FC7C45", 1},
+  {196, "41BA2E104EE34C66B3520CE706A56498DE6D44721E5E24F5", 1},
+  {200, "4E5A24C38B981EAFE84CD9D0BEC48E83911362C114F30072C5", 1},
+  {256, "AF66BA932AAF58A071FD8F0742A99A0C76982D648509973DB802303128A14CB5", 1},
+  {0, NULL, 0}			/* End of array. */
+};
+
+/* Initialize RSTATE with an LC scheme delivering at least SIZE useful
+   bits per iteration (each iteration yields m2exp/2 bits, the lower
+   half being discarded by lc()).  Returns 1 on success, 0 when SIZE
+   exceeds what the largest tabulated scheme can provide.  */
+int
+gmp_randinit_lc_2exp_size (gmp_randstate_ptr rstate, mp_bitcnt_t size)
+{
+  const struct __gmp_rand_lc_scheme_struct *sp;
+  mpz_t a;
+
+  /* Pick the first (smallest) scheme whose output size suffices.  */
+  for (sp = __gmp_rand_lc_scheme; sp->m2exp != 0; sp++)
+    if (sp->m2exp / 2 >= size)
+      goto found;
+  return 0;
+
+ found:
+  /* Install scheme: parse the hexadecimal multiplier string.  */
+  mpz_init_set_str (a, sp->astr, 16);
+  gmp_randinit_lc_2exp (rstate, a, sp->c, sp->m2exp);
+  mpz_clear (a);
+  return 1;
+}
diff --git a/rand/randlc2x.c b/rand/randlc2x.c
new file mode 100644
index 0000000..03cb368
--- /dev/null
+++ b/rand/randlc2x.c
@@ -0,0 +1,331 @@
+/* Linear Congruential pseudo-random number generator functions.
+
+Copyright 1999-2003, 2005, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* State structure for LC, the RNG_STATE() pointer in a gmp_randstate_t.
+
+   _mp_seed holds the current seed value, in the range 0 to 2^m2exp-1.
+   SIZ(_mp_seed) is fixed at BITS_TO_LIMBS(_mp_m2exp) and the value is
+   padded with high zero limbs if necessary.  ALLOC(_mp_seed) is the current
+   size of PTR(_mp_seed) in the usual way.  There only needs to be
+   BITS_TO_LIMBS(_mp_m2exp) allocated, but the mpz functions in the
+   initialization and seeding end up making it a bit more than this.
+
+   _mp_a is the "a" multiplier, in the range 0 to 2^m2exp-1.  SIZ(_mp_a) is
+   the size of the value in the normal way for an mpz_t, except that a value
+   of zero is held with SIZ(_mp_a)==1 and PTR(_mp_a)[0]==0.  This makes it
+   easy to call mpn_mul, and the case of a==0 is highly un-random and not
+   worth any trouble to optimize.
+
+   {_cp,_cn} is the "c" addend.  Normally _cn is 1, but when nails are in
+   use a ulong can be bigger than one limb, and in this case _cn is 2 if
+   necessary.  c==0 is stored as _cp[0]==0 and _cn==1, which makes it easy
+   to call __GMPN_ADD.  c==0 is fairly un-random so isn't worth optimizing.
+
+   _mp_m2exp gives the modulus, namely 2^m2exp.  We demand m2exp>=1, since
+   m2exp==0 would mean no bits at all out of each iteration, which makes no
+   sense.  */
+
+/* Private LC generator state; the invariants on each field are spelled
+   out in the large comment above.  */
+typedef struct {
+  mpz_t          _mp_seed;   /* current seed, 0 .. 2^m2exp-1, fixed size */
+  mpz_t          _mp_a;      /* multiplier "a"; zero stored as size 1 */
+  mp_size_t      _cn;        /* number of limbs used in _cp */
+  mp_limb_t      _cp[LIMBS_PER_ULONG];  /* addend "c" */
+  unsigned long  _mp_m2exp;  /* modulus is 2^_mp_m2exp */
+} gmp_rand_lc_struct;
+
+
+/* lc (rp, state) -- Generate next number in LC sequence.  Return the
+   number of valid bits in the result.  Discards the lower half of the
+   result.  */
+
+static unsigned long int
+lc (mp_ptr rp, gmp_randstate_ptr rstate)
+{
+  mp_ptr tp, seedp, ap;
+  mp_size_t ta;
+  mp_size_t tn, seedn, an;
+  unsigned long int m2exp;
+  unsigned long int bits;
+  mp_size_t xn;
+  gmp_rand_lc_struct *p;
+  TMP_DECL;
+
+  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+  m2exp = p->_mp_m2exp;
+
+  seedp = PTR (p->_mp_seed);
+  seedn = SIZ (p->_mp_seed);
+
+  ap = PTR (p->_mp_a);
+  an = SIZ (p->_mp_a);
+
+  /* Allocate temporary storage.  Let there be room for calculation of
+     (A * seed + C) % M, or M if bigger than that.  */
+
+  TMP_MARK;
+
+  ta = an + seedn + 1;
+  tn = BITS_TO_LIMBS (m2exp);
+  if (ta <= tn) /* that is, if (ta < tn + 1) */
+    {
+      mp_size_t tmp = an + seedn;
+      ta = tn + 1;
+      tp = TMP_ALLOC_LIMBS (ta);
+      MPN_ZERO (&tp[tmp], ta - tmp); /* mpn_mul won't zero it out.  */
+    }
+  else
+    tp = TMP_ALLOC_LIMBS (ta);
+
+  /* t = a * seed.  NOTE: an is always > 0; see initialization.  */
+  ASSERT (seedn >= an && an > 0);
+  mpn_mul (tp, seedp, seedn, ap, an);
+
+  /* t = t + c.  NOTE: tn is always >= p->_cn (precondition for __GMPN_ADD);
+     see initialization.  */
+  ASSERT (tn >= p->_cn);
+  mpn_add (tp, tp, tn, p->_cp, p->_cn);
+
+  /* t = t % m */
+  tp[m2exp / GMP_NUMB_BITS] &= (CNST_LIMB (1) << m2exp % GMP_NUMB_BITS) - 1;
+
+  /* Save result as next seed.  */
+  MPN_COPY (PTR (p->_mp_seed), tp, tn);
+
+  /* Discard the lower m2exp/2 of the result.  */
+  bits = m2exp / 2;
+  xn = bits / GMP_NUMB_BITS;
+
+  tn -= xn;
+  if (tn > 0)
+    {
+      unsigned int cnt = bits % GMP_NUMB_BITS;
+      if (cnt != 0)
+	{
+	  mpn_rshift (tp, tp + xn, tn, cnt);
+	  MPN_COPY_INCR (rp, tp, xn + 1);
+	}
+      else			/* Even limb boundary.  */
+	MPN_COPY_INCR (rp, tp + xn, tn);
+    }
+
+  TMP_FREE;
+
+  /* Return number of valid bits in the result.  */
+  return (m2exp + 1) / 2;
+}
+
+
+/* Obtain a sequence of random numbers.  */
+static void
+randget_lc (gmp_randstate_ptr rstate, mp_ptr rp, unsigned long int nbits)
+{
+  unsigned long int rbitpos;
+  int chunk_nbits;
+  mp_ptr tp;
+  mp_size_t tn;
+  gmp_rand_lc_struct *p;
+  TMP_DECL;
+
+  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+  TMP_MARK;
+
+  chunk_nbits = p->_mp_m2exp / 2;
+  tn = BITS_TO_LIMBS (chunk_nbits);
+
+  tp = TMP_ALLOC_LIMBS (tn);
+
+  rbitpos = 0;
+  while (rbitpos + chunk_nbits <= nbits)
+    {
+      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
+
+      if (rbitpos % GMP_NUMB_BITS != 0)
+	{
+	  mp_limb_t savelimb, rcy;
+	  /* Target of new chunk is not bit aligned.  Use temp space
+	     and align things by shifting it up.  */
+	  lc (tp, rstate);
+	  savelimb = r2p[0];
+	  rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
+	  r2p[0] |= savelimb;
+	  /* bogus */
+	  if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
+	      > GMP_NUMB_BITS)
+	    r2p[tn] = rcy;
+	}
+      else
+	{
+	  /* Target of new chunk is bit aligned.  Let `lc' put bits
+	     directly into our target variable.  */
+	  lc (r2p, rstate);
+	}
+      rbitpos += chunk_nbits;
+    }
+
+  /* Handle last [0..chunk_nbits) bits.  */
+  if (rbitpos != nbits)
+    {
+      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
+      int last_nbits = nbits - rbitpos;
+      tn = BITS_TO_LIMBS (last_nbits);
+      lc (tp, rstate);
+      if (rbitpos % GMP_NUMB_BITS != 0)
+	{
+	  mp_limb_t savelimb, rcy;
+	  /* Target of new chunk is not bit aligned.  Use temp space
+	     and align things by shifting it up.  */
+	  savelimb = r2p[0];
+	  rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
+	  r2p[0] |= savelimb;
+	  if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
+	    r2p[tn] = rcy;
+	}
+      else
+	{
+	  MPN_COPY (r2p, tp, tn);
+	}
+      /* Mask off top bits if needed.  */
+      if (nbits % GMP_NUMB_BITS != 0)
+	rp[nbits / GMP_NUMB_BITS]
+	  &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);
+    }
+
+  TMP_FREE;
+}
+
+
+/* Install SEED, reduced mod 2^m2exp, as the generator's current seed.  */
+static void
+randseed_lc (gmp_randstate_ptr rstate, mpz_srcptr seed)
+{
+  gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+  mpz_ptr seedz = p->_mp_seed;
+  mp_size_t seedn = BITS_TO_LIMBS (p->_mp_m2exp);
+
+  /* Store p->_mp_seed as an unnormalized integer with size enough
+     for numbers up to 2^m2exp-1.  That size can't be zero.  */
+  mpz_fdiv_r_2exp (seedz, seed, p->_mp_m2exp);
+  MPN_ZERO (&PTR (seedz)[SIZ (seedz)], seedn - SIZ (seedz));
+  SIZ (seedz) = seedn;
+}
+
+
+/* Release the mpz members and the state struct itself.  */
+static void
+randclear_lc (gmp_randstate_ptr rstate)
+{
+  gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+  mpz_clear (p->_mp_seed);
+  mpz_clear (p->_mp_a);
+  (*__gmp_free_func) (p, sizeof (gmp_rand_lc_struct));
+}
+
+static void randiset_lc (gmp_randstate_ptr, gmp_randstate_srcptr);
+
+/* Dispatch table stored in each LC-initialized randstate; randiset_lc
+   is forward-declared above because it also references this table.  */
+static const gmp_randfnptr_t Linear_Congruential_Generator = {
+  randseed_lc,
+  randget_lc,
+  randclear_lc,
+  randiset_lc
+};
+
+/* Initialize DST as an independent copy of the LC state in SRC.  */
+static void
+randiset_lc (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  gmp_rand_lc_struct *dstp, *srcp;
+
+  srcp = (gmp_rand_lc_struct *) RNG_STATE (src);
+  dstp = (gmp_rand_lc_struct *) (*__gmp_allocate_func) (sizeof (gmp_rand_lc_struct));
+
+  RNG_STATE (dst) = (mp_limb_t *) (void *) dstp;
+  RNG_FNPTR (dst) = (void *) &Linear_Congruential_Generator;
+
+  /* _mp_seed and _mp_a might be unnormalized (high zero limbs), but
+     mpz_init_set won't worry about that */
+  mpz_init_set (dstp->_mp_seed, srcp->_mp_seed);
+  mpz_init_set (dstp->_mp_a,    srcp->_mp_a);
+
+  dstp->_cn = srcp->_cn;
+
+  /* Copy the addend limbs; the common cases are unrolled.  */
+  dstp->_cp[0] = srcp->_cp[0];
+  if (LIMBS_PER_ULONG > 1)
+    dstp->_cp[1] = srcp->_cp[1];
+  if (LIMBS_PER_ULONG > 2)  /* usually there's only 1 or 2 */
+    MPN_COPY (dstp->_cp + 2, srcp->_cp + 2, LIMBS_PER_ULONG - 2);
+
+  dstp->_mp_m2exp = srcp->_mp_m2exp;
+}
+
+
+/* Initialize RSTATE as a linear congruential generator
+   X = (a*X + c) mod 2^m2exp, with initial seed 1.  A must satisfy the
+   invariants described at the top of the file; m2exp must be nonzero.  */
+void
+gmp_randinit_lc_2exp (gmp_randstate_ptr rstate,
+		      mpz_srcptr a,
+		      unsigned long int c,
+		      mp_bitcnt_t m2exp)
+{
+  gmp_rand_lc_struct *p;
+  mp_size_t seedn = BITS_TO_LIMBS (m2exp);
+
+  ASSERT_ALWAYS (m2exp != 0);
+
+  p = __GMP_ALLOCATE_FUNC_TYPE (1, gmp_rand_lc_struct);
+  RNG_STATE (rstate) = (mp_limb_t *) (void *) p;
+  RNG_FNPTR (rstate) = (void *) &Linear_Congruential_Generator;
+
+  /* allocate m2exp bits of space for p->_mp_seed, and initial seed "1" */
+  mpz_init2 (p->_mp_seed, m2exp);
+  MPN_ZERO (PTR (p->_mp_seed), seedn);
+  SIZ (p->_mp_seed) = seedn;
+  PTR (p->_mp_seed)[0] = 1;
+
+  /* "a", forced to 0 to 2^m2exp-1 */
+  mpz_init (p->_mp_a);
+  mpz_fdiv_r_2exp (p->_mp_a, a, m2exp);
+
+  /* Avoid SIZ(a) == 0 to avoid checking for special case in lc().  */
+  if (SIZ (p->_mp_a) == 0)
+    {
+      SIZ (p->_mp_a) = 1;
+      MPZ_NEWALLOC (p->_mp_a, 1)[0] = CNST_LIMB (0);
+    }
+
+  MPN_SET_UI (p->_cp, p->_cn, c);
+
+  /* Internally we may discard any bits of c above m2exp.  The following
+     code ensures that __GMPN_ADD in lc() will always work.  */
+  if (seedn < p->_cn)
+    p->_cn = (p->_cp[0] != 0);
+
+  p->_mp_m2exp = m2exp;
+}
diff --git a/rand/randmt.c b/rand/randmt.c
new file mode 100644
index 0000000..daf8894
--- /dev/null
+++ b/rand/randmt.c
@@ -0,0 +1,415 @@
+/* Mersenne Twister pseudo-random number generator functions.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2002, 2003, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>   /* for NULL */
+
+#include "gmp-impl.h"
+#include "randmt.h"
+
+
+/* This code implements the Mersenne Twister pseudorandom number generator
+   by Takuji Nishimura and Makoto Matsumoto.  The buffer initialization
+   function is different in order to permit seeds greater than 2^32-1.
+
+   This file contains a special __gmp_randinit_mt_noseed which excludes the
+   seeding function from the gmp_randfnptr_t routines.  This is for use by
+   mpn_random and mpn_random2 on the global random generator.  MT seeding
+   uses mpz functions, and we don't want mpn routines dragging mpz functions
+   into the link.  */
+
+
+/* Default seed to use when the generator is not initialized.  */
+#define DEFAULT_SEED 5489 /* was 4357 */
+
+/* Tempering masks.  */
+#define MASK_1 0x9D2C5680
+#define MASK_2 0xEFC60000
+
+/* Initial state of buffer when initialized with default seed.  */
+static const gmp_uint_least32_t default_state[N] =
+{
+  0xD247B233,0x9E5AA8F1,0x0FFA981B,0x9DCB0980,0x74200F2B,0xA576D044,
+  0xE9F05ADF,0x1538BFF5,0x59818BBF,0xCF9E58D8,0x09FCE032,0x6A1C663F,
+  0x5116E78A,0x69B3E0FA,0x6D92D665,0xD0A8BE98,0xF669B734,0x41AC1B68,
+  0x630423F1,0x4B8D6B8A,0xC2C46DD7,0x5680747D,0x43703E8F,0x3B6103D2,
+  0x49E5EB3F,0xCBDAB4C1,0x9C988E23,0x747BEE0B,0x9111E329,0x9F031B5A,
+  0xECCA71B9,0x2AFE4EF8,0x8421C7ED,0xAC89AFF1,0xAED90DF3,0x2DD74F01,
+  0x14906A13,0x75873FA9,0xFF83F877,0x5028A0C9,0x11B4C41D,0x7CAEDBC4,
+  0x8672D0A7,0x48A7C109,0x8320E59F,0xBC0B3D5F,0x75A30886,0xF9E0D128,
+  0x41AF7580,0x239BB94D,0xC67A3C81,0x74EEBD6E,0xBC02B53C,0x727EA449,
+  0x6B8A2806,0x5853B0DA,0xBDE032F4,0xCE234885,0x320D6145,0x48CC053F,
+  0x00DBC4D2,0xD55A2397,0xE1059B6F,0x1C3E05D1,0x09657C64,0xD07CB661,
+  0x6E982E34,0x6DD1D777,0xEDED1071,0xD79DFD65,0xF816DDCE,0xB6FAF1E4,
+  0x1C771074,0x311835BD,0x18F952F7,0xF8F40350,0x4ECED354,0x7C8AC12B,
+  0x31A9994D,0x4FD47747,0xDC227A23,0x6DFAFDDF,0x6796E748,0x0C6F634F,
+  0xF992FA1D,0x4CF670C9,0x067DFD31,0xA7A3E1A5,0x8CD7D9DF,0x972CCB34,
+  0x67C82156,0xD548F6A8,0x045CEC21,0xF3240BFB,0xDEF656A7,0x43DE08C5,
+  0xDAD1F92F,0x3726C56B,0x1409F19A,0x942FD147,0xB926749C,0xADDC31B8,
+  0x53D0D869,0xD1BA52FE,0x6722DF8C,0x22D95A74,0x7DC1B52A,0x1DEC6FD5,
+  0x7262874D,0x0A725DC9,0xE6A8193D,0xA052835A,0xDC9AD928,0xE59EBB90,
+  0x70DBA9FF,0xD612749D,0x5A5A638C,0x6086EC37,0x2A579709,0x1449EA3A,
+  0xBC8E3C06,0x2F900666,0xFBE74FD1,0x6B35B911,0xF8335008,0xEF1E979D,
+  0x738AB29D,0xA2DC0FDC,0x7696305D,0xF5429DAC,0x8C41813B,0x8073E02E,
+  0xBEF83CCD,0x7B50A95A,0x05EE5862,0x00829ECE,0x8CA1958C,0xBE4EA2E2,
+  0x4293BB73,0x656F7B23,0x417316D8,0x4467D7CF,0x2200E63B,0x109050C8,
+  0x814CBE47,0x36B1D4A8,0x36AF9305,0x308327B3,0xEBCD7344,0xA738DE27,
+  0x5A10C399,0x4142371D,0x64A18528,0x0B31E8B2,0x641057B9,0x6AFC363B,
+  0x108AD953,0x9D4DA234,0x0C2D9159,0x1C8A1A1F,0x310C66BA,0x87AA1070,
+  0xDAC832FF,0x0A433422,0x7AF15812,0x2D8D9BD0,0x995A25E9,0x25326CAC,
+  0xA34384DB,0x4C8421CC,0x4F0315EC,0x29E8649E,0xA7732D6F,0x2E94D3E3,
+  0x7D98A340,0x397C4D74,0x659DB4DE,0x747D4E9A,0xD9DB8435,0x4659DBE9,
+  0x313E6DC5,0x29D104DC,0x9F226CBA,0x452F18B0,0xD0BC5068,0x844CA299,
+  0x782B294E,0x4AE2EB7B,0xA4C475F8,0x70A81311,0x4B3E8BCC,0x7E20D4BA,
+  0xABCA33C9,0x57BE2960,0x44F9B419,0x2E567746,0x72EB757A,0x102CC0E8,
+  0xB07F32B9,0xD0DABD59,0xBA85AD6B,0xF3E20667,0x98D77D81,0x197AFA47,
+  0x518EE9AC,0xE10CE5A2,0x01CF2C2A,0xD3A3AF3D,0x16DDFD65,0x669232F8,
+  0x1C50A301,0xB93D9151,0x9354D3F4,0x847D79D0,0xD5FE2EC6,0x1F7B0610,
+  0xFA6B90A5,0xC5879041,0x2E7DC05E,0x423F1F32,0xEF623DDB,0x49C13280,
+  0x98714E92,0xC7B6E4AD,0xC4318466,0x0737F312,0x4D3C003F,0x9ACC1F1F,
+  0x5F1C926D,0x085FA771,0x185A83A2,0xF9AA159D,0x0B0B0132,0xF98E7A43,
+  0xCD9EBDBE,0x0190CB29,0x10D93FB6,0x3B8A4D97,0x66A65A41,0xE43E766F,
+  0x77BE3C41,0xB9686364,0xCB36994D,0x6846A287,0x567E77F7,0x36178DD8,
+  0xBDE6B1F2,0xB6EFDC64,0x82950324,0x42053F47,0xC09BE51C,0x0942D762,
+  0x35F92C7F,0x367DEC61,0x6EE3D983,0xDBAAF78A,0x265D2C47,0x8EB4BF5C,
+  0x33B232D7,0xB0137E77,0x373C39A7,0x8D2B2E76,0xC7510F01,0x50F9E032,
+  0x7B1FDDDB,0x724C2AAE,0xB10ECB31,0xCCA3D1B8,0x7F0BCF10,0x4254BBBD,
+  0xE3F93B97,0x2305039B,0x53120E22,0x1A2F3B9A,0x0FDDBD97,0x0118561E,
+  0x0A798E13,0x9E0B3ACD,0xDB6C9F15,0xF512D0A2,0x9E8C3A28,0xEE2184AE,
+  0x0051EC2F,0x2432F74F,0xB0AA66EA,0x55128D88,0xF7D83A38,0x4DAE8E82,
+  0x3FDC98D6,0x5F0BD341,0x7244BE1D,0xC7B48E78,0x2D473053,0x43892E20,
+  0xBA0F1F2A,0x524D4895,0x2E10BCB1,0x4C372D81,0x5C3E50CD,0xCF61CC2E,
+  0x931709AB,0x81B3AEFC,0x39E9405E,0x7FFE108C,0x4FBB3FF8,0x06ABE450,
+  0x7F5BF51E,0xA4E3CDFD,0xDB0F6C6F,0x159A1227,0x3B9FED55,0xD20B6F7F,
+  0xFBE9CC83,0x64856619,0xBF52B8AF,0x9D7006B0,0x71165BC6,0xAE324AEE,
+  0x29D27F2C,0x794C2086,0x74445CE2,0x782915CC,0xD4CE6886,0x3289AE7C,
+  0x53DEF297,0x4185F7ED,0x88B72400,0x3C09DC11,0xBCE3AAB6,0x6A75934A,
+  0xB267E399,0x000DF1BF,0x193BA5E2,0xFA3E1977,0x179E14F6,0x1EEDE298,
+  0x691F0B06,0xB84F78AC,0xC1C15316,0xFFFF3AD6,0x0B457383,0x518CD612,
+  0x05A00F3E,0xD5B7D275,0x4C5ECCD7,0xE02CD0BE,0x5558E9F2,0x0C89BBF0,
+  0xA3D96227,0x2832D2B2,0xF667B897,0xD4556554,0xF9D2F01F,0xFA1E3FAE,
+  0x52C2E1EE,0xE5451F31,0x7E849729,0xDABDB67A,0x54BF5E7E,0xF831C271,
+  0x5F1A17E3,0x9D140AFE,0x92741C47,0x48CFABCE,0x9CBBE477,0x9C3EE57F,
+  0xB07D4C39,0xCC21BCE2,0x697708B1,0x58DA2A6B,0x2370DB16,0x6E641948,
+  0xACC5BD52,0x868F24CC,0xCA1DB0F5,0x4CADA492,0x3F443E54,0xC4A4D5E9,
+  0xF00AD670,0xE93C86E0,0xFE90651A,0xDDE532A3,0xA66458DF,0xAB7D7151,
+  0x0E2E775F,0xC9109F99,0x8D96D59F,0x73CEF14C,0xC74E88E9,0x02712DC0,
+  0x04F41735,0x2E5914A2,0x59F4B2FB,0x0287FC83,0x80BC0343,0xF6B32559,
+  0xC74178D4,0xF1D99123,0x383CCC07,0xACC0637D,0x0863A548,0xA6FCAC85,
+  0x2A13EFF0,0xAF2EEDB1,0x41E72750,0xE0C6B342,0x5DA22B46,0x635559E0,
+  0xD2EA40AC,0x10AA98C0,0x19096497,0x112C542B,0x2C85040C,0xA868E7D0,
+  0x6E260188,0xF596D390,0xC3BB5D7A,0x7A2AA937,0xDFD15032,0x6780AE3B,
+  0xDB5F9CD8,0x8BD266B0,0x7744AF12,0xB463B1B0,0x589629C9,0xE30DBC6E,
+  0x880F5569,0x209E6E16,0x9DECA50C,0x02987A57,0xBED3EA57,0xD3A678AA,
+  0x70DD030D,0x0CFD9C5D,0x92A18E99,0xF5740619,0x7F6F0A7D,0x134CAF9A,
+  0x70F5BAE4,0x23DCA7B5,0x4D788FCD,0xC7F07847,0xBCF77DA1,0x9071D568,
+  0xFC627EA1,0xAE004B77,0x66B54BCB,0x7EF2DAAC,0xDCD5AC30,0xB9BDF730,
+  0x505A97A7,0x9D881FD3,0xADB796CC,0x94A1D202,0x97535D7F,0x31EC20C0,
+  0xB1887A98,0xC1475069,0xA6F73AF3,0x71E4E067,0x46A569DE,0xD2ADE430,
+  0x6F0762C7,0xF50876F4,0x53510542,0x03741C3E,0x53502224,0xD8E54D60,
+  0x3C44AB1A,0x34972B46,0x74BFA89D,0xD7D768E0,0x37E605DC,0xE13D1BDF,
+  0x5051C421,0xB9E057BE,0xB717A14C,0xA1730C43,0xB99638BE,0xB5D5F36D,
+  0xE960D9EA,0x6B1388D3,0xECB6D3B6,0xBDBE8B83,0x2E29AFC5,0x764D71EC,
+  0x4B8F4F43,0xC21DDC00,0xA63F657F,0x82678130,0xDBF535AC,0xA594FC58,
+  0x942686BC,0xBD9B657B,0x4A0F9B61,0x44FF184F,0x38E10A2F,0x61910626,
+  0x5E247636,0x7106D137,0xC62802F0,0xBD1D1F00,0x7CC0DCB2,0xED634909,
+  0xDC13B24E,0x9799C499,0xD77E3D6A,0x14773B68,0x967A4FB7,0x35EECFB1,
+  0x2A5110B8,0xE2F0AF94,0x9D09DEA5,0x20255D27,0x5771D34B,0xE1089EE4,
+  0x246F330B,0x8F7CAEE5,0xD3064712,0x75CAFBEE,0xB94F7028,0xED953666,
+  0x5D1975B4,0x5AF81271,0x13BE2025,0x85194659,0x30805331,0xEC9D46C0,
+  0xBC027C36,0x2AF84188,0xC2141B80,0xC02B1E4A,0x04D36177,0xFC50E9D7,
+  0x39CE79DA,0x917E0A00,0xEF7A0BF4,0xA98BD8D1,0x19424DD2,0x9439DF1F,
+  0xC42AF746,0xADDBE83E,0x85221F0D,0x45563E90,0x9095EC52,0x77887B25,
+  0x8AE46064,0xBD43B71A,0xBB541956,0x7366CF9D,0xEE8E1737,0xB5A727C9,
+  0x5076B3E7,0xFC70BACA,0xCE135B75,0xC4E91AA3,0xF0341911,0x53430C3F,
+  0x886B0824,0x6BB5B8B7,0x33E21254,0xF193B456,0x5B09617F,0x215FFF50,
+  0x48D97EF1,0x356479AB,0x6EA9DDC4,0x0D352746,0xA2F5CE43,0xB226A1B3,
+  0x1329EA3C,0x7A337CC2,0xB5CCE13D,0x563E3B5B,0x534E8E8F,0x561399C9,
+  0xE1596392,0xB0F03125,0x4586645B,0x1F371847,0x94EAABD1,0x41F97EDD,
+  0xE3E5A39B,0x71C774E2,0x507296F4,0x5960133B,0x7852C494,0x3F5B2691,
+  0xA3F87774,0x5A7AF89E,0x17DA3F28,0xE9D9516D,0xFCC1C1D5,0xE4618628,
+  0x04081047,0xD8E4DB5F,0xDC380416,0x8C4933E2,0x95074D53,0xB1B0032D,
+  0xCC8102EA,0x71641243,0x98D6EB6A,0x90FEC945,0xA0914345,0x6FAB037D,
+  0x70F49C4D,0x05BF5B0E,0x927AAF7F,0xA1940F61,0xFEE0756F,0xF815369F,
+  0x5C00253B,0xF2B9762F,0x4AEB3CCC,0x1069F386,0xFBA4E7B9,0x70332665,
+  0x6BCA810E,0x85AB8058,0xAE4B2B2F,0x9D120712,0xBEE8EACB,0x776A1112
+};
+
+void
+__gmp_mt_recalc_buffer (gmp_uint_least32_t mt[])
+{
+  gmp_uint_least32_t y;
+  int kk;
+
+  for (kk = 0; kk < N - M; kk++)
+    {
+      y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);
+      mt[kk] = mt[kk + M] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
+    }
+  for (; kk < N - 1; kk++)
+    {
+      y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);
+      mt[kk] = mt[kk - (N - M)] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
+    }
+
+  y = (mt[N - 1] & 0x80000000) | (mt[0] & 0x7FFFFFFF);
+  mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
+}
+
+
+/* Get nbits bits of output from the generator into dest.
+   Note that Mersenne Twister is designed to produce outputs in
+   32-bit words.  */
+void
+__gmp_randget_mt (gmp_randstate_ptr rstate, mp_ptr dest, unsigned long int nbits)
+{
+  gmp_uint_least32_t y;
+  int rbits;
+  mp_size_t i;
+  mp_size_t nlimbs;
+  int *pmti;
+  gmp_uint_least32_t *mt;
+
+  pmti = &((gmp_rand_mt_struct *) RNG_STATE (rstate))->mti;
+  mt = ((gmp_rand_mt_struct *) RNG_STATE (rstate))->mt;
+
+  nlimbs = nbits / GMP_NUMB_BITS;
+  rbits = nbits % GMP_NUMB_BITS;
+
+#define NEXT_RANDOM			\
+  do					\
+    {					\
+      if (*pmti >= N)			\
+	{				\
+	  __gmp_mt_recalc_buffer (mt);  \
+	  *pmti = 0;			\
+	}				\
+      y = mt[(*pmti)++];		\
+      y ^= (y >> 11);			\
+      y ^= (y << 7) & MASK_1;		\
+      y ^= (y << 15) & MASK_2;		\
+      y ^= (y >> 18);			\
+    }					\
+  while (0)
+
+
+  /* Handle the common cases of 32- or 64-bit limbs with fast,
+     optimized routines, and the rest of cases with a general
+     routine.  In all cases, no more than 31 bits are rejected
+     for the last limb so that every version of the code is
+     consistent with the others.  */
+
+#if (GMP_NUMB_BITS == 32)
+
+  for (i = 0; i < nlimbs; i++)
+    {
+      NEXT_RANDOM;
+      dest[i] = (mp_limb_t) y;
+    }
+  if (rbits)
+    {
+      NEXT_RANDOM;
+      dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
+    }
+
+#else /* GMP_NUMB_BITS != 32 */
+#if (GMP_NUMB_BITS == 64)
+
+  for (i = 0; i < nlimbs; i++)
+    {
+      NEXT_RANDOM;
+      dest[i] = (mp_limb_t) y;
+      NEXT_RANDOM;
+      dest[i] |= (mp_limb_t) y << 32;
+    }
+  if (rbits)
+    {
+      if (rbits < 32)
+	{
+	  NEXT_RANDOM;
+	  dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
+	}
+      else
+	{
+	  NEXT_RANDOM;
+	  dest[nlimbs] = (mp_limb_t) y;
+	  if (rbits > 32)
+	    {
+	      NEXT_RANDOM;
+	      dest[nlimbs] |=
+		((mp_limb_t) (y & ~(ULONG_MAX << (rbits-32)))) << 32;
+	    }
+	}
+    }
+
+#else /* GMP_NUMB_BITS != 64 */
+
+  {
+    /* Fall back to a general algorithm.  This algorithm works by
+       keeping a pool of up to 64 bits (2 outputs from MT) acting
+       as a shift register from which bits are consumed as needed.
+       Bits are consumed using the LSB bits of bitpool_l, and
+       inserted via bitpool_h and shifted to the right place.  */
+
+    gmp_uint_least32_t bitpool_h = 0;
+    gmp_uint_least32_t bitpool_l = 0;
+    int bits_in_pool = 0;	/* Holds number of valid bits in the pool.  */
+    int bits_to_fill;		/* Holds total number of bits to put in
+				   destination.  */
+    int bitidx;			/* Holds the destination bit position.  */
+    mp_size_t nlimbs2;		/* Number of whole+partial limbs to fill.  */
+
+    nlimbs2 = nlimbs + (rbits != 0);
+
+    for (i = 0; i < nlimbs2; i++)
+      {
+	bitidx = 0;
+	if (i < nlimbs)
+	  bits_to_fill = GMP_NUMB_BITS;
+	else
+	  bits_to_fill = rbits;
+
+	dest[i] = CNST_LIMB (0);
+	while (bits_to_fill >= 32) /* Process whole 32-bit blocks first.  */
+	  {
+	    if (bits_in_pool < 32)	/* Need more bits.  */
+	      {
+		/* 64-bit right shift.  */
+		NEXT_RANDOM;
+		bitpool_h = y;
+		bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
+		if (bits_in_pool == 0)
+		  bitpool_h = 0;
+		else
+		  bitpool_h >>= 32 - bits_in_pool;
+		bits_in_pool += 32;	/* We've got 32 more bits.  */
+	      }
+
+	    /* Fill a 32-bit chunk.  */
+	    dest[i] |= ((mp_limb_t) bitpool_l) << bitidx;
+	    bitpool_l = bitpool_h;
+	    bits_in_pool -= 32;
+	    bits_to_fill -= 32;
+	    bitidx += 32;
+	  }
+
+	/* Cover the case where GMP_NUMB_BITS is not a multiple of 32.  */
+	if (bits_to_fill != 0)
+	  {
+	    if (bits_in_pool < bits_to_fill)
+	      {
+		NEXT_RANDOM;
+		bitpool_h = y;
+		bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
+		if (bits_in_pool == 0)
+		  bitpool_h = 0;
+		else
+		  bitpool_h >>= 32 - bits_in_pool;
+		bits_in_pool += 32;
+	      }
+
+	    dest[i] |= (((mp_limb_t) bitpool_l
+			 & ~(~CNST_LIMB (0) << bits_to_fill))
+			<< bitidx);
+	    bitpool_l = ((bitpool_l >> bits_to_fill)
+			 | (bitpool_h << (32 - bits_to_fill))) & 0xFFFFFFFF;
+	    bitpool_h >>= bits_to_fill;
+	    bits_in_pool -= bits_to_fill;
+	  }
+      }
+  }
+
+#endif /* GMP_NUMB_BITS != 64 */
+#endif /* GMP_NUMB_BITS != 32 */
+}
+
+void
+__gmp_randclear_mt (gmp_randstate_ptr rstate)
+{
+  (*__gmp_free_func) ((void *) RNG_STATE (rstate),
+		      ALLOC (rstate->_mp_seed) * GMP_LIMB_BYTES);
+}
+
+void __gmp_randiset_mt (gmp_randstate_ptr, gmp_randstate_srcptr);
+
+static const gmp_randfnptr_t Mersenne_Twister_Generator_Noseed = {
+  NULL,
+  __gmp_randget_mt,
+  __gmp_randclear_mt,
+  __gmp_randiset_mt
+};
+
+void
+__gmp_randiset_mt (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  const mp_size_t sz = ((sizeof (gmp_rand_mt_struct) - 1) / GMP_LIMB_BYTES) + 1;
+  gmp_rand_mt_struct *dstp, *srcp;
+  mp_size_t i;
+
+  /* Set the generator functions.  */
+  RNG_FNPTR (dst) = RNG_FNPTR(src);
+
+  /* Allocate the MT-specific state.  */
+  dstp = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (sz);
+  RNG_STATE (dst) = (mp_ptr) dstp;
+  ALLOC (dst->_mp_seed) = sz;     /* Initialize alloc field to placate Camm.  */
+
+  /* Copy state.  */
+  srcp = (gmp_rand_mt_struct *) RNG_STATE (src);
+  for (i = 0; i < N; i++)
+    dstp->mt[i] = srcp->mt[i];
+
+  dstp->mti = srcp->mti;
+}
+
+void
+__gmp_randinit_mt_noseed (gmp_randstate_ptr dst)
+{
+  const mp_size_t sz = ((sizeof (gmp_rand_mt_struct) - 1) / GMP_LIMB_BYTES) + 1;
+  gmp_rand_mt_struct *dstp;
+  mp_size_t i;
+
+  /* Set the generator functions.  */
+  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
+
+  /* Allocate the MT-specific state.  */
+  dstp = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (sz);
+  RNG_STATE (dst) = (mp_ptr) dstp;
+  ALLOC (dst->_mp_seed) = sz;     /* Initialize alloc field to placate Camm.  */
+
+  /* Set state for default seed.  */
+  for (i = 0; i < N; i++)
+    dstp->mt[i] = default_state[i];
+
+  dstp->mti = WARM_UP % N;
+}
diff --git a/rand/randmt.h b/rand/randmt.h
new file mode 100644
index 0000000..61f8b4f
--- /dev/null
+++ b/rand/randmt.h
@@ -0,0 +1,51 @@
+/* Mersenne Twister pseudo-random number generator defines.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/* Number of extractions used to warm the buffer up.  */
+#define WARM_UP 2000
+
+/* Period parameters.  */
+#define N 624
+#define M 397
+#define MATRIX_A 0x9908B0DF   /* Constant vector a.  */
+
+/* State structure for MT.  */
+typedef struct
+{
+  gmp_uint_least32_t mt[N];    /* State array.  */
+  int mti;                     /* Index of current value.  */
+} gmp_rand_mt_struct;
+
+
+void __gmp_mt_recalc_buffer (gmp_uint_least32_t *);
+void __gmp_randget_mt (gmp_randstate_ptr, mp_ptr, unsigned long int);
+void __gmp_randclear_mt (gmp_randstate_ptr);
+void __gmp_randiset_mt (gmp_randstate_ptr, gmp_randstate_srcptr);
diff --git a/rand/randmts.c b/rand/randmts.c
new file mode 100644
index 0000000..2b95a2a
--- /dev/null
+++ b/rand/randmts.c
@@ -0,0 +1,171 @@
+/* Mersenne Twister pseudo-random number generator functions.
+
+Copyright 2002, 2003, 2013, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "randmt.h"
+
+
+/* Calculate (b^e) mod (2^n-k) for e=1074888996, n=19937 and k=20023,
+   needed by the seeding function below.  */
+static void
+mangle_seed (mpz_ptr r)
+{
+  mpz_t          t, b;
+  unsigned long  e = 0x40118124;
+  unsigned long  bit = 0x20000000;
+
+  mpz_init2 (t, 19937L);
+  mpz_init_set (b, r);
+
+  do
+    {
+      mpz_mul (r, r, r);
+
+    reduce:
+      for (;;)
+        {
+          mpz_tdiv_q_2exp (t, r, 19937L);
+          if (SIZ (t) == 0)
+            break;
+          mpz_tdiv_r_2exp (r, r, 19937L);
+          mpz_addmul_ui (r, t, 20023L);
+        }
+
+      if ((e & bit) != 0)
+        {
+          e ^= bit;
+          mpz_mul (r, r, b);
+          goto reduce;
+        }
+
+      bit >>= 1;
+    }
+  while (bit != 0);
+
+  mpz_clear (t);
+  mpz_clear (b);
+}
+
+
+/* Seeding function.  Uses powering modulo a non-Mersenne prime to obtain
+   a permutation of the input seed space.  The modulus is 2^19937-20023,
+   which is probably prime.  The power is 1074888996.  In order to avoid
+   seeds 0 and 1 generating invalid or strange output, the input seed is
+   first manipulated as follows:
+
+     seed1 = seed mod (2^19937-20027) + 2
+
+   so that seed1 lies between 2 and 2^19937-20026 inclusive. Then the
+   powering is performed as follows:
+
+     seed2 = (seed1^1074888996) mod (2^19937-20023)
+
+   and then seed2 is used to bootstrap the buffer.
+
+   This method aims to give guarantees that:
+     a) seed2 will never be zero,
+     b) seed2 will very seldom have a very low population of ones in its
+	binary representation, and
+     c) every seed between 0 and 2^19937-20028 (inclusive) will yield a
+	different sequence.
+
+   CAVEATS:
+
+   The period of the seeding function is 2^19937-20027.  This means that
+   with seeds 2^19937-20027, 2^19937-20026, ... the exact same sequences
+   are obtained as with seeds 0, 1, etc.; it also means that seed -1
+   produces the same sequence as seed 2^19937-20028, etc.
+
+   Moreover, c) is not guaranteed: there are many seeds yielding the
+   same sequence, because gcd (1074888996, 2^19937 - 20023 - 1) = 12.
+   E.g. x and x'=x*19^((2^19937-20023-1) / 12) mod (2^19937-20023), if
+   chosen as seed1, generate the same seed2, for every x.
+   Similarly x" can be obtained from x', obtaining 12 different
+   values.
+ */
+
+static void
+randseed_mt (gmp_randstate_ptr rstate, mpz_srcptr seed)
+{
+  int i;
+  size_t cnt;
+
+  gmp_rand_mt_struct *p;
+  mpz_t mod;    /* Modulus.  */
+  mpz_t seed1;  /* Intermediate result.  */
+
+  p = (gmp_rand_mt_struct *) RNG_STATE (rstate);
+
+  mpz_init2 (mod, 19938L);
+  mpz_init2 (seed1, 19937L);
+
+  mpz_setbit (mod, 19937L);
+  mpz_sub_ui (mod, mod, 20027L);
+  mpz_mod (seed1, seed, mod);	/* Reduce `seed' modulo `mod'.  */
+  mpz_clear (mod);
+  mpz_add_ui (seed1, seed1, 2L);	/* seed1 is now ready.  */
+  mangle_seed (seed1);	/* Perform the mangling by powering.  */
+
+  /* Copy the last bit into bit 31 of mt[0] and clear it.  */
+  p->mt[0] = (mpz_tstbit (seed1, 19936L) != 0) ? 0x80000000 : 0;
+  mpz_clrbit (seed1, 19936L);
+
+  /* Split seed1 into N-1 32-bit chunks.  */
+  mpz_export (&p->mt[1], &cnt, -1, sizeof (p->mt[1]), 0,
+              8 * sizeof (p->mt[1]) - 32, seed1);
+  mpz_clear (seed1);
+  cnt++;
+  ASSERT (cnt <= N);
+  while (cnt < N)
+    p->mt[cnt++] = 0;
+
+  /* Warm the generator up if necessary.  */
+  if (WARM_UP != 0)
+    for (i = 0; i < WARM_UP / N; i++)
+      __gmp_mt_recalc_buffer (p->mt);
+
+  p->mti = WARM_UP % N;
+}
+
+
+static const gmp_randfnptr_t Mersenne_Twister_Generator = {
+  randseed_mt,
+  __gmp_randget_mt,
+  __gmp_randclear_mt,
+  __gmp_randiset_mt
+};
+
+/* Initialize MT-specific data.  */
+void
+gmp_randinit_mt (gmp_randstate_ptr rstate)
+{
+  __gmp_randinit_mt_noseed (rstate);
+  RNG_FNPTR (rstate) = (void *) &Mersenne_Twister_Generator;
+}
diff --git a/rand/randmui.c b/rand/randmui.c
new file mode 100644
index 0000000..d3292db
--- /dev/null
+++ b/rand/randmui.c
@@ -0,0 +1,85 @@
+/* gmp_urandomm_ui -- uniform random number 0 to N-1 for ulong N.
+
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* If n is a power of 2 then the test ret<n is always true and the loop is
+   unnecessary, but there's no need to add special code for this.  Just get
+   the "bits" calculation correct and let it go through normally.
+
+   If n is 1 then will have bits==0 and _gmp_rand will produce no output and
+   we always return 0.  Again there seems no need for a special case, just
+   initialize a[0]=0 and let it go through normally.  */
+
+#define MAX_URANDOMM_ITER  80
+
+unsigned long
+gmp_urandomm_ui (gmp_randstate_ptr rstate, unsigned long n)
+{
+  mp_limb_t      a[LIMBS_PER_ULONG];
+  unsigned long  ret, bits, leading;
+  int            i;
+
+  if (UNLIKELY (n == 0))
+    DIVIDE_BY_ZERO;
+
+  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
+     all (bits==0 arises when n==1), or if bits <= GMP_NUMB_BITS then it
+     will store only a[0].  */
+  a[0] = 0;
+#if LIMBS_PER_ULONG > 1
+  a[1] = 0;
+#endif
+
+  count_leading_zeros (leading, (mp_limb_t) n);
+  bits = GMP_LIMB_BITS - leading - (POW2_P(n) != 0);
+
+  for (i = 0; i < MAX_URANDOMM_ITER; i++)
+    {
+      _gmp_rand (a, rstate, bits);
+#if LIMBS_PER_ULONG == 1
+      ret = a[0];
+#else
+      ret = a[0] | (a[1] << GMP_NUMB_BITS);
+#endif
+      if (LIKELY (ret < n))   /* usually one iteration suffices */
+        goto done;
+    }
+
+  /* Too many iterations, there must be something degenerate about the
+     rstate algorithm.  Return r%n.  */
+  ret -= n;
+  ASSERT (ret < n);
+
+ done:
+  return ret;
+}
diff --git a/rand/rands.c b/rand/rands.c
new file mode 100644
index 0000000..af436a4
--- /dev/null
+++ b/rand/rands.c
@@ -0,0 +1,41 @@
+/* __gmp_rands -- global random state for old-style random functions.
+
+   EVERYTHING IN THIS FILE IS FOR INTERNAL USE ONLY.  IT'S ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU
+   MP RELEASES.  */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* Use this via the RANDS macro in gmp-impl.h */
+char             __gmp_rands_initialized = 0;
+gmp_randstate_t  __gmp_rands;
diff --git a/rand/randsd.c b/rand/randsd.c
new file mode 100644
index 0000000..880a148
--- /dev/null
+++ b/rand/randsd.c
@@ -0,0 +1,38 @@
+/* gmp_randseed (state, seed) -- Set initial seed SEED in random state STATE.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+gmp_randseed (gmp_randstate_ptr rstate,
+	      mpz_srcptr seed)
+{
+  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randseed_fn) (rstate, seed);
+}
diff --git a/rand/randsdui.c b/rand/randsdui.c
new file mode 100644
index 0000000..a91a148
--- /dev/null
+++ b/rand/randsdui.c
@@ -0,0 +1,43 @@
+/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random
+   state STATE.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+void
+gmp_randseed_ui (gmp_randstate_ptr rstate,
+                 unsigned long int seed)
+{
+  mpz_t zseed;
+  mp_limb_t zlimbs[LIMBS_PER_ULONG];
+
+  MPZ_FAKE_UI (zseed, zlimbs, seed);
+  gmp_randseed (rstate, zseed);
+}
diff --git a/scanf/doscan.c b/scanf/doscan.c
new file mode 100644
index 0000000..35273a3
--- /dev/null
+++ b/scanf/doscan.c
@@ -0,0 +1,766 @@
+/* __gmp_doscan -- formatted input internals.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
+
+#include "config.h"	/* needed for the HAVE_, could also move gmp incls */
+
+#include <stdarg.h>
+#include <ctype.h>
+#include <stddef.h>    /* for ptrdiff_t */
+#include <stdio.h>
+#include <stdlib.h>    /* for strtol */
+#include <string.h>
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for localeconv */
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#endif
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h> /* for quad_t */
+#endif
+
+#include "gmp-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+
+/* General:
+
+       It's necessary to parse up the format string to recognise the GMP
+       extra types F, Q and Z.  Other types and conversions are passed
+       across to the standard sscanf or fscanf via funs->scan, for ease of
+       implementation.  This is essential in the case of something like glibc
+       %p where the pointer format isn't actually documented.
+
+       Because funs->scan doesn't get the whole input it can't put the right
+       values in for %n, so that's handled in __gmp_doscan.  Neither sscanf
+       nor fscanf directly indicate how many characters were read, so an
+       extra %n is appended to each run for that.  For fscanf this merely
+       supports our %n output, but for sscanf it lets funs->step move us
+       along the input string.
+
+       Whitespace and literal matches in the format string, including %%,
+       are handled directly within __gmp_doscan.  This is reasonably
+       efficient, and avoids some suspicious behaviour observed in various
+       system libc's.  GLIBC 2.2.4 for instance returns 0 on
+
+	   sscanf(" ", " x")
+       or
+	   sscanf(" ", " x%d",&n)
+
+       whereas we think they should return EOF, since end-of-string is
+       reached when a match of "x" is required.
+
+       For standard % conversions, funs->scan is called once for each
+       conversion.  If we had vfscanf and vsscanf and could rely on their
+       fixed text matching behaviour then we could call them with multiple
+       consecutive standard conversions.  But plain fscanf and sscanf work
+       fine, and parsing one field at a time shouldn't be too much of a
+       slowdown.
+
+   gmpscan:
+
+       gmpscan reads a gmp type.  It's only used from one place, but is a
+       separate subroutine to avoid a big chunk of complicated code in the
+       middle of __gmp_doscan.  Within gmpscan a couple of loopbacks make it
+       possible to share code for parsing integers, rationals and floats.
+
+       In gmpscan normally one char of lookahead is maintained, but when width
+       is reached that stops, on the principle that an fgetc/ungetc of a char
+       past where we're told to stop would be undesirable.  "chars" is how many
+       characters have been read so far, including the current c.  When
+       chars==width and another character is desired then a jump is done to the
+       "convert" stage.  c is invalid and mustn't be unget'ed in this case;
+       chars is set to width+1 to indicate that.
+
+       gmpscan normally returns the number of characters read.  -1 means an
+       invalid field, -2 means EOF reached before any matching characters
+       were read.
+
+       For hex floats, the mantissa part is passed to mpf_set_str, then the
+       exponent is applied with mpf_mul_exp or mpf_div_2exp.  This is easier
+       than teaching mpf_set_str about an exponent factor (ie. 2) differing
+       from the mantissa radix point factor (ie. 16).  mpf_mul_exp and
+       mpf_div_2exp will preserve the application requested precision, so
+       nothing in that respect is lost by making this a two-step process.
+
+   Matching and errors:
+
+       C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
+       string which is a match for the appropriate type, or a prefix of a
+       match.  With that done, if it's only a prefix then the result is a
+       matching failure, ie. invalid input.
+
+       This rule seems fairly clear, but doesn't seem to be universally
+       applied in system C libraries.  Even GLIBC doesn't seem to get it
+       right, insofar as it seems to accept some apparently invalid forms.
+       Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
+       standard would suggest a non-empty sequence of digits should be
+       required after an "0x".
+
+       A footnote to 7.19.6.2 para 17 notes how this input item reading can
+       mean inputs acceptable to strtol are not acceptable to fscanf.  We
+       think this confirms our reading of "0x" as invalid.
+
+       Clearly gmp_sscanf could backtrack to a longest input which was a
+       valid match for a given item, but this is not done, since C99 says
+       sscanf is identical to fscanf, so we make gmp_sscanf identical to
+       gmp_fscanf.
+
+   Types:
+
+       C99 says "ll" is for long long, and "L" is for long double floats.
+       Unfortunately in GMP 4.1.1 we documented the two as equivalent.  This
+       doesn't affect us directly, since both are passed through to plain
+       scanf.  It seems wisest not to try to enforce the C99 rule.  This is
+       consistent with what we said before, though whether it actually
+       worked was always up to the C library.
+
+   Alternatives:
+
+       Consideration was given to using separate code for gmp_fscanf and
+       gmp_sscanf.  The sscanf case could zip across a string doing literal
+       matches or recognising digits in gmpscan, rather than making a
+       function call fun->get per character.  The fscanf could use getc
+       rather than fgetc too, which might help those systems where getc is a
+       macro or otherwise inlined.  But none of this scanning and converting
+       will be particularly fast, so the two are done together to keep it a
+       little simpler for now.
+
+       Various multibyte string issues are not addressed, for a start C99
+       scanf says the format string is multibyte.  Since we pass %c, %s and
+       %[ to the system scanf, they might do multibyte reads already, but
+       it's another matter whether or not that can be used, since our digit
+       and whitespace parsing is only unibyte.  The plan is to quietly
+       ignore multibyte locales for now.  This is not as bad as it sounds,
+       since GMP is presumably used mostly on numbers, which can be
+       perfectly adequately treated in plain ASCII.
+
+*/
+
+
+struct gmp_doscan_params_t {
+  int	base;
+  int	ignore;
+  char	type;
+  int	width;
+};
+
+
+#define GET(c)			\
+  do {				\
+    ASSERT (chars <= width);	\
+    chars++;			\
+    if (chars > width)		\
+      goto convert;		\
+    (c) = (*funs->get) (data);	\
+  } while (0)
+
+/* store into "s", extending if necessary */
+#define STORE(c)							\
+  do {									\
+    ASSERT (s_upto <= s_alloc);						\
+    if (s_upto >= s_alloc)						\
+      {									\
+	size_t	s_alloc_new = s_alloc + S_ALLOC_STEP;			\
+	s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \
+	s_alloc = s_alloc_new;						\
+      }									\
+    s[s_upto++] = c;							\
+  } while (0)
+
+#define S_ALLOC_STEP  512
+
+static int
+gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
+	 const struct gmp_doscan_params_t *p, void *dst)
+{
+  int	  chars, c, base, first, width, seen_point, seen_digit, hexfloat;
+  size_t  s_upto, s_alloc, hexexp;
+  char	  *s;
+  int	  invalid = 0;
+
+  TRACE (printf ("gmpscan\n"));
+
+  ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
+
+  c = (*funs->get) (data);
+  if (c == EOF)
+    return -2;
+
+  chars = 1;
+  first = 1;
+  seen_point = 0;
+  width = (p->width == 0 ? INT_MAX-1 : p->width);
+  base = p->base;
+  s_alloc = S_ALLOC_STEP;
+  s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
+  s_upto = 0;
+  hexfloat = 0;
+  hexexp = 0;
+
+ another:
+  seen_digit = 0;
+  if (c == '-')
+    {
+      STORE (c);
+      goto get_for_sign;
+    }
+  else if (c == '+')
+    {
+      /* don't store '+', it's not accepted by mpz_set_str etc */
+    get_for_sign:
+      GET (c);
+    }
+
+  if (base == 0)
+    {
+      base = 10;		  /* decimal if no base indicator */
+      if (c == '0')
+	{
+	  seen_digit = 1;	  /* 0 alone is a valid number */
+	  if (p->type != 'F')
+	    base = 8;		  /* leading 0 is octal, for non-floats */
+	  STORE (c);
+	  GET (c);
+	  if (c == 'x' || c == 'X')
+	    {
+	      base = 16;
+	      seen_digit = 0;	  /* must have digits after an 0x */
+	      if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */
+		hexfloat = 1;
+	      else
+		STORE (c);
+	      GET (c);
+	    }
+	}
+    }
+
+ digits:
+  for (;;)
+    {
+      if (base == 16)
+	{
+	  if (! isxdigit (c))
+	    break;
+	}
+      else
+	{
+	  if (! isdigit (c))
+	    break;
+	  if (base == 8 && (c == '8' || c == '9'))
+	    break;
+	}
+
+      seen_digit = 1;
+      STORE (c);
+      GET (c);
+    }
+
+  if (first)
+    {
+      /* decimal point */
+      if (p->type == 'F' && ! seen_point)
+	{
+	  /* For a multi-character decimal point, if the first character is
+	     present then all of it must be, otherwise the input is
+	     considered invalid.  */
+	  const char  *point = GMP_DECIMAL_POINT;
+	  int	      pc = (unsigned char) *point++;
+	  if (c == pc)
+	    {
+	      for (;;)
+		{
+		  STORE (c);
+		  GET (c);
+		  pc = (unsigned char) *point++;
+		  if (pc == '\0')
+		    break;
+		  if (c != pc)
+		    goto set_invalid;
+		}
+	      seen_point = 1;
+	      goto digits;
+	    }
+	}
+
+      /* exponent */
+      if (p->type == 'F')
+	{
+	  if (hexfloat && (c == 'p' || c == 'P'))
+	    {
+	      hexexp = s_upto; /* exponent location */
+	      base = 10;       /* exponent in decimal */
+	      goto exponent;
+	    }
+	  else if (! hexfloat && (c == 'e' || c == 'E'))
+	    {
+	    exponent:
+	      /* must have at least one digit in the mantissa, just an exponent
+		 is not good enough */
+	      if (! seen_digit)
+		goto set_invalid;
+
+	    do_second:
+	      first = 0;
+	      STORE (c);
+	      GET (c);
+	      goto another;
+	    }
+	}
+
+      /* denominator */
+      if (p->type == 'Q' && c == '/')
+	{
+	  /* must have at least one digit in the numerator */
+	  if (! seen_digit)
+	    goto set_invalid;
+
+	  /* now look for at least one digit in the denominator */
+	  seen_digit = 0;
+
+	  /* allow the base to be redetermined for "%i" */
+	  base = p->base;
+	  goto do_second;
+	}
+    }
+
+ convert:
+  if (! seen_digit)
+    {
+    set_invalid:
+      invalid = 1;
+      goto done;
+    }
+
+  if (! p->ignore)
+    {
+      STORE ('\0');
+      TRACE (printf ("	convert \"%s\"\n", s));
+
+      /* We ought to have parsed out a valid string above, so just test
+	 mpz_set_str etc with an ASSERT.  */
+      switch (p->type) {
+      case 'F':
+	{
+	  mpf_ptr  f = (mpf_ptr) dst;
+	  if (hexexp != 0)
+	    s[hexexp] = '\0';
+	  ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));
+	  if (hexexp != 0)
+	    {
+	      char *dummy;
+	      long  exp;
+	      exp = strtol (s + hexexp + 1, &dummy, 10);
+	      if (exp >= 0)
+		mpf_mul_2exp (f, f, (unsigned long) exp);
+	      else
+		mpf_div_2exp (f, f, NEG_CAST (unsigned long, exp));
+	    }
+	}
+	break;
+      case 'Q':
+	ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
+	break;
+      case 'Z':
+	ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
+	break;
+      default:
+	ASSERT (0);
+	/*FALLTHRU*/
+	break;
+      }
+    }
+
+ done:
+  ASSERT (chars <= width+1);
+  if (chars != width+1)
+    {
+      (*funs->unget) (c, data);
+      TRACE (printf ("	ungetc %d, to give %d chars\n", c, chars-1));
+    }
+  chars--;
+
+  (*__gmp_free_func) (s, s_alloc);
+
+  if (invalid)
+    {
+      TRACE (printf ("	invalid\n"));
+      return -1;
+    }
+
+  TRACE (printf ("  return %d chars (cf width %d)\n", chars, width));
+  return chars;
+}
+
+
+/* Read and discard whitespace, if any.  Return number of chars skipped.
+   Whitespace skipping never provokes the EOF return from __gmp_doscan, so
+   it's not necessary to watch for EOF from funs->get, */
+static int
+skip_white (const struct gmp_doscan_funs_t *funs, void *data)
+{
+  int  c;
+  int  ret = 0;
+
+  do
+    {
+      c = (funs->get) (data);
+      ret++;
+    }
+  while (isspace (c));
+
+  (funs->unget) (c, data);
+  ret--;
+
+  TRACE (printf ("  skip white %d\n", ret));
+  return ret;
+}
+
+
+int
+__gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
+	      const char *orig_fmt, va_list orig_ap)
+{
+  struct gmp_doscan_params_t  param;
+  va_list     ap;
+  char	      *alloc_fmt;
+  const char  *fmt, *this_fmt, *end_fmt;
+  size_t      orig_fmt_len, alloc_fmt_size, len;
+  int	      new_fields, new_chars;
+  char	      fchar;
+  int	      fields = 0;
+  int	      chars = 0;
+
+  TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
+	 if (funs->scan == (gmp_doscan_scan_t) sscanf)
+	   printf ("  s=\"%s\"\n", * (const char **) data));
+
+  /* Don't modify orig_ap, if va_list is actually an array and hence call by
+     reference.  It could be argued that it'd be more efficient to leave
+     callers to make a copy if they care, but doing so here is going to be a
+     very small part of the total work, and we may as well keep applications
+     out of trouble.  */
+  va_copy (ap, orig_ap);
+
+  /* Parts of the format string are going to be copied so that a " %n" can
+     be appended.  alloc_fmt is some space for that.  orig_fmt_len+4 will be
+     needed if fmt consists of a single "%" specifier, but otherwise is an
+     overestimate.  We're not going to be very fast here, so use
+     __gmp_allocate_func rather than TMP_ALLOC.  */
+  orig_fmt_len = strlen (orig_fmt);
+  alloc_fmt_size = orig_fmt_len + 4;
+  alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
+
+  fmt = orig_fmt;
+  ASSERT_CODE (end_fmt = orig_fmt + orig_fmt_len);
+
+  for (;;)
+    {
+    next:
+      fchar = *fmt++;
+
+      if (fchar == '\0')
+	break;
+
+      if (isspace (fchar))
+	{
+	  chars += skip_white (funs, data);
+	  continue;
+	}
+
+      if (fchar != '%')
+	{
+	  int  c;
+	literal:
+	  c = (funs->get) (data);
+	  if (c != fchar)
+	    {
+	      (funs->unget) (c, data);
+	      if (c == EOF)
+		{
+		eof_no_match:
+		  if (fields == 0)
+		    fields = EOF;
+		}
+	      goto done;
+	    }
+	  chars++;
+	  continue;
+	}
+
+      param.type = '\0';
+      param.base = 0;	 /* for e,f,g,i */
+      param.ignore = 0;
+      param.width = 0;
+
+      this_fmt = fmt-1;
+      TRACE (printf ("	this_fmt \"%s\"\n", this_fmt));
+
+      for (;;)
+	{
+	  ASSERT (fmt <= end_fmt);
+
+	  fchar = *fmt++;
+	  switch (fchar) {
+
+	  case '\0':  /* unterminated % sequence */
+	    ASSERT (0);
+	    goto done;
+
+	  case '%':   /* literal % */
+	    goto literal;
+
+	  case '[':   /* character range */
+	    fchar = *fmt++;
+	    if (fchar == '^')
+	      fchar = *fmt++;
+	    /* ']' allowed as the first char (possibly after '^') */
+	    if (fchar == ']')
+	      fchar = *fmt++;
+	    for (;;)
+	      {
+		ASSERT (fmt <= end_fmt);
+		if (fchar == '\0')
+		  {
+		    /* unterminated % sequence */
+		    ASSERT (0);
+		    goto done;
+		  }
+		if (fchar == ']')
+		  break;
+		fchar = *fmt++;
+	      }
+	    /*FALLTHRU*/
+	  case 'c':   /* characters */
+	  case 's':   /* string of non-whitespace */
+	  case 'p':   /* pointer */
+	  libc_type:
+	    len = fmt - this_fmt;
+	    memcpy (alloc_fmt, this_fmt, len);
+	    alloc_fmt[len++] = '%';
+	    alloc_fmt[len++] = 'n';
+	    alloc_fmt[len] = '\0';
+
+	    TRACE (printf ("  scan \"%s\"\n", alloc_fmt);
+		   if (funs->scan == (gmp_doscan_scan_t) sscanf)
+		     printf ("	s=\"%s\"\n", * (const char **) data));
+
+	    new_chars = -1;
+	    if (param.ignore)
+	      {
+		new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL);
+		ASSERT (new_fields == 0 || new_fields == EOF);
+	      }
+	    else
+	      {
+		void *arg = va_arg (ap, void *);
+		new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars);
+		ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
+
+		if (new_fields == 0)
+		  goto done;  /* invalid input */
+
+		if (new_fields == 1)
+		  ASSERT (new_chars != -1);
+	      }
+	    TRACE (printf ("  new_fields %d   new_chars %d\n",
+			   new_fields, new_chars));
+
+	    if (new_fields == -1)
+	      goto eof_no_match;  /* EOF before anything matched */
+
+	    /* Under param.ignore, when new_fields==0 we don't know if
+	       it's a successful match or an invalid field.  new_chars
+	       won't have been assigned if it was an invalid field.  */
+	    if (new_chars == -1)
+	      goto done;  /* invalid input */
+
+	    chars += new_chars;
+	    (*funs->step) (data, new_chars);
+
+	  increment_fields:
+	    if (! param.ignore)
+	      fields++;
+	    goto next;
+
+	  case 'd':   /* decimal */
+	  case 'u':   /* decimal */
+	    param.base = 10;
+	    goto numeric;
+
+	  case 'e':   /* float */
+	  case 'E':   /* float */
+	  case 'f':   /* float */
+	  case 'g':   /* float */
+	  case 'G':   /* float */
+	  case 'i':   /* integer with base marker */
+	  numeric:
+	    if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
+	      goto libc_type;
+
+	    chars += skip_white (funs, data);
+
+	    new_chars = gmpscan (funs, data, &param,
+				 param.ignore ? NULL : va_arg (ap, void*));
+	    if (new_chars == -2)
+	      goto eof_no_match;
+	    if (new_chars == -1)
+	      goto done;
+
+	    ASSERT (new_chars >= 0);
+	    chars += new_chars;
+	    goto increment_fields;
+
+	  case 'a':   /* glibc allocate string */
+	  case '\'':  /* glibc digit groupings */
+	    break;
+
+	  case 'F':   /* mpf_t */
+	  case 'j':   /* intmax_t */
+	  case 'L':   /* long long */
+	  case 'q':   /* quad_t */
+	  case 'Q':   /* mpq_t */
+	  case 't':   /* ptrdiff_t */
+	  case 'z':   /* size_t */
+	  case 'Z':   /* mpz_t */
+	  set_type:
+	    param.type = fchar;
+	    break;
+
+	  case 'h':   /* short or char */
+	    if (param.type != 'h')
+	      goto set_type;
+	    param.type = 'H';	/* internal code for "hh" */
+	    break;
+
+	    goto numeric;
+
+	  case 'l':   /* long, long long, double or long double */
+	    if (param.type != 'l')
+	      goto set_type;
+	    param.type = 'L';	/* "ll" means "L" */
+	    break;
+
+	  case 'n':
+	    if (! param.ignore)
+	      {
+		void  *p;
+		p = va_arg (ap, void *);
+		TRACE (printf ("  store %%n to %p\n", p));
+		switch (param.type) {
+		case '\0': * (int	*) p = chars; break;
+		case 'F':  mpf_set_si ((mpf_ptr) p, (long) chars); break;
+		case 'H':  * (char	*) p = chars; break;
+		case 'h':  * (short	*) p = chars; break;
+#if HAVE_INTMAX_T
+		case 'j':  * (intmax_t	*) p = chars; break;
+#else
+		case 'j':  ASSERT_FAIL (intmax_t not available); break;
+#endif
+		case 'l':  * (long	*) p = chars; break;
+#if HAVE_QUAD_T && HAVE_LONG_LONG
+		case 'q':
+		  ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
+		  /*FALLTHRU*/
+#else
+		case 'q':  ASSERT_FAIL (quad_t not available); break;
+#endif
+#if HAVE_LONG_LONG
+		case 'L':  * (long long *) p = chars; break;
+#else
+		case 'L':  ASSERT_FAIL (long long not available); break;
+#endif
+		case 'Q':  mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
+#if HAVE_PTRDIFF_T
+		case 't':  * (ptrdiff_t *) p = chars; break;
+#else
+		case 't':  ASSERT_FAIL (ptrdiff_t not available); break;
+#endif
+		case 'z':  * (size_t	*) p = chars; break;
+		case 'Z':  mpz_set_si ((mpz_ptr) p, (long) chars); break;
+		default: ASSERT (0); break;
+		}
+	      }
+	    goto next;
+
+	  case 'o':
+	    param.base = 8;
+	    goto numeric;
+
+	  case 'x':
+	  case 'X':
+	    param.base = 16;
+	    goto numeric;
+
+	  case '0': case '1': case '2': case '3': case '4':
+	  case '5': case '6': case '7': case '8': case '9':
+	    param.width = 0;
+	    do {
+	      param.width = param.width * 10 + (fchar-'0');
+	      fchar = *fmt++;
+	    } while (isdigit (fchar));
+	    fmt--; /* unget the non-digit */
+	    break;
+
+	  case '*':
+	    param.ignore = 1;
+	    break;
+
+	  default:
+	    /* something invalid in a % sequence */
+	    ASSERT (0);
+	    goto next;
+	  }
+	}
+    }
+
+ done:
+  (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
+  return fields;
+}
diff --git a/scanf/fscanf.c b/scanf/fscanf.c
new file mode 100644
index 0000000..ce74aa4
--- /dev/null
+++ b/scanf/fscanf.c
@@ -0,0 +1,47 @@
+/* gmp_fscanf -- formatted input from a FILE.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+int
+gmp_fscanf (FILE *fp, const char *fmt, ...)
+{
+  va_list  ap;
+  int      ret;
+  va_start (ap, fmt);
+
+  ret = __gmp_doscan (&__gmp_fscanf_funs, fp, fmt, ap);
+  va_end (ap);
+  return ret;
+}
diff --git a/scanf/fscanffuns.c b/scanf/fscanffuns.c
new file mode 100644
index 0000000..f2d2939
--- /dev/null
+++ b/scanf/fscanffuns.c
@@ -0,0 +1,61 @@
+/* __gmp_fscanf_funs -- support for formatted input from a FILE.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+/* SunOS 4 stdio.h doesn't provide prototypes for these */
+#if ! HAVE_DECL_FGETC
+int fgetc (FILE *);
+#endif
+#if ! HAVE_DECL_FSCANF
+int fscanf (FILE *, const char *, ...);
+#endif
+#if ! HAVE_DECL_UNGETC
+int ungetc (int, FILE *);
+#endif
+
+
+static void
+step (FILE *fp, int n)
+{
+}
+
+const struct gmp_doscan_funs_t  __gmp_fscanf_funs = {
+  (gmp_doscan_scan_t)  fscanf,
+  (gmp_doscan_step_t)  step,
+  (gmp_doscan_get_t)   fgetc,
+  (gmp_doscan_unget_t) ungetc,
+};
diff --git a/scanf/scanf.c b/scanf/scanf.c
new file mode 100644
index 0000000..645188c
--- /dev/null
+++ b/scanf/scanf.c
@@ -0,0 +1,47 @@
+/* gmp_scanf -- formatted input from stdin.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+int
+gmp_scanf (const char *fmt, ...)
+{
+  va_list  ap;
+  int      ret;
+  va_start (ap, fmt);
+
+  ret = __gmp_doscan (&__gmp_fscanf_funs, stdin, fmt, ap);
+  va_end (ap);
+  return ret;
+}
diff --git a/scanf/sscanf.c b/scanf/sscanf.c
new file mode 100644
index 0000000..4486b83
--- /dev/null
+++ b/scanf/sscanf.c
@@ -0,0 +1,52 @@
+/* gmp_sscanf -- formatted input from a string.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+int
+gmp_sscanf (const char *s, const char *fmt, ...)
+{
+  va_list  ap;
+  int      ret;
+  va_start (ap, fmt);
+
+#if SSCANF_WRITABLE_INPUT
+  /* let gmp_vsscanf handle the copying */
+  ret = gmp_vsscanf (s, fmt, ap);
+#else
+  ret = __gmp_doscan (&__gmp_sscanf_funs, (void *) &s, fmt, ap);
+#endif
+  va_end (ap);
+  return ret;
+}
diff --git a/scanf/sscanffuns.c b/scanf/sscanffuns.c
new file mode 100644
index 0000000..3ee6b63
--- /dev/null
+++ b/scanf/sscanffuns.c
@@ -0,0 +1,123 @@
+/* __gmp_sscanf_funs -- support for formatted input from a string.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001-2003, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include "gmp-impl.h"
+
+
+#if 0
+static int
+scan (const char **sp, const char *fmt, ...)
+{
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = vsscanf(*sp, fmt, ap);
+    va_end(ap);
+
+    return ret;
+}
+#else
+static int
+scan (const char **sp, const char *fmt, ...)
+{
+  va_list ap;
+  void *p1, *p2;
+  int ret;
+
+  va_start (ap, fmt);
+  p1 = va_arg (ap, void *);
+  p2 = va_arg (ap, void *);
+
+  ret = sscanf (*sp, fmt, p1, p2);
+
+  va_end (ap);
+
+  return ret;
+}
+#endif
+
+static void
+step (const char **sp, int n)
+{
+  ASSERT (n >= 0);
+
+  /* shouldn't push us past the end of the string */
+#if WANT_ASSERT
+  {
+    int  i;
+    for (i = 0; i < n; i++)
+      ASSERT ((*sp)[i] != '\0');
+  }
+#endif
+
+  (*sp) += n;
+}
+
+static int
+get (const char **sp)
+{
+  const char  *s;
+  int  c;
+  s = *sp;
+  c = (unsigned char) *s++;
+  if (c == '\0')
+    return EOF;
+  *sp = s;
+  return c;
+}
+
+static void
+unget (int c, const char **sp)
+{
+  const char  *s;
+  s = *sp;
+  if (c == EOF)
+    {
+      ASSERT (*s == '\0');
+      return;
+    }
+  s--;
+  ASSERT ((unsigned char) *s == c);
+  *sp = s;
+}
+
+const struct gmp_doscan_funs_t  __gmp_sscanf_funs = {
+  (gmp_doscan_scan_t)  scan,
+  (gmp_doscan_step_t)  step,
+  (gmp_doscan_get_t)   get,
+  (gmp_doscan_unget_t) unget,
+};
diff --git a/scanf/vfscanf.c b/scanf/vfscanf.c
new file mode 100644
index 0000000..98b810a
--- /dev/null
+++ b/scanf/vfscanf.c
@@ -0,0 +1,41 @@
+/* gmp_vfscanf -- formatted input from a FILE.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+int
+gmp_vfscanf (FILE *fp, const char *fmt, va_list ap)
+{
+  return __gmp_doscan (&__gmp_fscanf_funs, fp, fmt, ap);
+}
diff --git a/scanf/vscanf.c b/scanf/vscanf.c
new file mode 100644
index 0000000..2a8cfc1
--- /dev/null
+++ b/scanf/vscanf.c
@@ -0,0 +1,42 @@
+/* gmp_vscanf -- formatted input from stdin.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+
+#include <stdio.h>
+
+#include "gmp-impl.h"
+
+
+/* Formatted input from stdin according to "fmt", fetching argument
+   destinations from "ap".  Identical to gmp_vfscanf with fp=stdin.  */
+int
+gmp_vscanf (const char *fmt, va_list ap)
+{
+  return __gmp_doscan (&__gmp_fscanf_funs, stdin, fmt, ap);
+}
diff --git a/scanf/vsscanf.c b/scanf/vsscanf.c
new file mode 100644
index 0000000..7c7b98a
--- /dev/null
+++ b/scanf/vsscanf.c
@@ -0,0 +1,60 @@
+/* gmp_vsscanf -- formatted input from a string.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdarg.h>
+
+#include <string.h>
+
+#include "gmp-impl.h"
+
+
+/* Formatted input from the string "s" according to "fmt", fetching
+   argument destinations from "ap".  The "data" handed to __gmp_doscan is
+   a pointer to the current string position, advanced as input is
+   consumed by the __gmp_sscanf_funs callbacks.  */
+int
+gmp_vsscanf (const char *s, const char *fmt, va_list ap)
+{
+#if SSCANF_WRITABLE_INPUT
+  /* We only actually need this if there's standard C types in fmt, and if
+     "s" is not already writable, but it's too much trouble to check that,
+     and in any case this writable sscanf input business is only for a few
+     old systems. */
+  size_t size;
+  char   *alloc;
+  int    ret;
+  size = strlen (s) + 1;	/* +1 to copy the terminating '\0' too */
+  alloc = __GMP_ALLOCATE_FUNC_TYPE (size, char);
+  memcpy (alloc, s, size);
+  s = alloc;
+  ret = __gmp_doscan (&__gmp_sscanf_funs, (void *) &s, fmt, ap);
+  (*__gmp_free_func) (alloc, size);
+  return ret;
+
+#else
+  return __gmp_doscan (&__gmp_sscanf_funs, (void *) &s, fmt, ap);
+#endif
+}
diff --git a/tal-debug.c b/tal-debug.c
new file mode 100644
index 0000000..38c27aa
--- /dev/null
+++ b/tal-debug.c
@@ -0,0 +1,150 @@
+/* TMP_ALLOC routines for debugging.
+
+Copyright 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp-impl.h"
+
+
+/* This method aims to help a malloc debugger find problems.  A linked list
+   of allocated blocks is kept for TMP_FREE to release.  This is reentrant
+   and thread safe.
+
+   Each TMP_ALLOC is a separate malloced block, so redzones or sentinels
+   applied by a malloc debugger either above or below can guard against
+   accesses outside the allocated area.
+
+   A marker is a "struct tmp_debug_t *" so that TMP_DECL can initialize it
+   to NULL and we can detect TMP_ALLOC without TMP_MARK.
+
+   It will work to realloc an MPZ_TMP_INIT variable, but when TMP_FREE comes
+   to release the memory it will have the old size, thereby triggering an
+   error from tests/memory.c.
+
+   Possibilities:
+
+   It'd be possible to keep a global list of active "struct tmp_debug_t"
+   records, so at the end of a program any TMP leaks could be printed.  But
+   if only a couple of routines are under test at any one time then the
+   likely culprit should be easy enough to spot.  */
+
+
+/* Record a TMP_MARK at FILE/LINE.  Verifies the TMP_MARK name matches the
+   TMP_DECL in scope and that this is not a repeat mark (reporting the
+   location of the previous mark when available), then points *markp at
+   "mark" and initializes it with the marking location and an empty
+   allocation list for __gmp_tmp_debug_alloc to chain onto.  */
+void
+__gmp_tmp_debug_mark (const char *file, int line,
+                      struct tmp_debug_t **markp, struct tmp_debug_t *mark,
+                      const char *decl_name, const char *mark_name)
+{
+  if (strcmp (mark_name, decl_name) != 0)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: TMP_MARK(%s) but TMP_DECL(%s) is in scope\n",
+               mark_name, decl_name);
+      abort ();
+    }
+
+  /* *markp non-NULL means a TMP_MARK was already done in this scope.  */
+  if (*markp != NULL)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: Repeat of TMP_MARK(%s)\n", mark_name);
+      if (mark->file != NULL && mark->file[0] != '\0' && mark->line != -1)
+        {
+          __gmp_assert_header (mark->file, mark->line);
+          fprintf (stderr, "previous was here\n");
+        }
+      abort ();
+    }
+
+  *markp = mark;
+  mark->file = file;
+  mark->line = line;
+  mark->list = NULL;
+}
+
+/* Allocate "size" bytes under the mark in *markp.  Each request gets its
+   own separately malloced block (so a malloc debugger's redzones can
+   guard it, per the file comment above) plus a bookkeeping entry pushed
+   onto mark->list for __gmp_tmp_debug_free to release.  A TMP_ALLOC
+   without a preceding TMP_MARK is diagnosed and aborts.  The "dummy"
+   parameter is unused here; presumably it keeps the signature uniform
+   with the other tal-*.c implementations -- TODO confirm.  */
+void *
+__gmp_tmp_debug_alloc (const char *file, int line, int dummy,
+                       struct tmp_debug_t **markp,
+                       const char *decl_name, size_t size)
+{
+  struct tmp_debug_t        *mark = *markp;
+  struct tmp_debug_entry_t  *p;
+
+  ASSERT_ALWAYS (size >= 1);
+
+  if (mark == NULL)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: TMP_ALLOC without TMP_MARK(%s)\n", decl_name);
+      abort ();
+    }
+
+  p = __GMP_ALLOCATE_FUNC_TYPE (1, struct tmp_debug_entry_t);
+  p->size = size;
+  p->block = (*__gmp_allocate_func) (size);
+  p->next = mark->list;
+  mark->list = p;
+  return p->block;
+}
+
+/* Release everything allocated since the matching TMP_MARK.  Diagnoses a
+   TMP_FREE without TMP_MARK and a TMP_FREE whose name doesn't match the
+   TMP_DECL in scope, then walks mark->list freeing each user block and
+   its bookkeeping entry.  *markp is reset to NULL so a later TMP_ALLOC
+   without a fresh TMP_MARK is caught.  */
+void
+__gmp_tmp_debug_free (const char *file, int line, int dummy,
+                      struct tmp_debug_t **markp,
+                      const char *decl_name, const char *free_name)
+{
+  struct tmp_debug_t        *mark = *markp;
+  struct tmp_debug_entry_t  *p, *next;
+
+  if (mark == NULL)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: TMP_FREE(%s) without TMP_MARK(%s)\n",
+               free_name, decl_name);
+      abort ();
+    }
+
+  if (strcmp (free_name, decl_name) != 0)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: TMP_FREE(%s) when TMP_DECL(%s) is in scope\n",
+               free_name, decl_name);
+      abort ();
+    }
+
+  p = mark->list;
+  while (p != NULL)
+    {
+      next = p->next;
+      (*__gmp_free_func) (p->block, p->size);
+      __GMP_FREE_FUNC_TYPE (p, 1, struct tmp_debug_entry_t);
+      p = next;
+    }
+
+  *markp = NULL;
+}
diff --git a/tal-notreent.c b/tal-notreent.c
new file mode 100644
index 0000000..083fa84
--- /dev/null
+++ b/tal-notreent.c
@@ -0,0 +1,129 @@
+/* Stack allocation routines.  This is intended for machines without support
+   for the `alloca' function.
+
+Copyright 1996, 1997, 1999-2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+
+/* One chunk of memory from __gmp_allocate_func.  Chunks are linked
+   through "prev" into a stack; allocation within a chunk is bump-pointer
+   style between alloc_point and end.  */
+struct tmp_stack
+{
+  void *end;		/* one past the last usable byte of this chunk */
+  void *alloc_point;	/* next free byte within this chunk */
+  struct tmp_stack *prev;
+};
+typedef struct tmp_stack tmp_stack;
+
+
+/* Global state: this implementation is deliberately NOT reentrant (hence
+   the file name tal-notreent.c); see tal-reent.c for the variant that is.  */
+static unsigned long max_total_allocation = 0;
+static unsigned long current_total_allocation = 0;
+
+/* "xxx" is a sentinel chunk with zero free space (end == alloc_point), so
+   "current" is never NULL and the first alloc always gets a real chunk.  */
+static tmp_stack xxx = {&xxx, &xxx, 0};
+static tmp_stack *current = &xxx;
+
+/* The rounded size of the header of each allocation block.  */
+#define HSIZ   ROUND_UP_MULTIPLE (sizeof (tmp_stack), __TMP_ALIGN)
+
+
+/* Allocate a block of exactly <size> bytes.  This should only be called
+   through the TMP_ALLOC macro, which takes care of rounding/alignment.  */
+void *
+__gmp_tmp_alloc (unsigned long size)
+{
+  void *that;
+
+  ASSERT ((size % __TMP_ALIGN) == 0);
+  /* The (unsigned) pointer truncation here only feeds an alignment check,
+     so losing high bits on 64-bit targets is harmless.  */
+  ASSERT (((unsigned) current->alloc_point % __TMP_ALIGN) == 0);
+
+  /* Not enough room left in the current chunk: push a new chunk.  */
+  if (size > (char *) current->end - (char *) current->alloc_point)
+    {
+      void *chunk;
+      tmp_stack *header;
+      unsigned long chunk_size;
+      unsigned long now;
+
+      /* Allocate a chunk that makes the total current allocation somewhat
+	 larger than the maximum allocation ever.  If size is very large, we
+	 allocate that much.  */
+
+      now = current_total_allocation + size;
+      if (now > max_total_allocation)
+	{
+	  /* We need more temporary memory than ever before.  Increase
+	     for future needs.  Grow by 50% and round up to __TMP_ALIGN.  */
+	  now = (now * 3 / 2 + __TMP_ALIGN - 1) & -__TMP_ALIGN;
+	  chunk_size = now - current_total_allocation + HSIZ;
+	  current_total_allocation = now;
+	  max_total_allocation = current_total_allocation;
+	}
+      else
+	{
+	  /* A previous high-water mark exists; allocate up to it.  */
+	  chunk_size = max_total_allocation - current_total_allocation + HSIZ;
+	  current_total_allocation = max_total_allocation;
+	}
+
+      /* The chunk starts with its own tmp_stack header, user space after.  */
+      chunk = (*__gmp_allocate_func) (chunk_size);
+      header = (tmp_stack *) chunk;
+      header->end = (char *) chunk + chunk_size;
+      header->alloc_point = (char *) chunk + HSIZ;
+      header->prev = current;
+      current = header;
+    }
+
+  that = current->alloc_point;
+  current->alloc_point = (char *) that + size;
+  ASSERT (((unsigned) that % __TMP_ALIGN) == 0);
+  return that;
+}
+
+/* Typically called at function entry.  <mark> is assigned so that
+   __gmp_tmp_free can later be used to reclaim all subsequently allocated
+   storage.  */
+void
+__gmp_tmp_mark (struct tmp_marker *mark)
+{
+  mark->which_chunk = current;			/* chunk active at mark time */
+  mark->alloc_point = current->alloc_point;	/* fill position within it */
+}
+
+/* Free everything allocated since <mark> was assigned by __gmp_tmp_mark */
+void
+__gmp_tmp_free (struct tmp_marker *mark)
+{
+  /* Pop and release whole chunks pushed after the marked one.  */
+  while (mark->which_chunk != current)
+    {
+      tmp_stack *tmp;
+
+      tmp = current;
+      current = tmp->prev;
+      /* Usable space only; the HSIZ header wasn't counted at alloc time.  */
+      current_total_allocation -= (((char *) (tmp->end) - (char *) tmp) - HSIZ);
+      (*__gmp_free_func) (tmp, (char *) tmp->end - (char *) tmp);
+    }
+  /* Rewind the bump pointer within the marked chunk itself.  */
+  current->alloc_point = mark->alloc_point;
+}
diff --git a/tal-reent.c b/tal-reent.c
new file mode 100644
index 0000000..fb43e06
--- /dev/null
+++ b/tal-reent.c
@@ -0,0 +1,81 @@
+/* TMP_ALLOC routines using malloc in a reentrant fashion.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+
+
+/* Each TMP_ALLOC uses __gmp_allocate_func to get a block of memory of the
+   size requested, plus a header at the start which is used to hold the
+   blocks on a linked list in the marker variable, ready for TMP_FREE to
+   release.
+
+   Callers should try to do multiple allocs with one call, in the style of
+   TMP_ALLOC_LIMBS_2 if it's easy to arrange, since that will keep down the
+   number of separate malloc calls.
+
+   Enhancements:
+
+   Could inline both TMP_ALLOC and TMP_FREE, though TMP_ALLOC would need the
+   compiler to have "inline" since it returns a value.  The calls to malloc
+   will be slow though, so it hardly seems worth worrying about one extra
+   level of function call.  */
+
+
+/* Header size rounded up so the user block following it keeps
+   __TMP_ALIGN alignment.  */
+#define HSIZ   ROUND_UP_MULTIPLE (sizeof (struct tmp_reentrant_t), __TMP_ALIGN)
+
+/* Allocate "size" bytes with a tmp_reentrant_t header on the front, and
+   push the block onto the list at *markp for __gmp_tmp_reentrant_free to
+   release.  Returns a pointer just past the header.  */
+void *
+__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **markp, size_t size)
+{
+  char    *p;
+  size_t  total_size;
+
+/* View the raw allocation as its header.  */
+#define P   ((struct tmp_reentrant_t *) p)
+
+  total_size = size + HSIZ;
+  p = __GMP_ALLOCATE_FUNC_TYPE (total_size, char);
+  P->size = total_size;
+  P->next = *markp;
+  *markp = P;
+  return p + HSIZ;
+}
+
+/* Walk the list built by __gmp_tmp_reentrant_alloc, freeing each block
+   (header and user space together) with its recorded size.  */
+void
+__gmp_tmp_reentrant_free (struct tmp_reentrant_t *mark)
+{
+  struct tmp_reentrant_t  *next;
+
+  while (mark != NULL)
+    {
+      next = mark->next;
+      (*__gmp_free_func) ((char *) mark, mark->size);
+      mark = next;
+    }
+}
diff --git a/tests/amd64call.asm b/tests/amd64call.asm
new file mode 100644
index 0000000..e43b01f
--- /dev/null
+++ b/tests/amd64call.asm
@@ -0,0 +1,167 @@
+dnl  AMD64 calling conventions checking.
+
+dnl  Copyright 2000, 2003, 2004, 2006, 2007, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library test suite.
+
+dnl  The GNU MP Library test suite is free software; you can redistribute it
+dnl  and/or modify it under the terms of the GNU General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+
+dnl  The GNU MP Library test suite is distributed in the hope that it will be
+dnl  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+dnl  Public License for more details.
+
+dnl  You should have received a copy of the GNU General Public License along
+dnl  with the GNU MP Library test suite.  If not, see
+dnl  https://www.gnu.org/licenses/.
+
+
+dnl  The current version of the code attempts to keep the call/return
+dnl  prediction stack valid, by matching calls and returns.
+
+include(`config.m4')
+
+
+C void x86_fldcw (unsigned short cw);
+C
+C Execute an fldcw, setting the x87 control word to cw.
+
+PROLOGUE(x86_fldcw)
+	mov	%rdi, -8(%rsp)		C spill cw into the red zone
+	fldcw	-8(%rsp)		C load it as the x87 control word
+	ret
+EPILOGUE()
+
+
+C unsigned short x86_fstcw (void);
+C
+C Execute an fstcw, returning the current x87 control word.
+
+PROLOGUE(x86_fstcw)
+	movq	$0, -8(%rsp)		C clear the slot, fstcw stores only 16 bits
+	fstcw	-8(%rsp)
+	mov	-8(%rsp), %rax		C full 64-bit load gives a zero-extended cw
+	ret
+EPILOGUE()
+
+
+dnl  Instrumented profiling won't come out quite right below, since we don't do
+dnl  an actual "ret".  There's only a few instructions here, so there's no
+dnl  great need to get them separately accounted, just let them get attributed
+dnl  to the caller.  FIXME this comment might no longer be true.
+
+ifelse(WANT_PROFILING,instrument,
+`define(`WANT_PROFILING',no)')
+
+
+C int calling_conventions (...);
+C
+C The global variable "calling_conventions_function" is the function to
+C call, with the arguments as passed here.
+C
+C Perhaps the finit should be done only if the tags word isn't clear, but
+C nothing uses the rounding mode or anything at the moment.
+
+dnl  Byte offsets into calling_conventions_values[], relative to a base
+dnl  register $1.  The eval(8*N) indices must match the WANT/JUNK/SAVE/
+dnl  RETADDR/VAL/RFLAGS layout in tests/amd64check.c.
+
+define(`WANT_RBX', eval(8*0)($1))
+define(`WANT_RBP', eval(8*1)($1))
+define(`WANT_R12', eval(8*2)($1))
+define(`WANT_R13', eval(8*3)($1))
+define(`WANT_R14', eval(8*4)($1))
+define(`WANT_R15', eval(8*5)($1))
+
+define(`JUNK_RAX', eval(8*6)($1))
+define(`JUNK_R10', eval(8*7)($1))
+define(`JUNK_R11', eval(8*8)($1))
+
+define(`SAVE_RBX', eval(8*9)($1))
+define(`SAVE_RBP', eval(8*10)($1))
+define(`SAVE_R12', eval(8*11)($1))
+define(`SAVE_R13', eval(8*12)($1))
+define(`SAVE_R14', eval(8*13)($1))
+define(`SAVE_R15', eval(8*14)($1))
+
+define(`RETADDR',  eval(8*15)($1))
+
+define(`RBX',	   eval(8*16)($1))
+define(`RBP',	   eval(8*17)($1))
+define(`R12',	   eval(8*18)($1))
+define(`R13',	   eval(8*19)($1))
+define(`R14',	   eval(8*20)($1))
+define(`R15',	   eval(8*21)($1))
+define(`RFLAGS',   eval(8*22)($1))
+
+
+dnl  G(name) prepends the platform's global symbol prefix to name.
+define(G,
+m4_assert_numargs(1)
+`GSYM_PREFIX`'$1')
+
+	TEXT
+	ALIGN(32)
+PROLOGUE(calling_conventions)
+	C Pop our own return address into the values vector; it is pushed
+	C back just before the final "ret", keeping the CPU's call/return
+	C prediction stack balanced.
+	mov	G(calling_conventions_values)@GOTPCREL(%rip), %rax
+	pop	RETADDR(%rax)
+
+	C Preserve the callee-saves registers so we can restore them after
+	C loading them with test patterns below.
+	mov	%rbx, SAVE_RBX(%rax)
+	mov	%rbp, SAVE_RBP(%rax)
+	mov	%r12, SAVE_R12(%rax)
+	mov	%r13, SAVE_R13(%rax)
+	mov	%r14, SAVE_R14(%rax)
+	mov	%r15, SAVE_R15(%rax)
+
+	C Values we expect to see unchanged, as per amd64check.c
+	mov	WANT_RBX(%rax), %rbx
+	mov	WANT_RBP(%rax), %rbp
+	mov	WANT_R12(%rax), %r12
+	mov	WANT_R13(%rax), %r13
+	mov	WANT_R14(%rax), %r14
+	mov	WANT_R15(%rax), %r15
+
+	C Try to provoke a problem by starting with junk in the caller-saves
+	C registers, especially %rax which will be the return value.
+C	mov	JUNK_RAX(%rax), %rax		C overwritten below anyway
+	mov	JUNK_R10(%rax), %r10
+	mov	JUNK_R11(%rax), %r11
+
+	C Indirect call to the function under test.
+	mov	G(calling_conventions_function)@GOTPCREL(%rip), %rax
+	call	*(%rax)
+
+	mov	G(calling_conventions_values)@GOTPCREL(%rip), %rcx
+
+	C Record the callee-saves registers as the callee left them, for
+	C amd64check.c to compare against the WANT_* values.
+	mov	%rbx, RBX(%rcx)
+	mov	%rbp, RBP(%rcx)
+	mov	%r12, R12(%rcx)
+	mov	%r13, R13(%rcx)
+	mov	%r14, R14(%rcx)
+	mov	%r15, R15(%rcx)
+
+	C Capture rflags as left by the callee (direction flag checked).
+	pushf
+	pop	%rbx
+	mov	%rbx, RFLAGS(%rcx)
+
+	C Restore our caller's callee-saves registers.
+	mov	SAVE_RBX(%rcx), %rbx
+	mov	SAVE_RBP(%rcx), %rbp
+	mov	SAVE_R12(%rcx), %r12
+	mov	SAVE_R13(%rcx), %r13
+	mov	SAVE_R14(%rcx), %r14
+	mov	SAVE_R15(%rcx), %r15
+
+	C Overwrite parameter registers
+C	mov	JUNK_R9(%rcx), %r9
+C	mov	JUNK_R8(%rcx), %r8
+C	mov	JUNK_RCX(%rcx), %rcx
+C	mov	JUNK_RDX(%rcx), %rdx
+C	mov	JUNK_RSI(%rcx), %rsi
+C	mov	JUNK_RDI(%rcx), %rdi
+
+	C Re-push the saved return address for the "ret" below.
+	push	RETADDR(%rcx)
+
+	C Store the x87 environment for amd64check.c to inspect, then reset
+	C the FPU to a clean state.
+	mov	G(calling_conventions_fenv)@GOTPCREL(%rip), %rcx
+	fstenv	(%rcx)
+	finit
+
+	ret
+
+EPILOGUE()
diff --git a/tests/amd64check.c b/tests/amd64check.c
new file mode 100644
index 0000000..a8cdbe2
--- /dev/null
+++ b/tests/amd64check.c
@@ -0,0 +1,111 @@
+/* AMD64 calling conventions checking.
+
+Copyright 2000, 2001, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Vector of constants and register values.  We use one vector to allow access
+   via a base pointer, very beneficial for the PIC-enabled amd64call.asm.  */
+/* Slot order here must match the eval(8*N) offsets in amd64call.asm.  */
+mp_limb_t calling_conventions_values[23] =
+{
+  CNST_LIMB(0x1234567887654321),	/* want_rbx */
+  CNST_LIMB(0x89ABCDEFFEDCBA98),	/* want_rbp */
+  CNST_LIMB(0xDEADBEEFBADECAFE),	/* want_r12 */
+  CNST_LIMB(0xFFEEDDCCBBAA9988),	/* want_r13 */
+  CNST_LIMB(0x0011223344556677),	/* want_r14 */
+  CNST_LIMB(0x1234432156788765),	/* want_r15 */
+
+  CNST_LIMB(0xFEEDABBACAAFBEED),	/* JUNK_RAX */
+  CNST_LIMB(0xAB78DE89FF5125BB),	/* JUNK_R10 */
+  CNST_LIMB(0x1238901890189031)		/* JUNK_R11 */
+
+  /* rest of array used for dynamic values.  */
+};
+
+/* Index starts for various regions in above vector.  */
+#define WANT	0
+#define JUNK	6
+#define SAVE	9
+#define RETADDR	15
+#define VAL	16
+#define RFLAGS	22
+
+/* values to check */
+/* Filled in by the fstenv in amd64call.asm; the 7 ints give the 28-byte
+   x87 environment image -- presumably the standard protected-mode fstenv
+   layout, though only control/status/tag are named here.  */
+#ifdef __cplusplus
+extern "C" {
+#endif
+struct {
+  int  control;
+  int  status;
+  int  tag;
+  int  other[4];
+} calling_conventions_fenv;
+#ifdef __cplusplus
+}
+#endif
+
+
+/* Names for the checked callee-saves registers, in WANT/VAL slot order.  */
+const char *regname[6] = {"rbx", "rbp", "r12", "r13", "r14", "r15"};
+
+/* Bit 10 of rflags is DF, the string direction flag.  */
+#define DIR_BIT(rflags)   (((rflags) & (1<<10)) != 0)
+
+
+/* Return 1 if ok, 0 if not */
+
+/* Verify the last call through calling_conventions() obeyed the ABI:
+   callee-saves registers preserved, rflags direction flag clear, and all
+   x87 registers empty (tag word 0xFFFF).  Failures are printed, grouped
+   under one banner.  */
+int
+calling_conventions_check (void)
+{
+  const char  *header = "Violated calling conventions:\n";
+  int  ret = 1;
+  int i;
+
+/* Print "header" before the first failure only; it is cleared afterwards
+   so further failures list under the same banner.  */
+#define CHECK(callreg, regstr, value)			\
+  if (callreg != value)					\
+    {							\
+      printf ("%s   %s	got 0x%016lX want 0x%016lX\n",	\
+	      header, regstr, callreg, value);		\
+      header = "";					\
+      ret = 0;						\
+    }
+
+  /* Callee-saves registers must come back with the want_* patterns.  */
+  for (i = 0; i < 6; i++)
+    {
+      CHECK (calling_conventions_values[VAL+i], regname[i], calling_conventions_values[WANT+i]);
+    }
+
+  if (DIR_BIT (calling_conventions_values[RFLAGS]) != 0)
+    {
+      printf ("%s   rflags dir bit  got %d want 0\n",
+	      header, DIR_BIT (calling_conventions_values[RFLAGS]));
+      header = "";
+      ret = 0;
+    }
+
+  /* Tag word 0xFFFF means every x87 stack register is empty.  */
+  if ((calling_conventions_fenv.tag & 0xFFFF) != 0xFFFF)
+    {
+      printf ("%s   fpu tags  got 0x%X want 0xFFFF\n",
+	      header, calling_conventions_fenv.tag & 0xFFFF);
+      header = "";
+      ret = 0;
+    }
+
+  return ret;
+}
diff --git a/tests/arm32call.asm b/tests/arm32call.asm
new file mode 100644
index 0000000..7fd4530
--- /dev/null
+++ b/tests/arm32call.asm
@@ -0,0 +1,83 @@
+dnl  ARM32 calling conventions checking.
+
+dnl  Copyright 2000, 2003, 2004, 2006, 2007, 2010, 2013, 2016 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library test suite.
+
+dnl  The GNU MP Library test suite is free software; you can redistribute it
+dnl  and/or modify it under the terms of the GNU General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+
+dnl  The GNU MP Library test suite is distributed in the hope that it will be
+dnl  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+dnl  Public License for more details.
+
+dnl  You should have received a copy of the GNU General Public License along
+dnl  with the GNU MP Library test suite.  If not, see
+dnl  https://www.gnu.org/licenses/.
+
+
+dnl  The current version of the code attempts to keep the call/return
+dnl  prediction stack valid, by matching calls and returns.
+
+include(`config.m4')
+
+
+C int calling_conventions (...);
+C
+C The global variable "calling_conventions_function" is the function to
+C call, with the arguments as passed here.
+
+dnl  Byte offsets into calling_conventions_values[]; the eval(4*N) indices
+dnl  must match the layout in tests/arm32check.c.
+define(`WANT_CALLEE_SAVES',	eval(4*0))
+define(`SAVE_CALLEE_SAVES',	eval(4*8))
+define(`RETADDR',		eval(4*16))
+define(`GOT_CALLEE_SAVES',	eval(4*17))
+define(`JUNK_PARAMS',		eval(4*25))
+
+	TEXT
+	ALIGN(32)
+PROLOGUE(calling_conventions)
+	LEA(	r12, calling_conventions_values)
+
+	C Preserve callee-saves registers, including the link register r14
+	add	r12, r12, #SAVE_CALLEE_SAVES
+	stm	r12, {r4-r11,r14}
+	sub	r12, r12, #SAVE_CALLEE_SAVES
+
+	C Put chosen junk into callee-saves registers
+	add	r12, r12, #WANT_CALLEE_SAVES
+	ldm	r12, {r4-r11}
+	sub	r12, r12, #WANT_CALLEE_SAVES
+
+	C No callee-saves registers on arm except r12 and parameter registers
+	C
+
+	C Make the actual call.  Reading pc yields "here + 8" on ARM, so the
+	C mov leaves r14 pointing at the instruction after the branch below,
+	C simulating a "bl" through r12.
+	LEA(	r12, calling_conventions_function)
+	ldr	r12, [r12]
+	mov	r14, pc
+	return	r12
+
+	LEA(	r12, calling_conventions_values)
+
+	C Save callee-saves registers after call
+	add	r12, r12, #GOT_CALLEE_SAVES
+	stm	r12, {r4-r11}
+	sub	r12, r12, #GOT_CALLEE_SAVES
+
+	C Restore callee-saves registers, including the link register r14
+	add	r12, r12, #SAVE_CALLEE_SAVES
+	ldm	r12, {r4-r11,r14}
+	sub	r12, r12, #SAVE_CALLEE_SAVES
+
+	C Overwrite parameter registers.  Note that we overwrite r1, which
+	C could hold one half of a 64-bit return value, since we don't use that
+	C in GMP.
+	add	r12, r12, #JUNK_PARAMS
+	ldm	r12, {r1-r3}
+
+	return	r14
+EPILOGUE()
diff --git a/tests/arm32check.c b/tests/arm32check.c
new file mode 100644
index 0000000..0a9f86d
--- /dev/null
+++ b/tests/arm32check.c
@@ -0,0 +1,95 @@
+/* ARM32 calling conventions checking.
+
+Copyright 2000, 2001, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Vector of constants and register values.  */
+/* Slot order here must match the eval(4*N) offsets in arm32call.asm;
+   index 16 ("RETADDR") actually holds the saved link register r14.  */
+mp_limb_t calling_conventions_values[29] =
+{
+  0x12345678,	/*  0 want_r4 */
+  0x87654321,	/*  1 want_r5 */
+  0x89ABCDEF,	/*  2 want_r6 */
+  0xFEDCBA98,	/*  3 want_r7 */
+  0xDEADBEEF,	/*  4 want_r8 */
+  0xBADECAFE,	/*  5 want_r9 */
+  0xFFEEDDCC,	/*  6 want_r10 */
+  0xBBAA9988,	/*  7 want_r11 */
+
+  0x00000000,	/*  8 save_r4 */
+  0x00000000,	/*  9 save_r5 */
+  0x00000000,	/* 10 save_r6 */
+  0x00000000,	/* 11 save_r7 */
+  0x00000000,	/* 12 save_r8 */
+  0x00000000,	/* 13 save_r9 */
+  0x00000000,	/* 14 save_r10 */
+  0x00000000,	/* 15 save_r11 */
+  0x00000000,	/* 16 save_r14 */
+
+  0x00000000,	/* 17 got_r4 */
+  0x00000000,	/* 18 got_r5 */
+  0x00000000,	/* 19 got_r6 */
+  0x00000000,	/* 20 got_r7 */
+  0x00000000,	/* 21 got_r8 */
+  0x00000000,	/* 22 got_r9 */
+  0x00000000,	/* 23 got_r10 */
+  0x00000000,	/* 24 got_r11 */
+
+  0x00112233,	/* 25 junk_r0 */
+  0x44556677,	/* 26 junk_r1 */
+  0x12344321,	/* 27 junk_r2 */
+  0x56788765,	/* 28 junk_r3 */
+};
+
+/* Index starts for various regions in above vector.  */
+#define WANT_CALLEE_SAVES	0
+#define SAVE_CALLEE_SAVES	8
+#define RETADDR			16
+#define GOT_CALLEE_SAVES	17
+#define JUNK_PARAMS		25
+
+/* Return 1 if ok, 0 if not */
+
+/* Verify the last call through calling_conventions() preserved the ARM
+   callee-saves registers r4-r11, comparing got_* against want_*.
+   Failures are printed, grouped under one banner.  */
+int
+calling_conventions_check (void)
+{
+  const char  *header = "Violated calling conventions:\n";
+  int  ret = 1;
+  int i;
+
+/* Print "header" before the first failure only; it is cleared afterwards
+   so further failures list under the same banner.  */
+#define CHECK(callreg, regnum, value)					\
+  if (callreg != value)							\
+    {									\
+      printf ("%s   r%d	got 0x%08lX want 0x%08lX\n",			\
+	      header, regnum, callreg, value);				\
+      header = "";							\
+      ret = 0;								\
+    }
+
+  for (i = 0; i < 8; i++)
+    {
+      CHECK (calling_conventions_values[GOT_CALLEE_SAVES + i],
+	     i + 4,
+	     calling_conventions_values[WANT_CALLEE_SAVES + i]);
+    }
+
+  return ret;
+}
diff --git a/tests/cxx/clocale.c b/tests/cxx/clocale.c
new file mode 100644
index 0000000..7f7c36e
--- /dev/null
+++ b/tests/cxx/clocale.c
@@ -0,0 +1,66 @@
+/* Manipulable localeconv and nl_langinfo.
+
+Copyright 2001, 2002, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_NL_TYPES_H
+#include <nl_types.h>  /* for nl_item */
+#endif
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for lconv */
+#endif
+
+
+/* Replace the libc localeconv and nl_langinfo with ones we can manipulate.
+
+   This is done in a C file since if it was in a C++ file then we'd have to
+   match the "throw" or lack thereof declared for localeconv in <locale.h>.
+   g++ 3.2 gives an error about mismatched throws under "-pedantic", other
+   C++ compilers may very possibly do so too.  */
+
+extern char point_string[];
+
+#if HAVE_LOCALECONV && ! defined __MINGW32__
+/* Test override of the libc localeconv: returns an lconv whose
+   decimal_point is the test-controlled "point_string", everything else
+   zeroed (static storage).  The throw() matches glibc's declaration when
+   compiled as C++.  */
+struct lconv *
+localeconv (void)
+#if defined __cplusplus && defined __GLIBC__
+  throw()
+#endif
+{
+  static struct lconv  l;
+  l.decimal_point = point_string;
+  return &l;
+}
+#endif
+
+#if HAVE_NL_LANGINFO
+/* Test override of nl_langinfo: returns the test-controlled
+   "point_string" regardless of which item "n" is requested.  */
+char *
+nl_langinfo (nl_item n)
+#if defined __cplusplus && defined __GLIBC__
+  throw()
+#endif
+{
+  return point_string;
+}
+#endif
diff --git a/tests/cxx/t-assign.cc b/tests/cxx/t-assign.cc
new file mode 100644
index 0000000..a11b0d9
--- /dev/null
+++ b/tests/cxx/t-assign.cc
@@ -0,0 +1,603 @@
+/* Test mp*_class assignment operators.
+
+Copyright 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+#include <string>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using std::string;
+using std::invalid_argument;
+
+
+void
+check_mpz (void)
+{
+  // operator=(const mpz_class &)
+  {
+    mpz_class a(123), b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // template <class T, class U> operator=(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // operator=(signed char)
+  {
+    signed char a = -127;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -127);
+  }
+
+  // operator=(unsigned char)
+  {
+    unsigned char a = 255;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpz_class a;
+    a = 'A'; ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpz_class a;
+    a = 'z'; ASSERT_ALWAYS(a == 122);
+  }
+
+  // operator=(signed int)
+  {
+    signed int a = 0;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 32767;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 32767);
+  }
+
+  // operator=(unsigned int)
+  {
+    unsigned int a = 65535u;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // operator=(signed short int)
+  {
+    signed short int a = -12345;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -12345);
+  }
+
+  // operator=(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // operator=(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // operator=(unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // operator=(float)
+  {
+    float a = 123.0;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // operator=(double)
+  {
+    double a = 0.0;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    double a = -12.375;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -12);
+  }
+  {
+    double a = 6.789e+3;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 6789);
+  }
+  {
+    double a = 9.375e-1;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+
+  // operator=(long double)
+  // currently not implemented
+
+  // operator=(const char *)
+  {
+    const char *a = "1234567890";
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const std::string &)
+  {
+    string a("1234567890");
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpz_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // operator=(const std::string &) with invalid
+  {
+    try {
+      string a("def");
+      mpz_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // swap(mpz_class &)
+  {
+    mpz_class a(123);
+    mpz_class b(456);
+    a.swap(b);
+    a.swap(a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+
+  // swap(mpz_class &, mpz_class &)
+  {
+    mpz_class a(123);
+    mpz_class b(456);
+    ::swap(a, b);
+    ::swap(a, a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+  {
+    using std::swap;
+    mpz_class a(123);
+    mpz_class b(456);
+    swap(a, b);
+    swap(a, a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+}
+
+void
+check_mpq (void)
+{
+  // operator=(const mpq_class &)
+  {
+    mpq_class a(1, 2), b;
+    b = a; ASSERT_ALWAYS(b == 0.5);
+  }
+
+  // template <class T, class U> operator=(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // operator=(signed char)
+  {
+    signed char a = -127;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -127);
+  }
+
+  // operator=(unsigned char)
+  {
+    unsigned char a = 255;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpq_class a;
+    a = 'A'; ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpq_class a;
+    a = 'z'; ASSERT_ALWAYS(a == 122);
+  }
+
+  // operator=(signed int)
+  {
+    signed int a = 0;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 32767;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 32767);
+  }
+
+  // operator=(unsigned int)
+  {
+    unsigned int a = 65535u;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // operator=(signed short int)
+  {
+    signed short int a = -12345;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -12345);
+  }
+
+  // operator=(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // operator=(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // operator=(unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // operator=(float)
+  {
+    float a = 123.0;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // operator=(double)
+  {
+    double a = 0.0;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    double a = -12.375;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -12.375);
+  }
+  {
+    double a = 6.789e+3;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 6789);
+  }
+  {
+    double a = 9.375e-1;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 0.9375);
+  }
+
+  // operator=(long double)
+  // currently not implemented
+
+  // operator=(const char *)
+  {
+    const char *a = "1234567890";
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const std::string &)
+  {
+    string a("1234567890");
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpq_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // operator=(const std::string &) with invalid
+  {
+    try {
+      string a("def");
+      mpq_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // swap(mpq_class &)
+  {
+    mpq_class a(3, 2);
+    mpq_class b(-1, 4);
+    a.swap(b);
+    a.swap(a);
+    ASSERT_ALWAYS(a == -.25);
+    ASSERT_ALWAYS(b == 1.5);
+  }
+
+  // swap(mpq_class &, mpq_class &)
+  {
+    mpq_class a(3, 2);
+    mpq_class b(-1, 4);
+    ::swap(a, b);
+    ::swap(a, a);
+    ASSERT_ALWAYS(a == -.25);
+    ASSERT_ALWAYS(b == 1.5);
+  }
+  {
+    using std::swap;
+    mpq_class a(3, 2);
+    mpq_class b(-1, 4);
+    swap(a, b);
+    swap(a, a);
+    ASSERT_ALWAYS(a == -.25);
+    ASSERT_ALWAYS(b == 1.5);
+  }
+}
+
+void
+check_mpf (void)
+{
+  // operator=(const mpf_class &)
+  {
+    mpf_class a(123), b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // template <class T, class U> operator=(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // operator=(signed char)
+  {
+    signed char a = -127;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -127);
+  }
+
+  // operator=(unsigned char)
+  {
+    unsigned char a = 255;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpf_class a;
+    a = 'A'; ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpf_class a;
+    a = 'z'; ASSERT_ALWAYS(a == 122);
+  }
+
+  // operator=(signed int)
+  {
+    signed int a = 0;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 32767;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 32767);
+  }
+
+  // operator=(unsigned int)
+  {
+    unsigned int a = 65535u;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // operator=(signed short int)
+  {
+    signed short int a = -12345;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -12345);
+  }
+
+  // operator=(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // operator=(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // operator=(unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // operator=(float)
+  {
+    float a = 123.0;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // operator=(double)
+  {
+    double a = 0.0;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    double a = -12.375;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -12.375);
+  }
+  {
+    double a = 6.789e+3;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 6789);
+  }
+  {
+    double a = 9.375e-1;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 0.9375);
+  }
+
+  // operator=(long double)
+  // currently not implemented
+
+  // operator=(const char *)
+  {
+    const char *a = "1234567890";
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const std::string &)
+  {
+    string a("1234567890");
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpf_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // operator=(const std::string &) with invalid
+  {
+    try {
+      string a("def");
+      mpf_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // swap(mpf_class &)
+  {
+    mpf_class a(123);
+    mpf_class b(456);
+    a.swap(b);
+    a.swap(a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+
+  // swap(mpf_class &, mpf_class &)
+  {
+    mpf_class a(123);
+    mpf_class b(456);
+    ::swap(a, b);
+    ::swap(a, a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+  {
+    using std::swap;
+    mpf_class a(123);
+    mpf_class b(456);
+    swap(a, b);
+    swap(a, a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+}
+
+
+int
+main (void)
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-binary.cc b/tests/cxx/t-binary.cc
new file mode 100644
index 0000000..697adfa
--- /dev/null
+++ b/tests/cxx/t-binary.cc
@@ -0,0 +1,465 @@
+/* Test mp*_class binary expressions.
+
+Copyright 2001-2003, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >
+  {
+    mpz_class a(1), b(2);
+    mpz_class c(a + b); ASSERT_ALWAYS(c == 3);
+  }
+  {
+    mpz_class a(3), b(4);
+    mpz_class c;
+    c = a * b; ASSERT_ALWAYS(c == 12);
+  }
+  {
+    mpz_class a(5), b(3);
+    mpz_class c;
+    c = a % b; ASSERT_ALWAYS(c == 2);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >
+  {
+    mpz_class a(1);
+    signed int b = 3;
+    mpz_class c(a - b); ASSERT_ALWAYS(c == -2);
+  }
+  {
+    mpz_class a(-8);
+    unsigned int b = 2;
+    mpz_class c;
+    c = a / b; ASSERT_ALWAYS(c == -4);
+  }
+  {
+    mpz_class a(2);
+    double b = 3.0;
+    mpz_class c(a + b); ASSERT_ALWAYS(c == 5);
+  }
+  {
+    mpz_class a(4);
+    mpz_class b;
+    b = a + 0; ASSERT_ALWAYS(b == 4);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >
+  {
+    mpz_class a(3);
+    signed int b = 9;
+    mpz_class c(b / a); ASSERT_ALWAYS(c == 3);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+  // type of result can't be mpz
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+  // type of result can't be mpz
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >
+  {
+    mpz_class a(3), b(4);
+    mpz_class c(a * (-b)); ASSERT_ALWAYS(c == -12);
+    c = c * (-b); ASSERT_ALWAYS(c == 48);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >
+  {
+    mpz_class a(3), b(2), c(1);
+    mpz_class d;
+    d = (a % b) + c; ASSERT_ALWAYS(d == 2);
+    d = (a % b) + d; ASSERT_ALWAYS(d == 3);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >
+  {
+    mpz_class a(-5);
+    unsigned int b = 2;
+    mpz_class c((-a) << b); ASSERT_ALWAYS(c == 20);
+  }
+  {
+    mpz_class a(5), b(-4);
+    signed int c = 3;
+    mpz_class d;
+    d = (a * b) >> c; ASSERT_ALWAYS(d == -3);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >
+  {
+    mpz_class a(2), b(4);
+    double c = 6;
+    mpz_class d(c / (a - b)); ASSERT_ALWAYS(d == -3);
+  }
+  {
+    mpz_class a(3), b(2);
+    double c = 1;
+    mpz_class d;
+    d = c + (a + b); ASSERT_ALWAYS(d == 6);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+  // type of result can't be mpz
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+  // type of result can't be mpz
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >
+  {
+    mpz_class a(3), b(5), c(7);
+    mpz_class d;
+    d = (a - b) * (-c); ASSERT_ALWAYS(d == 14);
+    d = (b - d) * (-a); ASSERT_ALWAYS(d == 27);
+    d = (a - b) * (-d); ASSERT_ALWAYS(d == 54);
+  }
+
+  {
+    mpz_class a(0xcafe), b(0xbeef), c, want;
+    c = a & b; ASSERT_ALWAYS (c == 0x8aee);
+    c = a | b; ASSERT_ALWAYS (c == 0xfeff);
+    c = a ^ b; ASSERT_ALWAYS (c == 0x7411);
+    c = a & 0xbeef; ASSERT_ALWAYS (c == 0x8aee);
+    c = a | 0xbeef; ASSERT_ALWAYS (c == 0xfeff);
+    c = a ^ 0xbeef; ASSERT_ALWAYS (c == 0x7411);
+    c = a & -0xbeef; ASSERT_ALWAYS (c == 0x4010);
+    c = a | -0xbeef; ASSERT_ALWAYS (c == -0x3401);
+    c = a ^ -0xbeef; ASSERT_ALWAYS (c == -0x7411);
+    c = a & 48879.0; ASSERT_ALWAYS (c == 0x8aee);
+    c = a | 48879.0; ASSERT_ALWAYS (c == 0xfeff);
+    c = a ^ 48879.0; ASSERT_ALWAYS (c == 0x7411);
+
+    c = a | 1267650600228229401496703205376.0; // 2^100
+    want = "0x1000000000000000000000cafe";
+    ASSERT_ALWAYS (c == want);
+  }
+
+}
+
+void
+check_mpq (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(1, 2), b(3, 4);
+    mpq_class c(a + b); ASSERT_ALWAYS(c == 1.25);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >
+  {
+    mpq_class a(1, 2);
+    signed int b = 3;
+    mpq_class c(a - b); ASSERT_ALWAYS(c == -2.5);
+  }
+  {
+    mpq_class a(1, 2);
+    mpq_class b;
+    b = a + 0; ASSERT_ALWAYS(b == 0.5);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(2, 3);
+    signed int b = 4;
+    mpq_class c;
+    c = b / a; ASSERT_ALWAYS(c == 6);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >
+  {
+    mpq_class a(1, 2);
+    mpz_class b(1);
+    mpq_class c(a + b); ASSERT_ALWAYS(c == 1.5);
+  }
+  {
+    mpq_class a(2, 3);
+    mpz_class b(1);
+    double c = 2.0;
+    mpq_class d;
+    d = a * (b + c); ASSERT_ALWAYS(d == 2);
+    d = d * (b + c); ASSERT_ALWAYS(d == 6);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(2, 3);
+    mpz_class b(4);
+    mpq_class c(b / a); ASSERT_ALWAYS(c == 6);
+  }
+  {
+    mpq_class a(2, 3);
+    mpz_class b(1), c(4);
+    mpq_class d;
+    d = (b - c) * a; ASSERT_ALWAYS(d == -2);
+    d = (b - c) * d; ASSERT_ALWAYS(d == 6);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >
+  {
+    mpq_class a(1, 3), b(3, 4);
+    mpq_class c;
+    c = a * (-b); ASSERT_ALWAYS(c == -0.25);
+    a = a * (-b); ASSERT_ALWAYS(a == -0.25);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(1, 3), b(2, 3), c(1, 4);
+    mpq_class d((a / b) + c); ASSERT_ALWAYS(d == 0.75);
+    c = (a / b) + c; ASSERT_ALWAYS(c == 0.75);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >
+  {
+    mpq_class a(3, 8);
+    unsigned int b = 4;
+    mpq_class c((-a) << b); ASSERT_ALWAYS(c == -6);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >
+  {
+    mpq_class a(1, 2), b(1, 4);
+    double c = 6.0;
+    mpq_class d;
+    d = c / (a + b); ASSERT_ALWAYS(d == 8);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+  {
+    mpq_class a(1, 2), b(1, 4);
+    mpz_class c(1);
+    mpq_class d((a + b) - c); ASSERT_ALWAYS(d == -0.25);
+    d = (a + d) - c; ASSERT_ALWAYS(d == -0.75);
+    d = (a + d) - d.get_num(); ASSERT_ALWAYS(d == 2.75);
+    d = (2 * d) * d.get_den(); ASSERT_ALWAYS(d == 22);
+    d = (b * d) / -d.get_num(); ASSERT_ALWAYS(d == -0.25);
+  }
+  {
+    mpq_class a(1, 3), b(3, 2);
+    mpz_class c(2), d(4);
+    mpq_class e;
+    e = (a * b) / (c - d); ASSERT_ALWAYS(e == -0.25);
+    e = (2 * e) / (c - d); ASSERT_ALWAYS(e ==  0.25);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+  {
+    mpq_class a(1, 3), b(3, 4);
+    mpz_class c(-3);
+    mpq_class d(c * (a * b)); ASSERT_ALWAYS(d == -0.75);
+  }
+  {
+    mpq_class a(1, 3), b(3, 5);
+    mpz_class c(6);
+    signed int d = 4;
+    mpq_class e;
+    e = (c % d) / (a * b); ASSERT_ALWAYS(e == 10);
+    e = (e.get_num() % d) / (2 / e); ASSERT_ALWAYS(e == 10);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >
+  {
+    mpq_class a(1, 3), b(3, 4), c(2, 5);
+    mpq_class d;
+    d = (a * b) / (-c); ASSERT_ALWAYS(d == -0.625);
+    d = (c * d) / (-b); ASSERT_ALWAYS(3 * d == 1);
+    d = (a * c) / (-d); ASSERT_ALWAYS(5 * d == -2);
+  }
+}
+
+void
+check_mpf (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(1), b(2);
+    mpf_class c(a + b); ASSERT_ALWAYS(c == 3);
+  }
+  {
+    mpf_class a(1.5), b(6);
+    mpf_class c;
+    c = a / b; ASSERT_ALWAYS(c == 0.25);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >
+  {
+    mpf_class a(1);
+    signed int b = -2;
+    mpf_class c(a - b); ASSERT_ALWAYS(c == 3);
+  }
+  {
+    mpf_class a(2);
+    mpf_class b;
+    b = a + 0; ASSERT_ALWAYS(b == 2);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(2);
+    unsigned int b = 3;
+    mpf_class c;
+    c = b / a; ASSERT_ALWAYS(c == 1.5);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >
+  {
+    mpf_class a(2);
+    mpz_class b(3);
+    mpf_class c(a - b); ASSERT_ALWAYS(c == -1);
+  }
+  {
+    mpf_class a(3);
+    mpz_class b(2), c(1);
+    mpf_class d;
+    d = a * (b + c); ASSERT_ALWAYS(d == 9);
+    a = a * (b + c); ASSERT_ALWAYS(a == 9);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(6);
+    mpq_class b(3, 4);
+    mpf_class c(a * b); ASSERT_ALWAYS(c == 4.5);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >
+  {
+    mpf_class a(2), b(-3);
+    mpf_class c;
+    c = a * (-b); ASSERT_ALWAYS(c == 6);
+    c = c * (-b); ASSERT_ALWAYS(c == 18);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(3), b(4), c(5);
+    mpf_class d;
+    d = (a / b) - c; ASSERT_ALWAYS(d == -4.25);
+    c = (a / b) - c; ASSERT_ALWAYS(c == -4.25);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >
+  {
+    mpf_class a(3);
+    unsigned int b = 2;
+    mpf_class c((-a) >> b); ASSERT_ALWAYS(c == -0.75);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >
+  {
+    mpf_class a(2), b(3);
+    double c = 5.0;
+    mpf_class d;
+    d = c / (a + b); ASSERT_ALWAYS(d == 1);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+  {
+    mpf_class a(2), b(3);
+    mpz_class c(4);
+    mpf_class d;
+    d = (a + b) * c; ASSERT_ALWAYS(d == 20);
+  }
+  {
+    mpf_class a(2), b(3);
+    mpq_class c(1, 2), d(1, 4);
+    mpf_class e;
+    e = (a * b) / (c + d); ASSERT_ALWAYS(e == 8);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+  {
+    mpf_class a(1), b(2);
+    mpq_class c(3);
+    mpf_class d(c / (a + b)); ASSERT_ALWAYS(d == 1);
+  }
+  {
+    mpf_class a(1);
+    mpz_class b(2);
+    mpq_class c(3, 4);
+    mpf_class d;
+    d = (-c) + (a + b); ASSERT_ALWAYS(d == 2.25);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >
+  {
+    mpf_class a(1), b(2), c(3);
+    mpf_class d;
+    d = (a + b) * (-c); ASSERT_ALWAYS(d == -9);
+  }
+}
+
+
+int
+main (void)
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-cast.cc b/tests/cxx/t-cast.cc
new file mode 100644
index 0000000..983b3b8
--- /dev/null
+++ b/tests/cxx/t-cast.cc
@@ -0,0 +1,56 @@
+/* Test g++ -Wold-style-cast cleanliness.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "gmpxx.h"
+
+
+/* This code doesn't do anything when run, it just expands various C macros
+   to see that they don't trigger compile-time warnings from g++
+   -Wold-style-cast.  This option isn't used in a normal build, it has to be
+   added manually to make this test worthwhile.  */
+
+void
+check_macros (void)
+{
+  mpz_t          z;
+  long           l = 123;
+  unsigned long  u = 456;
+  int            i;
+  mp_limb_t      limb;
+
+  mpz_init_set_ui (z, 0L);
+  i = mpz_odd_p (z);
+  i = mpz_even_p (z);
+  i = mpz_cmp_si (z, l);
+  i = mpz_cmp_ui (z, u);
+  mpz_clear (z);
+
+  limb = GMP_NUMB_MASK;
+  limb = GMP_NUMB_MAX;
+  limb = GMP_NAIL_MASK;
+
+  mpn_divmod (&limb, &limb, 1, &limb, 1);
+  mpn_divexact_by3 (&limb, &limb, 1);
+}
+
+int
+main (void)
+{
+  return 0;
+}
diff --git a/tests/cxx/t-constr.cc b/tests/cxx/t-constr.cc
new file mode 100644
index 0000000..500878e
--- /dev/null
+++ b/tests/cxx/t-constr.cc
@@ -0,0 +1,755 @@
+/* Test mp*_class constructors.
+
+Copyright 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+#include <string>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // mpz_class()
+  {
+    mpz_class a; ASSERT_ALWAYS(a == 0);
+  }
+
+  // mpz_class(const mpz_class &)
+  // see below
+
+  // template <class T, class U> mpz_class(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // mpz_class(signed char)
+  {
+    signed char a = -127;
+    mpz_class b(a); ASSERT_ALWAYS(b == -127);
+  }
+
+  // mpz_class(unsigned char)
+  {
+    unsigned char a = 255;
+    mpz_class b(a); ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpz_class a('A'); ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpz_class a('z'); ASSERT_ALWAYS(a == 122);
+  }
+
+  // mpz_class(signed int)
+  {
+    signed int a = 0;
+    mpz_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpz_class b(a); ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 4567;
+    mpz_class b(a); ASSERT_ALWAYS(b == 4567);
+  }
+
+  // mpz_class(unsigned int)
+  {
+    unsigned int a = 890;
+    mpz_class b(a); ASSERT_ALWAYS(b == 890);
+  }
+
+  // mpz_class(signed short int)
+  {
+    signed short int a = -12345;
+    mpz_class b(a); ASSERT_ALWAYS(b == -12345);
+  }
+
+  // mpz_class(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpz_class b(a); ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // mpz_class(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpz_class b(a); ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // mpz_class(unsigned long int)
+  {
+    unsigned long int a = 1UL << 30;
+    mpz_class b(a); ASSERT_ALWAYS(b == 1073741824L);
+  }
+
+  // mpz_class(float)
+  {
+    float a = 123.45;
+    mpz_class b(a); ASSERT_ALWAYS(b == 123);
+  }
+
+  // mpz_class(double)
+  {
+    double a = 3.141592653589793238;
+    mpz_class b(a); ASSERT_ALWAYS(b == 3);
+  }
+
+  // mpz_class(long double)
+  // currently not implemented
+
+  // mpz_class(const char *)
+  {
+    const char *a = "1234567890";
+    mpz_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpz_class(const char *, int)
+  {
+    const char *a = "FFFF";
+    int base = 16;
+    mpz_class b(a, base); ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // mpz_class(const std::string &)
+  {
+    string a("1234567890");
+    mpz_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpz_class(const std::string &, int)
+  {
+    string a("7777");
+    int base = 8;
+    mpz_class b(a, base); ASSERT_ALWAYS(b == 4095);
+  }
+
+  // mpz_class(const char *) with invalid
+  {
+    try {
+      const char *a = "ABC";
+      mpz_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpz_class(const char *, int) with invalid
+  {
+    try {
+      const char *a = "GHI";
+      int base = 16;
+      mpz_class b(a, base);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpz_class(const std::string &) with invalid
+  {
+    try {
+      string a("abc");
+      mpz_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpz_class(const std::string &, int) with invalid
+  {
+    try {
+      string a("ZZZ");
+      int base = 8;
+      mpz_class b(a, base);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpz_class(mpz_srcptr)
+  {
+    mpz_t a;
+    mpz_init_set_ui(a, 100);
+    mpz_class b(a); ASSERT_ALWAYS(b == 100);
+    mpz_clear(a);
+  }
+
+  // mpz_class(const mpz_class &)
+  {
+    mpz_class a(12345); // tested above, assume it works
+    mpz_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+
+  // no constructor for bool, but it gets cast to int
+  {
+    bool a = true;
+    mpz_class b(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    bool a = false;
+    mpz_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+}
+
+void
+check_mpq (void)  // exercise each documented mpq_class constructor
+{
+  // mpq_class()
+  {
+    mpq_class a; ASSERT_ALWAYS(a == 0);
+  }
+
+  // mpq_class(const mpq_class &)
+  // see below
+
+  // template <class T, class U> mpq_class(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // mpq_class(signed char)
+  {
+    signed char a = -127;
+    mpq_class b(a); ASSERT_ALWAYS(b == -127);
+  }
+
+  // mpq_class(unsigned char)
+  {
+    unsigned char a = 255;
+    mpq_class b(a); ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpq_class a('A'); ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpq_class a('z'); ASSERT_ALWAYS(a == 122);
+  }
+
+  // mpq_class(signed int)
+  {
+    signed int a = 0;
+    mpq_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpq_class b(a); ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 4567;
+    mpq_class b(a); ASSERT_ALWAYS(b == 4567);
+  }
+
+  // mpq_class(unsigned int)
+  {
+    unsigned int a = 890;
+    mpq_class b(a); ASSERT_ALWAYS(b == 890);
+  }
+
+  // mpq_class(signed short int)
+  {
+    signed short int a = -12345;
+    mpq_class b(a); ASSERT_ALWAYS(b == -12345);
+  }
+
+  // mpq_class(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpq_class b(a); ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // mpq_class(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpq_class b(a); ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // mpq_class(unsigned long int)
+  {
+    unsigned long int a = 1UL << 30;
+    mpq_class b(a); ASSERT_ALWAYS(b == 1073741824L);
+  }
+
+  // mpq_class(float)
+  {
+    float a = 0.625;
+    mpq_class b(a); ASSERT_ALWAYS(b == 0.625);
+  }
+
+  // mpq_class(double)
+  {
+    double a = 1.25;
+    mpq_class b(a); ASSERT_ALWAYS(b == 1.25);
+  }
+
+  // mpq_class(long double)
+  // currently not implemented
+
+  // mpq_class(const char *)
+  {
+    const char *a = "1234567890";
+    mpq_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpq_class(const char *, int)
+  {
+    const char *a = "FFFF";
+    int base = 16;
+    mpq_class b(a, base); ASSERT_ALWAYS(b == 65535u);
+    mpq_class c(0, 1); ASSERT_ALWAYS(c == 0);  // literal 0 must pick the integer ctor, not (const char *, int)
+  }
+
+  // mpq_class(const std::string &)
+  {
+    string a("1234567890");
+    mpq_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpq_class(const std::string &, int)
+  {
+    string a("7777");
+    int base = 8;
+    mpq_class b(a, base); ASSERT_ALWAYS(b == 4095);
+  }
+
+  // mpq_class(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpq_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpq_class(const char *, int) with invalid
+  {
+    try {
+      const char *a = "ZZZ";
+      int base = 16;
+      mpq_class b (a, base);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpq_class(const std::string &) with invalid
+  {
+    try {
+      string a("abc");
+      mpq_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpq_class(const std::string &, int) with invalid
+  {
+    try {
+      string a("ZZZ");
+      int base = 8;
+      mpq_class b (a, base);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpq_class(mpq_srcptr)
+  {
+    mpq_t a;
+    mpq_init(a);
+    mpq_set_ui(a, 100, 1);
+    mpq_class b(a); ASSERT_ALWAYS(b == 100);
+    mpq_clear(a);
+  }
+
+  // mpq_class(const mpz_class &, const mpz_class &)
+  {
+    mpz_class a(123), b(4); // tested above, assume it works
+    mpq_class c(a, b); ASSERT_ALWAYS(c == 30.75);
+  }
+  {
+    mpz_class a(-1), b(2);  // tested above, assume it works
+    mpq_class c(a, b); ASSERT_ALWAYS(c == -0.5);
+  }
+  {
+    mpz_class a(5), b(4); // tested above, assume it works
+    mpq_class c(a, b); ASSERT_ALWAYS(c == 1.25);
+  }
+
+  // mpq_class(const mpz_class &)
+  {
+    mpq_class a(12345); // tested above, assume it works
+    mpq_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+
+  // no constructor for bool, but it gets casted to int
+  {
+    bool a = true;
+    mpq_class b(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    bool a = false;
+    mpq_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+}
+
+void
+check_mpf (void)  // exercise each documented mpf_class constructor
+{
+  // mpf_class()
+  {
+    mpf_class a; ASSERT_ALWAYS(a == 0);
+  }
+
+  // mpf_class(const mpf_class &)
+  // mpf_class(const mpf_class &, unsigned long int)
+  // see below
+
+  // template <class T, class U> mpf_class(const __gmp_expr<T, U> &)
+  // template <class T, class U> mpf_class(const __gmp_expr<T, U> &,
+  //                                       unsigned long int)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // mpf_class(signed char)
+  {
+    signed char a = -127;
+    mpf_class b(a); ASSERT_ALWAYS(b == -127);
+  }
+
+  // mpf_class(signed char, unsigned long int)
+  {
+    signed char a = -1;
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == -1);
+  }
+
+  // mpf_class(unsigned char)
+  {
+    unsigned char a = 255;
+    mpf_class b(a); ASSERT_ALWAYS(b == 255);
+  }
+
+  // mpf_class(unsigned char, unsigned long int)
+  {
+    unsigned char a = 128;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 128);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpf_class a('A'); ASSERT_ALWAYS(a == 65);
+  }
+  {
+    int prec = 256;
+    mpf_class a('z', prec); ASSERT_ALWAYS(a == 122);
+  }
+
+  // mpf_class(signed int)
+  {
+    signed int a = 0;
+    mpf_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpf_class b(a); ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 4567;
+    mpf_class b(a); ASSERT_ALWAYS(b == 4567);
+  }
+
+  // mpf_class(signed int, unsigned long int)
+  {
+    signed int a = -123;
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == -123);
+  }
+
+  // mpf_class(unsigned int)
+  {
+    unsigned int a = 890;
+    mpf_class b(a); ASSERT_ALWAYS(b == 890);
+  }
+
+  // mpf_class(unsigned int, unsigned long int)
+  {
+    unsigned int a = 890;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 890);
+  }
+
+  // mpf_class(signed short int)
+  {
+    signed short int a = -12345;
+    mpf_class b(a); ASSERT_ALWAYS(b == -12345);
+  }
+
+  // mpf_class(signed short int, unsigned long int)
+  {
+    signed short int a = 6789;
+    int prec = 256;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 6789);
+  }
+
+  // mpf_class(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpf_class b(a); ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // mpf_class(unsigned short int, unsigned long int)
+  {
+    unsigned short int a = 54321u;
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // mpf_class(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpf_class b(a); ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // mpf_class(signed long int, unsigned long int)
+  {
+    signed long int a = -1234567890L;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // mpf_class(unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    mpf_class b(a); ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // mpf_class(unsigned long int, unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    int prec = 256;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // mpf_class(float)
+  {
+    float a = 1234.5;
+    mpf_class b(a); ASSERT_ALWAYS(b == 1234.5);
+  }
+
+  // mpf_class(float, unsigned long int)
+  {
+    float a = 1234.5;
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234.5);
+  }
+
+  // mpf_class(double)
+  {
+    double a = 12345.0;
+    mpf_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+  {
+    double a = 1.2345e+4;
+    mpf_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+  {
+    double a = 312.5e-2;
+    mpf_class b(a); ASSERT_ALWAYS(b == 3.125);
+  }
+
+  // mpf_class(double, unsigned long int)
+  {
+    double a = 5.4321e+4;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 54321L);
+  }
+
+  // mpf_class(long double)
+  // mpf_class(long double, unsigned long int)
+  // currently not implemented
+
+  // mpf_class(const char *)
+  {
+    const char *a = "1234567890";
+    mpf_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpf_class(const char *, unsigned long int, int = 0)
+  {
+    const char *a = "1234567890";
+    int prec = 256;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234567890L);
+  }
+  {
+    const char *a = "777777";
+    int prec = 64, base = 8;
+    mpf_class b(a, prec, base); ASSERT_ALWAYS(b == 262143L);
+  }
+
+  // mpf_class(const std::string &)
+  {
+    string a("1234567890");
+    mpf_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpf_class(const std::string &, unsigned long int, int = 0)
+  {
+    string a("1234567890");
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234567890L);
+  }
+  {
+    string a("FFFF");
+    int prec = 256, base = 16;
+    mpf_class b(a, prec, base); ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // mpf_class(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpf_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpf_class(const char *, unsigned long int, int = 0) with invalid
+  {
+    try {
+      const char *a = "def";
+      int prec = 256;
+      mpf_class b(a, prec);  // must throw std::invalid_argument
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+  {
+    try {
+      const char *a = "ghi";
+      int prec = 64, base = 8;
+      mpf_class b(a, prec, base);  // must throw std::invalid_argument
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpf_class(const std::string &) with invalid
+  {
+    try {
+      string a("abc");
+      mpf_class b(a);  // must throw std::invalid_argument
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpf_class(const std::string &, unsigned long int, int = 0) with invalid
+  {
+    try {
+      string a("def");
+      int prec = 128;
+      mpf_class b(a, prec);  // must throw std::invalid_argument
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+  {
+    try {
+      string a("ghi");
+      int prec = 256, base = 16;
+      mpf_class b(a, prec, base);  // must throw std::invalid_argument
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument&) {
+    }
+  }
+
+  // mpf_class(mpf_srcptr)
+  {
+    mpf_t a;
+    mpf_init_set_ui(a, 100);
+    mpf_class b(a); ASSERT_ALWAYS(b == 100);
+    mpf_clear(a);
+  }
+
+  // mpf_class(mpf_srcptr, unsigned long int)
+  {
+    mpf_t a;
+    int prec = 64;
+    mpf_init_set_ui(a, 100);
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 100);
+    mpf_clear(a);
+  }
+
+  // mpf_class(const mpf_class &)
+  {
+    mpf_class a(12345); // tested above, assume it works
+    mpf_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+
+  // mpf_class(const mpf_class &, unsigned long int)
+  {
+    mpf_class a(12345); // tested above, assume it works
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 12345);
+  }
+
+  // no constructors for bool, but it gets casted to int
+  {
+    bool a = true;
+    mpf_class b(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    bool a = false;
+    mpf_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    bool a = true;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    bool a = false;
+    int prec = 256;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 0);
+  }
+}
+
+
+int
+main (void)  // drive the three constructor suites above
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-cxx11.cc b/tests/cxx/t-cxx11.cc
new file mode 100644
index 0000000..8d6fccb
--- /dev/null
+++ b/tests/cxx/t-cxx11.cc
@@ -0,0 +1,232 @@
+/* Test C++11 features
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMPXX_USE_CXX11
+
+#include <utility>
+#include <type_traits>
+
+void check_noexcept ()  // all checks are static_asserts; nothing executes at runtime
+{
+  mpz_class z1, z2;
+  mpq_class q1, q2;
+  mpf_class f1, f2;
+  static_assert(noexcept(z1 = std::move(z2)), "sorry");
+  static_assert(noexcept(q1 = std::move(q2)), "sorry");
+  static_assert(noexcept(f1 = std::move(f2)), "sorry");
+  static_assert(noexcept(q1 = std::move(z1)), "sorry");
+
+  // Only mpz has lazy allocation for now
+  static_assert(std::is_nothrow_default_constructible<mpz_class>::value, "sorry");
+  static_assert(std::is_nothrow_move_constructible<mpz_class>::value, "sorry");
+  static_assert(!std::is_nothrow_default_constructible<mpq_class>::value, "sorry");
+  static_assert(!std::is_nothrow_move_constructible<mpq_class>::value, "sorry");
+  static_assert(!std::is_nothrow_default_constructible<mpf_class>::value, "sorry");
+  static_assert(!std::is_nothrow_move_constructible<mpf_class>::value, "sorry");
+}
+
+void check_common_type ()  // compile-time only: std::common_type for gmpxx classes and expression templates
+{
+#define CHECK_COMMON_TYPE1(T, Res) \
+  static_assert(std::is_same<std::common_type<T>::type, Res>::value, "sorry")
+#define CHECK_COMMON_TYPE(T, U, Res) \
+  static_assert(std::is_same<std::common_type<T, U>::type, Res>::value, "sorry")
+#define CHECK_COMMON_TYPE_BUILTIN1(T, Res) \
+  CHECK_COMMON_TYPE(  signed char , T, Res); \
+  CHECK_COMMON_TYPE(unsigned char , T, Res); \
+  CHECK_COMMON_TYPE(  signed short, T, Res); \
+  CHECK_COMMON_TYPE(unsigned short, T, Res); \
+  CHECK_COMMON_TYPE(  signed int  , T, Res); \
+  CHECK_COMMON_TYPE(unsigned int  , T, Res); \
+  CHECK_COMMON_TYPE(  signed long , T, Res); \
+  CHECK_COMMON_TYPE(unsigned long , T, Res); \
+  CHECK_COMMON_TYPE(float , T, Res); \
+  CHECK_COMMON_TYPE(double, T, Res)
+#define CHECK_COMMON_TYPE_BUILTIN2(T, Res) \
+  CHECK_COMMON_TYPE(T,   signed char , Res); \
+  CHECK_COMMON_TYPE(T, unsigned char , Res); \
+  CHECK_COMMON_TYPE(T,   signed short, Res); \
+  CHECK_COMMON_TYPE(T, unsigned short, Res); \
+  CHECK_COMMON_TYPE(T,   signed int  , Res); \
+  CHECK_COMMON_TYPE(T, unsigned int  , Res); \
+  CHECK_COMMON_TYPE(T,   signed long , Res); \
+  CHECK_COMMON_TYPE(T, unsigned long , Res); \
+  CHECK_COMMON_TYPE(T, float , Res); \
+  CHECK_COMMON_TYPE(T, double, Res)
+#define CHECK_COMMON_TYPE_BUILTIN(T, Res) \
+  CHECK_COMMON_TYPE_BUILTIN1(T, Res); \
+  CHECK_COMMON_TYPE_BUILTIN2(T, Res)
+  /* These would just work with implicit conversions */
+  CHECK_COMMON_TYPE (mpz_class, mpq_class, mpq_class);
+  CHECK_COMMON_TYPE (mpz_class, mpf_class, mpf_class);
+  CHECK_COMMON_TYPE (mpf_class, mpq_class, mpf_class);
+
+  CHECK_COMMON_TYPE_BUILTIN (mpz_class, mpz_class);
+  CHECK_COMMON_TYPE_BUILTIN (mpq_class, mpq_class);
+  CHECK_COMMON_TYPE_BUILTIN (mpf_class, mpf_class);
+
+  mpz_class z; mpq_class q; mpf_class f;  // runtime dummies only used inside decltype
+
+  CHECK_COMMON_TYPE (decltype(-z), mpz_class, mpz_class);
+  CHECK_COMMON_TYPE (decltype(-q), mpq_class, mpq_class);
+  CHECK_COMMON_TYPE (decltype(-f), mpf_class, mpf_class);
+
+  CHECK_COMMON_TYPE (decltype(-z), mpq_class, mpq_class);
+  CHECK_COMMON_TYPE (decltype(-z), mpf_class, mpf_class);
+  CHECK_COMMON_TYPE (decltype(-q), mpf_class, mpf_class);
+
+  /* These require a common_type specialization */
+  CHECK_COMMON_TYPE (decltype(-z), decltype(z+z), mpz_class);
+  CHECK_COMMON_TYPE (decltype(-q), decltype(q+q), mpq_class);
+  CHECK_COMMON_TYPE (decltype(-f), decltype(f+f), mpf_class);
+
+  CHECK_COMMON_TYPE (decltype(-q), mpz_class, mpq_class);
+  CHECK_COMMON_TYPE (decltype(-f), mpz_class, mpf_class);
+  CHECK_COMMON_TYPE (decltype(-f), mpq_class, mpf_class);
+
+  CHECK_COMMON_TYPE (decltype(-z), decltype(-q), mpq_class);
+  CHECK_COMMON_TYPE (decltype(-z), decltype(-f), mpf_class);
+  CHECK_COMMON_TYPE (decltype(-q), decltype(-f), mpf_class);
+
+  /* common_type now decays */
+  CHECK_COMMON_TYPE (decltype(-z), decltype(-z), mpz_class);
+  CHECK_COMMON_TYPE (decltype(-q), decltype(-q), mpq_class);
+  CHECK_COMMON_TYPE (decltype(-f), decltype(-f), mpf_class);
+  CHECK_COMMON_TYPE1 (decltype(-z), mpz_class);
+  CHECK_COMMON_TYPE1 (decltype(-q), mpq_class);
+  CHECK_COMMON_TYPE1 (decltype(-f), mpf_class);
+
+  /* Painful */
+  CHECK_COMMON_TYPE_BUILTIN (decltype(-z), mpz_class);
+  CHECK_COMMON_TYPE_BUILTIN (decltype(-q), mpq_class);
+  CHECK_COMMON_TYPE_BUILTIN (decltype(-f), mpf_class);
+}
+
+template<class T, class U = T>
+void check_move_init ()  // move-construct a U from a T (U defaults to T)
+{
+  {
+    // Delete moved-from x1
+    T x1 = 3;
+    U x2 = std::move(x1);
+    ASSERT_ALWAYS (x2 == 3);
+  }
+  {
+    // Assign to moved-from x1
+    T x1 = 2;
+    U x2 = std::move(x1);
+    x1 = -7;
+    ASSERT_ALWAYS (x1 == -7);
+    ASSERT_ALWAYS (x2 == 2);
+  }
+}
+
+template<class T, class U = T>
+void check_move_assign ()  // move-assign a T into a U (U defaults to T)
+{
+  {
+    // Delete moved-from x1
+    T x1 = 3; U x2;
+    x2 = std::move(x1);
+    ASSERT_ALWAYS (x2 == 3);
+  }
+  {
+    // Assign to moved-from x1
+    T x1 = 2; U x2;
+    x2 = std::move(x1);
+    x1 = -7;
+    ASSERT_ALWAYS (x1 == -7);
+    ASSERT_ALWAYS (x2 == 2);
+  }
+  {
+    // Self move-assign (not necessary, but it happens to work...)
+    T x = 4;
+    x = std::move(x);
+    ASSERT_ALWAYS (x == 4);
+  }
+}
+
+void check_user_defined_literal ()  // exercises operator"" _mpz / _mpq / _mpf
+{
+  ASSERT_ALWAYS (123_mpz % 5 == 3);
+  ASSERT_ALWAYS (-11_mpq / 22 == -.5);
+  ASSERT_ALWAYS (112.5e-1_mpf * 4 == 45);
+  {
+    // literal wider than any built-in integer type
+    mpz_class ref ( "123456789abcdef0123456789abcdef0123", 16);
+    ASSERT_ALWAYS (0x123456789abcdef0123456789abcdef0123_mpz == ref);
+  }
+}
+
+// Check for explicit conversion to bool
+void implicit_bool(bool);
+int implicit_bool(...);
+
+void check_bool_conversion ()  // conversion to bool works in conditions but is not implicit
+{
+  const mpz_class zn = -2;
+  const mpq_class qn = -2;
+  const mpf_class fn = -2;
+  const mpz_class z0 =  0;
+  const mpq_class q0 =  0;
+  const mpf_class f0 =  0;
+  const mpz_class zp = +2;
+  const mpq_class qp = +2;
+  const mpf_class fp = +2;
+  if (zn && qn && fn && zp && qp && fp && !z0 && !q0 && !f0)
+    {
+      if (z0 || q0 || f0) ASSERT_ALWAYS(false);
+    }
+  else ASSERT_ALWAYS(false);
+  decltype(implicit_bool(zn)) zi = 1;  // must pick the int overload: no implicit bool conversion
+  decltype(implicit_bool(qn)) qi = 1;
+  decltype(implicit_bool(fn)) fi = 1;
+  (void)(zi+qi+fi);
+}
+
+int
+main (void)  // run all C++11 feature checks
+{
+  tests_start();
+
+  check_noexcept();
+  check_common_type();
+  check_move_init<mpz_class>();
+  check_move_init<mpq_class>();
+  check_move_init<mpf_class>();
+  check_move_assign<mpz_class>();
+  check_move_assign<mpq_class>();
+  check_move_assign<mpf_class>();
+  check_move_init<mpz_class,mpq_class>();     // cross-type: mpq from moved mpz
+  check_move_assign<mpz_class,mpq_class>();
+  check_user_defined_literal();
+  check_bool_conversion();
+
+  tests_end();
+  return 0;
+}
+
+#else
+int main () { return 0; }
+#endif
diff --git a/tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc b/tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc
new file mode 100644
index 0000000..014eb75
--- /dev/null
+++ b/tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc
@@ -0,0 +1,38 @@
+/* Test if the compiler has working try / throw / catch.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdexcept>
+
+inline void
+throw_expr ()  // unconditionally throws std::invalid_argument
+{
+  throw std::invalid_argument ("Test");
+}
+
+using namespace std;
+
+int
+main ()  // returns 0 (implicitly) iff the exception is caught
+{
+  try
+  {
+    throw_expr();
+  }
+  catch (invalid_argument&) { }
+}
diff --git a/tests/cxx/t-headers.cc b/tests/cxx/t-headers.cc
new file mode 100644
index 0000000..35f7a25
--- /dev/null
+++ b/tests/cxx/t-headers.cc
@@ -0,0 +1,26 @@
+/* Test that gmpxx.h compiles correctly.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "gmpxx.h"
+
+int
+main (void)  // the real test is that this file compiles with gmpxx.h alone
+{
+  return 0;
+}
diff --git a/tests/cxx/t-iostream.cc b/tests/cxx/t-iostream.cc
new file mode 100644
index 0000000..76e280b
--- /dev/null
+++ b/tests/cxx/t-iostream.cc
@@ -0,0 +1,106 @@
+/* Test stream formatted input and output on mp*_class
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <sstream>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+// The tests are extremely basic. These functions just forward to the
+// ones tested in t-istream.cc and t-ostream.cc; we rely on those for
+// advanced tests and only check the syntax here.
+
+void
+checki ()  // formatted input (operator>>) for each class
+{
+  {
+    istringstream i("123");  // mpz
+    mpz_class x;
+    i >> x;
+    ASSERT_ALWAYS (x == 123);
+  }
+  {
+    istringstream i("3/4");  // mpq
+    mpq_class x;
+    i >> x;
+    ASSERT_ALWAYS (x == .75);
+  }
+  {
+    istringstream i("1.5");  // mpf
+    mpf_class x;
+    i >> x;
+    ASSERT_ALWAYS (x == 1.5);
+  }
+}
+
+void
+checko ()  // formatted output (operator<<) for values and for expressions
+{
+  {
+    ostringstream o;
+    mpz_class x=123;
+    o << x;
+    ASSERT_ALWAYS (o.str() == "123");
+  }
+  {
+    ostringstream o;
+    mpz_class x=123;
+    o << (x+1);  // expression template, not a plain mpz_class
+    ASSERT_ALWAYS (o.str() == "124");
+  }
+  {
+    ostringstream o;
+    mpq_class x(3,4);
+    o << x;
+    ASSERT_ALWAYS (o.str() == "3/4");
+  }
+  {
+    ostringstream o;
+    mpq_class x(3,4);
+    o << (x+1);
+    ASSERT_ALWAYS (o.str() == "7/4");
+  }
+  {
+    ostringstream o;
+    mpf_class x=1.5;
+    o << x;
+    ASSERT_ALWAYS (o.str() == "1.5");
+  }
+  {
+    ostringstream o;
+    mpf_class x=1.5;
+    o << (x+1);
+    ASSERT_ALWAYS (o.str() == "2.5");
+  }
+}
+
+int
+main (int argc, char *argv[])  // argc/argv unused — presumably kept for harness convention; TODO confirm
+{
+  tests_start ();
+
+  checki ();
+  checko ();
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-istream.cc b/tests/cxx/t-istream.cc
new file mode 100644
index 0000000..76bcbab
--- /dev/null
+++ b/tests/cxx/t-istream.cc
@@ -0,0 +1,598 @@
+/* Test istream formatted input.
+
+Copyright 2001-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <sstream>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+// Under option_check_standard, the various test cases for mpz operator>>
+// are put through the standard operator>> for long, and likewise mpf
+// operator>> is put through double.
+//
+// In g++ 3.3 this results in some printouts about the final position
+// indicated for something like ".e123".  Our mpf code stops at the "e"
+// since there's no mantissa digits, but g++ reads the whole thing and only
+// then decides it's bad.
+
+bool option_check_standard = false;
+
+
+// On some versions of g++ 2.96 it's been observed that putback() may leave
+// tellg() unchanged.  We believe this is incorrect and presumably the
+// result of a bug, since for instance it's ok in g++ 2.95 and g++ 3.3.  We
+// detect the problem at runtime and disable affected checks.
+
+bool putback_tellg_works = true;
+
+void
+check_putback_tellg (void)  // runtime probe: does putback() move tellg() back?
+{
+  istringstream input ("hello");
+  streampos  old_pos, new_pos;
+  char  c;
+
+  input.get(c);
+  old_pos = input.tellg();
+  input.putback(c);
+  new_pos = input.tellg();
+
+  if (old_pos == new_pos)
+    {
+      cout << "Warning, istringstream has a bug: putback() doesn't update tellg().\n";
+      cout << "Tests on tellg() will be skipped.\n";
+      putback_tellg_works = false;
+    }
+}
+
+
+#define WRONG(str)                                              \
+  do {                                                          \
+    cout << str ", data[" << i << "]\n";                        \
+    cout << "  input: \"" << data[i].input << "\"\n";           \
+    cout << "  flags: " << hex << input.flags() << dec << "\n"; \
+  } while (0)
+
+void
+check_mpz (void)  // operator>> for mpz against a table of inputs, flags and expected results
+{
+  static const struct {
+    const char     *input;
+    int            want_pos;   // expected final stream position; -1 = whole input consumed
+    const char     *want;      // expected value (base-0 string); NULL = extraction must fail
+    ios::fmtflags  flags;
+
+  } data[] = {
+
+    { "0",      -1, "0",    (ios::fmtflags) 0 },
+    { "123",    -1, "123",  (ios::fmtflags) 0 },
+    { "0123",   -1, "83",   (ios::fmtflags) 0 },
+    { "0x123",  -1, "291",  (ios::fmtflags) 0 },
+    { "-123",   -1, "-123", (ios::fmtflags) 0 },
+    { "-0123",  -1, "-83",  (ios::fmtflags) 0 },
+    { "-0x123", -1, "-291", (ios::fmtflags) 0 },
+    { "+123",   -1, "123", (ios::fmtflags) 0 },
+    { "+0123",  -1, "83",  (ios::fmtflags) 0 },
+    { "+0x123", -1, "291", (ios::fmtflags) 0 },
+
+    { "0",     -1, "0",    ios::dec },
+    { "1f",     1, "1",    ios::dec },
+    { "011f",   3, "11",   ios::dec },
+    { "123",   -1, "123",  ios::dec },
+    { "-1f",    2, "-1",   ios::dec },
+    { "-011f",  4, "-11",  ios::dec },
+    { "-123",  -1, "-123", ios::dec },
+    { "+1f",    2, "1",    ios::dec },
+    { "+011f",  4, "11",   ios::dec },
+    { "+123",  -1, "123",  ios::dec },
+
+    { "0",    -1, "0",   ios::oct },
+    { "123",  -1, "83",  ios::oct },
+    { "-123", -1, "-83", ios::oct },
+    { "+123", -1, "83",  ios::oct },
+
+    { "0",    -1, "0",    ios::hex },
+    { "123",  -1, "291",  ios::hex },
+    { "ff",   -1, "255",  ios::hex },
+    { "FF",   -1, "255",  ios::hex },
+    { "-123", -1, "-291", ios::hex },
+    { "-ff",  -1, "-255", ios::hex },
+    { "-FF",  -1, "-255", ios::hex },
+    { "+123", -1, "291",  ios::hex },
+    { "+ff",  -1, "255",  ios::hex },
+    { "+FF",  -1, "255",  ios::hex },
+    { "ab",   -1, "171",  ios::hex },
+    { "cd",   -1, "205",  ios::hex },
+    { "ef",   -1, "239",  ios::hex },
+
+    { " 123",  0, NULL,  (ios::fmtflags) 0 },   // not without skipws
+    { " 123", -1, "123", ios::skipws },
+  };
+
+  mpz_t      got, want;
+  bool       got_ok, want_ok;
+  bool       got_eof, want_eof;
+  long       got_si, want_si;
+  streampos  init_tellg, got_pos, want_pos;
+
+  mpz_init (got);
+  mpz_init (want);
+
+  for (size_t i = 0; i < numberof (data); i++)
+    {
+      size_t input_length = strlen (data[i].input);
+      want_pos = (data[i].want_pos == -1
+                  ? input_length : data[i].want_pos);
+      want_eof = (want_pos == streampos(input_length));
+
+      want_ok = (data[i].want != NULL);
+
+      if (data[i].want != NULL)
+        mpz_set_str_or_abort (want, data[i].want, 0);
+      else
+        mpz_set_ui (want, 0L);
+
+      // Cross-check the table against the standard library's long extractor.
+      if (option_check_standard && mpz_fits_slong_p (want))
+        {
+          istringstream  input (data[i].input);
+          input.flags (data[i].flags);
+          init_tellg = input.tellg();
+          want_si = mpz_get_si (want);
+
+          input >> got_si;
+          got_ok = !input.fail();
+          got_eof = input.eof();
+          input.clear();
+          got_pos = input.tellg() - init_tellg;
+
+          if (got_ok != want_ok)
+            {
+              WRONG ("stdc++ operator>> wrong status, check_mpz");
+              cout << "  want_ok: " << want_ok << "\n";
+              cout << "  got_ok:  " << got_ok << "\n";
+            }
+          if (want_ok && got_si != want_si)
+            {
+              WRONG ("stdc++ operator>> wrong result, check_mpz");
+              cout << "  got_si:  " << got_si << "\n";
+              cout << "  want_si: " << want_si << "\n";
+            }
+          if (want_ok && got_eof != want_eof)
+            {
+              WRONG ("stdc++ operator>> wrong EOF state, check_mpz");
+              cout << "  got_eof:  " << got_eof << "\n";
+              cout << "  want_eof: " << want_eof << "\n";
+            }
+          if (putback_tellg_works && got_pos != want_pos)
+            {
+              WRONG ("stdc++ operator>> wrong position, check_mpz");
+              cout << "  want_pos: " << want_pos << "\n";
+              cout << "  got_pos:  " << got_pos << "\n";
+            }
+        }
+
+      {
+        istringstream  input (data[i].input);
+        input.flags (data[i].flags);
+        init_tellg = input.tellg();
+
+        mpz_set_ui (got, 0xDEAD);
+        input >> got;
+        got_ok = !input.fail();
+        got_eof = input.eof();
+        input.clear();
+        got_pos = input.tellg() - init_tellg;
+
+        if (got_ok != want_ok)
+          {
+            WRONG ("mpz operator>> wrong status");
+            cout << "  want_ok: " << want_ok << "\n";
+            cout << "  got_ok:  " << got_ok << "\n";
+            abort ();
+          }
+        if (want_ok && mpz_cmp (got, want) != 0)
+          {
+            WRONG ("mpz operator>> wrong result");
+            mpz_trace ("  got ", got);
+            mpz_trace ("  want", want);
+            abort ();
+          }
+        if (want_ok && got_eof != want_eof)
+          {
+            WRONG ("mpz operator>> wrong EOF state");
+            cout << "  want_eof: " << want_eof << "\n";
+            cout << "  got_eof:  " << got_eof << "\n";
+            abort ();
+          }
+        if (putback_tellg_works && got_pos != want_pos)
+          {
+            WRONG ("mpz operator>> wrong position");
+            cout << "  want_pos: " << want_pos << "\n";
+            cout << "  got_pos:  " << got_pos << "\n";
+            abort ();
+          }
+      }
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+void
+check_mpq (void)
+{
+  static const struct {
+    const char     *input;
+    int            want_pos;
+    const char     *want;
+    ios::fmtflags  flags;
+
+  } data[] = {
+
+    { "0",   -1, "0", (ios::fmtflags) 0 },
+    { "00",  -1, "0", (ios::fmtflags) 0 },
+    { "0x0", -1, "0", (ios::fmtflags) 0 },
+
+    { "123/456",   -1, "123/456", ios::dec },
+    { "0123/456",  -1, "123/456", ios::dec },
+    { "123/0456",  -1, "123/456", ios::dec },
+    { "0123/0456", -1, "123/456", ios::dec },
+
+    { "123/456",   -1, "83/302", ios::oct },
+    { "0123/456",  -1, "83/302", ios::oct },
+    { "123/0456",  -1, "83/302", ios::oct },
+    { "0123/0456", -1, "83/302", ios::oct },
+
+    { "ab",   -1, "171",  ios::hex },
+    { "cd",   -1, "205",  ios::hex },
+    { "ef",   -1, "239",  ios::hex },
+
+    { "0/0",     -1, "0/0", (ios::fmtflags) 0 },
+    { "5/8",     -1, "5/8", (ios::fmtflags) 0 },
+    { "0x5/0x8", -1, "5/8", (ios::fmtflags) 0 },
+
+    { "123/456",   -1, "123/456",  (ios::fmtflags) 0 },
+    { "123/0456",  -1, "123/302",  (ios::fmtflags) 0 },
+    { "123/0x456", -1, "123/1110", (ios::fmtflags) 0 },
+    { "123/0X456", -1, "123/1110", (ios::fmtflags) 0 },
+
+    { "0123/123",   -1, "83/123", (ios::fmtflags) 0 },
+    { "0123/0123",  -1, "83/83",  (ios::fmtflags) 0 },
+    { "0123/0x123", -1, "83/291", (ios::fmtflags) 0 },
+    { "0123/0X123", -1, "83/291", (ios::fmtflags) 0 },
+
+    { "0x123/123",   -1, "291/123", (ios::fmtflags) 0 },
+    { "0X123/0123",  -1, "291/83",  (ios::fmtflags) 0 },
+    { "0x123/0x123", -1, "291/291", (ios::fmtflags) 0 },
+
+    { " 123",  0, NULL,  (ios::fmtflags) 0 },   // not without skipws
+    { " 123", -1, "123", ios::skipws },
+
+    { "123 /456",    3, "123",  (ios::fmtflags) 0 },
+    { "123/ 456",    4,  NULL,  (ios::fmtflags) 0 },
+    { "123/"    ,   -1,  NULL,  (ios::fmtflags) 0 },
+    { "123 /456",    3, "123",  ios::skipws },
+    { "123/ 456",    4,  NULL,  ios::skipws },
+  };
+
+  mpq_t      got, want;
+  bool       got_ok, want_ok;
+  bool       got_eof, want_eof;
+  long       got_si, want_si;
+  streampos  init_tellg, got_pos, want_pos;
+
+  mpq_init (got);
+  mpq_init (want);
+
+  for (size_t i = 0; i < numberof (data); i++)
+    {
+      size_t input_length = strlen (data[i].input);
+      want_pos = (data[i].want_pos == -1
+                  ? input_length : data[i].want_pos);
+      want_eof = (want_pos == streampos(input_length));
+
+      want_ok = (data[i].want != NULL);
+
+      if (data[i].want != NULL)
+        mpq_set_str_or_abort (want, data[i].want, 0);
+      else
+        mpq_set_ui (want, 0L, 1L);
+
+      if (option_check_standard
+          && mpz_fits_slong_p (mpq_numref(want))
+          && mpz_cmp_ui (mpq_denref(want), 1L) == 0
+          && strchr (data[i].input, '/') == NULL)
+        {
+          istringstream  input (data[i].input);
+          input.flags (data[i].flags);
+          init_tellg = input.tellg();
+          want_si = mpz_get_si (mpq_numref(want));
+
+          input >> got_si;
+          got_ok = !input.fail();
+          got_eof = input.eof();
+          input.clear();
+          got_pos = input.tellg() - init_tellg;
+
+          if (got_ok != want_ok)
+            {
+              WRONG ("stdc++ operator>> wrong status, check_mpq");
+              cout << "  want_ok: " << want_ok << "\n";
+              cout << "  got_ok:  " << got_ok << "\n";
+            }
+          if (want_ok && want_si != got_si)
+            {
+              WRONG ("stdc++ operator>> wrong result, check_mpq");
+              cout << "  got_si:  " << got_si << "\n";
+              cout << "  want_si: " << want_si << "\n";
+            }
+          if (want_ok && got_eof != want_eof)
+            {
+              WRONG ("stdc++ operator>> wrong EOF state, check_mpq");
+              cout << "  got_eof:  " << got_eof << "\n";
+              cout << "  want_eof: " << want_eof << "\n";
+            }
+          if (putback_tellg_works && got_pos != want_pos)
+            {
+              WRONG ("stdc++ operator>> wrong position, check_mpq");
+              cout << "  want_pos: " << want_pos << "\n";
+              cout << "  got_pos:  " << got_pos << "\n";
+            }
+        }
+
+      {
+        istringstream  input (data[i].input);
+        input.flags (data[i].flags);
+        init_tellg = input.tellg();
+        mpq_set_si (got, 0xDEAD, 0xBEEF);
+
+        input >> got;
+        got_ok = !input.fail();
+        got_eof = input.eof();
+        input.clear();
+        got_pos = input.tellg() - init_tellg;
+
+        if (got_ok != want_ok)
+          {
+            WRONG ("mpq operator>> wrong status");
+            cout << "  want_ok: " << want_ok << "\n";
+            cout << "  got_ok:  " << got_ok << "\n";
+            abort ();
+          }
+        // don't use mpq_equal, since we allow non-normalized values to be
+        // read, which can trigger ASSERTs in mpq_equal
+        if (want_ok && (mpz_cmp (mpq_numref (got), mpq_numref(want)) != 0
+                        || mpz_cmp (mpq_denref (got), mpq_denref(want)) != 0))
+          {
+            WRONG ("mpq operator>> wrong result");
+            mpq_trace ("  got ", got);
+            mpq_trace ("  want", want);
+            abort ();
+          }
+        if (want_ok && got_eof != want_eof)
+          {
+            WRONG ("mpq operator>> wrong EOF state");
+            cout << "  want_eof: " << want_eof << "\n";
+            cout << "  got_eof:  " << got_eof << "\n";
+            abort ();
+          }
+        if (putback_tellg_works && got_pos != want_pos)
+          {
+            WRONG ("mpq operator>> wrong position");
+            cout << "  want_pos: " << want_pos << "\n";
+            cout << "  got_pos:  " << got_pos << "\n";
+            abort ();
+          }
+      }
+    }
+
+  mpq_clear (got);
+  mpq_clear (want);
+}
+
+
+void
+check_mpf (void)
+{
+  static const struct {
+    const char     *input;
+    int            want_pos;
+    const char     *want;
+    ios::fmtflags  flags;
+
+  } data[] = {
+
+    { "0",      -1, "0", (ios::fmtflags) 0 },
+    { "+0",     -1, "0", (ios::fmtflags) 0 },
+    { "-0",     -1, "0", (ios::fmtflags) 0 },
+    { "0.0",    -1, "0", (ios::fmtflags) 0 },
+    { "0.",     -1, "0", (ios::fmtflags) 0 },
+    { ".0",     -1, "0", (ios::fmtflags) 0 },
+    { "+.0",    -1, "0", (ios::fmtflags) 0 },
+    { "-.0",    -1, "0", (ios::fmtflags) 0 },
+    { "+0.00",  -1, "0", (ios::fmtflags) 0 },
+    { "-0.000", -1, "0", (ios::fmtflags) 0 },
+    { "+0.00",  -1, "0", (ios::fmtflags) 0 },
+    { "-0.000", -1, "0", (ios::fmtflags) 0 },
+    { "0.0e0",  -1, "0", (ios::fmtflags) 0 },
+    { "0.e0",   -1, "0", (ios::fmtflags) 0 },
+    { ".0e0",   -1, "0", (ios::fmtflags) 0 },
+    { "0.0e-0", -1, "0", (ios::fmtflags) 0 },
+    { "0.e-0",  -1, "0", (ios::fmtflags) 0 },
+    { ".0e-0",  -1, "0", (ios::fmtflags) 0 },
+    { "0.0e+0", -1, "0", (ios::fmtflags) 0 },
+    { "0.e+0",  -1, "0", (ios::fmtflags) 0 },
+    { ".0e+0",  -1, "0", (ios::fmtflags) 0 },
+
+    { "1",  -1,  "1", (ios::fmtflags) 0 },
+    { "+1", -1,  "1", (ios::fmtflags) 0 },
+    { "-1", -1, "-1", (ios::fmtflags) 0 },
+
+    { " 0",  0,  NULL, (ios::fmtflags) 0 },  // not without skipws
+    { " 0",  -1, "0", ios::skipws },
+    { " +0", -1, "0", ios::skipws },
+    { " -0", -1, "0", ios::skipws },
+
+    { "+-123", 1, NULL, (ios::fmtflags) 0 },
+    { "-+123", 1, NULL, (ios::fmtflags) 0 },
+    { "1e+-123", 3, NULL, (ios::fmtflags) 0 },
+    { "1e-+123", 3, NULL, (ios::fmtflags) 0 },
+
+    { "e123",   0, NULL, (ios::fmtflags) 0 }, // at least one mantissa digit
+    { ".e123",  1, NULL, (ios::fmtflags) 0 },
+    { "+.e123", 2, NULL, (ios::fmtflags) 0 },
+    { "-.e123", 2, NULL, (ios::fmtflags) 0 },
+
+    { "123e",   4, NULL, (ios::fmtflags) 0 }, // at least one exponent digit
+    { "123e-",  5, NULL, (ios::fmtflags) 0 },
+    { "123e+",  5, NULL, (ios::fmtflags) 0 },
+  };
+
+  mpf_t      got, want;
+  bool       got_ok, want_ok;
+  bool       got_eof, want_eof;
+  double     got_d, want_d;
+  streampos  init_tellg, got_pos, want_pos;
+
+  mpf_init (got);
+  mpf_init (want);
+
+  for (size_t i = 0; i < numberof (data); i++)
+    {
+      size_t input_length = strlen (data[i].input);
+      want_pos = (data[i].want_pos == -1
+                  ? input_length : data[i].want_pos);
+      want_eof = (want_pos == streampos(input_length));
+
+      want_ok = (data[i].want != NULL);
+
+      if (data[i].want != NULL)
+        mpf_set_str_or_abort (want, data[i].want, 0);
+      else
+        mpf_set_ui (want, 0L);
+
+      want_d = mpf_get_d (want);
+      if (option_check_standard && mpf_cmp_d (want, want_d) == 0)
+        {
+          istringstream  input (data[i].input);
+          input.flags (data[i].flags);
+          init_tellg = input.tellg();
+
+          input >> got_d;
+          got_ok = !input.fail();
+          got_eof = input.eof();
+          input.clear();
+          got_pos = input.tellg() - init_tellg;
+
+          if (got_ok != want_ok)
+            {
+              WRONG ("stdc++ operator>> wrong status, check_mpf");
+              cout << "  want_ok: " << want_ok << "\n";
+              cout << "  got_ok:  " << got_ok << "\n";
+            }
+          if (want_ok && want_d != got_d)
+            {
+              WRONG ("stdc++ operator>> wrong result, check_mpf");
+              cout << "  got:   " << got_d << "\n";
+              cout << "  want:  " << want_d << "\n";
+            }
+          if (want_ok && got_eof != want_eof)
+            {
+              WRONG ("stdc++ operator>> wrong EOF state, check_mpf");
+              cout << "  got_eof:  " << got_eof << "\n";
+              cout << "  want_eof: " << want_eof << "\n";
+            }
+          if (putback_tellg_works && got_pos != want_pos)
+            {
+              WRONG ("stdc++ operator>> wrong position, check_mpf");
+              cout << "  want_pos: " << want_pos << "\n";
+              cout << "  got_pos:  " << got_pos << "\n";
+            }
+        }
+
+      {
+        istringstream  input (data[i].input);
+        input.flags (data[i].flags);
+        init_tellg = input.tellg();
+
+        mpf_set_ui (got, 0xDEAD);
+        input >> got;
+        got_ok = !input.fail();
+        got_eof = input.eof();
+        input.clear();
+        got_pos = input.tellg() - init_tellg;
+
+        if (got_ok != want_ok)
+          {
+            WRONG ("mpf operator>> wrong status");
+            cout << "  want_ok: " << want_ok << "\n";
+            cout << "  got_ok:  " << got_ok << "\n";
+            abort ();
+          }
+        if (want_ok && mpf_cmp (got, want) != 0)
+          {
+            WRONG ("mpf operator>> wrong result");
+            mpf_trace ("  got ", got);
+            mpf_trace ("  want", want);
+            abort ();
+          }
+        if (want_ok && got_eof != want_eof)
+          {
+            WRONG ("mpf operator>> wrong EOF state");
+            cout << "  want_eof: " << want_eof << "\n";
+            cout << "  got_eof:  " << got_eof << "\n";
+            abort ();
+          }
+        if (putback_tellg_works && got_pos != want_pos)
+          {
+            WRONG ("mpf operator>> wrong position");
+            cout << "  want_pos: " << want_pos << "\n";
+            cout << "  got_pos:  " << got_pos << "\n";
+            abort ();
+          }
+      }
+    }
+
+  mpf_clear (got);
+  mpf_clear (want);
+}
+
+
+
+int
+main (int argc, char *argv[])
+{
+  if (argc > 1 && strcmp (argv[1], "-s") == 0)
+    option_check_standard = true;
+
+  tests_start ();
+
+  check_putback_tellg ();
+  check_mpz ();
+  check_mpq ();
+  check_mpf ();
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-locale.cc b/tests/cxx/t-locale.cc
new file mode 100644
index 0000000..14e95e0
--- /dev/null
+++ b/tests/cxx/t-locale.cc
@@ -0,0 +1,194 @@
+/* Test locale support in C++ functions.
+
+Copyright 2001-2003, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <clocale>
+#include <iostream>
+#include <cstdlib>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+extern "C" {
+  char point_string[2];
+}
+
+#if HAVE_STD__LOCALE
+// Like std::numpunct, but with decimal_point coming from point_string[].
+class my_numpunct : public numpunct<char> {
+ public:
+  explicit my_numpunct (size_t r = 0) : numpunct<char>(r) { }
+ protected:
+  char do_decimal_point() const { return point_string[0]; }
+};
+#endif
+
+void
+set_point (char c)
+{
+  point_string[0] = c;
+
+#if HAVE_STD__LOCALE
+  locale loc (locale::classic(), new my_numpunct ());
+  locale::global (loc);
+#endif
+}
+
+
+void
+check_input (void)
+{
+  static const struct {
+    const char  *str1;
+    const char  *str2;
+    double      want;
+  } data[] = {
+
+    { "1","",   1.0 },
+    { "1","0",  1.0 },
+    { "1","00", 1.0 },
+
+    { "","5",    0.5 },
+    { "0","5",   0.5 },
+    { "00","5",  0.5 },
+    { "00","50", 0.5 },
+
+    { "1","5",    1.5 },
+    { "1","5e1", 15.0 },
+  };
+
+  static char point[] = {
+    '.', ',', 'x', '\xFF'
+  };
+
+  mpf_t  got;
+  mpf_init (got);
+
+  for (size_t i = 0; i < numberof (point); i++)
+    {
+      set_point (point[i]);
+
+      for (int neg = 0; neg <= 1; neg++)
+        {
+          for (size_t j = 0; j < numberof (data); j++)
+            {
+              string str = string(data[j].str1)+point[i]+string(data[j].str2);
+              if (neg)
+                str = "-" + str;
+
+              istringstream is (str.c_str());
+
+              mpf_set_ui (got, 123);   // dummy initial value
+
+              if (! (is >> got))
+                {
+                  cout << "istream mpf_t operator>> error\n";
+                  cout << "  point " << point[i] << "\n";
+                  cout << "  str   \"" << str << "\"\n";
+                  cout << "  localeconv point \""
+                       << GMP_DECIMAL_POINT << "\"\n";
+                  abort ();
+                }
+
+              double want = data[j].want;
+              if (neg)
+                want = -want;
+              if (mpf_cmp_d (got, want) != 0)
+                {
+                  cout << "istream mpf_t operator>> wrong\n";
+                  cout << "  point " << point[i] << "\n";
+                  cout << "  str   \"" << str << "\"\n";
+                  cout << "  got   " << got << "\n";
+                  cout << "  want  " << want << "\n";
+                  cout << "  localeconv point \""
+                       << GMP_DECIMAL_POINT << "\"\n";
+                  abort ();
+                }
+            }
+        }
+    }
+
+  mpf_clear (got);
+}
+
+void
+check_output (void)
+{
+  static char point[] = {
+    '.', ',', 'x', '\xFF'
+  };
+
+  for (size_t i = 0; i < numberof (point); i++)
+    {
+      set_point (point[i]);
+      ostringstream  got;
+
+      mpf_t  f;
+      mpf_init (f);
+      mpf_set_d (f, 1.5);
+      got << f;
+      mpf_clear (f);
+
+      string  want = string("1") + point[i] + string("5");
+
+      if (want.compare (got.str()) != 0)
+        {
+          cout << "ostream mpf_t operator<< doesn't respect locale\n";
+          cout << "  point " << point[i] << "\n";
+          cout << "  got   \"" << got.str() << "\"\n";
+          cout << "  want  \"" << want      << "\"\n";
+          abort ();
+        }
+    }
+}
+
+int
+replacement_works (void)
+{
+  set_point ('x');
+  mpf_t  f;
+  mpf_init (f);
+  mpf_set_d (f, 1.5);
+  ostringstream s;
+  s << f;
+  mpf_clear (f);
+
+  return (s.str().compare("1x5") == 0);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  if (replacement_works())
+    {
+      check_input ();
+      check_output ();
+    }
+  else
+    {
+      cout << "Replacing decimal point didn't work, tests skipped\n";
+    }
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-misc.cc b/tests/cxx/t-misc.cc
new file mode 100644
index 0000000..1143e84
--- /dev/null
+++ b/tests/cxx/t-misc.cc
@@ -0,0 +1,397 @@
+/* Test mp*_class functions.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+/* Note that we don't use <climits> for LONG_MIN, but instead our own
+   definitions in gmp-impl.h.  In g++ 2.95.4 (debian 3.0) under
+   -mcpu=ultrasparc, limits.h sees __sparc_v9__ defined and assumes that
+   means long is 64-bit long, but it's only 32-bits, causing fatal compile
+   errors.  */
+
+#include "config.h"
+
+#include <string>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // mpz_class::fits_sint_p
+  {
+    bool       fits;
+    mpz_class  z;
+    z = INT_MIN; fits = z.fits_sint_p(); ASSERT_ALWAYS (fits);
+    z--;         fits = z.fits_sint_p(); ASSERT_ALWAYS (! fits);
+    z = INT_MAX; fits = z.fits_sint_p(); ASSERT_ALWAYS (fits);
+    z++;         fits = z.fits_sint_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_uint_p
+  {
+    bool       fits;
+    mpz_class  z;
+    z = 0;        fits = z.fits_uint_p(); ASSERT_ALWAYS (fits);
+    z--;          fits = z.fits_uint_p(); ASSERT_ALWAYS (! fits);
+    z = UINT_MAX; fits = z.fits_uint_p(); ASSERT_ALWAYS (fits);
+    z++;          fits = z.fits_uint_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_slong_p
+  {
+    bool       fits;
+    mpz_class  z;
+    z = LONG_MIN; fits = z.fits_slong_p(); ASSERT_ALWAYS (fits);
+    z--;          fits = z.fits_slong_p(); ASSERT_ALWAYS (! fits);
+    z = LONG_MAX; fits = z.fits_slong_p(); ASSERT_ALWAYS (fits);
+    z++;          fits = z.fits_slong_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_ulong_p
+  {
+    bool       fits;
+    mpz_class  z;
+    z = 0;         fits = z.fits_ulong_p(); ASSERT_ALWAYS (fits);
+    z--;           fits = z.fits_ulong_p(); ASSERT_ALWAYS (! fits);
+    z = ULONG_MAX; fits = z.fits_ulong_p(); ASSERT_ALWAYS (fits);
+    z++;           fits = z.fits_ulong_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_sshort_p
+  {
+    bool       fits;
+    mpz_class  z;
+    z = SHRT_MIN; fits = z.fits_sshort_p(); ASSERT_ALWAYS (fits);
+    z--;          fits = z.fits_sshort_p(); ASSERT_ALWAYS (! fits);
+    z = SHRT_MAX; fits = z.fits_sshort_p(); ASSERT_ALWAYS (fits);
+    z++;          fits = z.fits_sshort_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_ushort_p
+  {
+    bool       fits;
+    mpz_class  z;
+    z = 0;         fits = z.fits_ushort_p(); ASSERT_ALWAYS (fits);
+    z--;           fits = z.fits_ushort_p(); ASSERT_ALWAYS (! fits);
+    z = USHRT_MAX; fits = z.fits_ushort_p(); ASSERT_ALWAYS (fits);
+    z++;           fits = z.fits_ushort_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::get_mpz_t
+  {
+    mpz_class  z(0);
+    mpz_ptr    p = z.get_mpz_t();
+    ASSERT_ALWAYS (mpz_cmp_ui (p, 0) == 0);
+  }
+  {
+    mpz_class  z(0);
+    mpz_srcptr p = z.get_mpz_t();
+    ASSERT_ALWAYS (mpz_cmp_ui (p, 0) == 0);
+  }
+
+  // mpz_class::get_d
+  // mpz_class::get_si
+  // mpz_class::get_ui
+  {
+    mpz_class  z(123);
+    { double d = z.get_d();  ASSERT_ALWAYS (d == 123.0); }
+    { long   l = z.get_si(); ASSERT_ALWAYS (l == 123L); }
+    { long   u = z.get_ui(); ASSERT_ALWAYS (u == 123L); }
+  }
+  {
+    mpz_class  z(-123);
+    { double d = z.get_d();  ASSERT_ALWAYS (d == -123.0); }
+    { long   l = z.get_si(); ASSERT_ALWAYS (l == -123L); }
+  }
+
+  // mpz_class::get_str
+  {
+    mpz_class  z(123);
+    string     s;
+    s = z.get_str(); ASSERT_ALWAYS (s == "123");
+    s = z.get_str(16); ASSERT_ALWAYS (s == "7b");
+    s = z.get_str(-16); ASSERT_ALWAYS (s == "7B");
+  }
+
+  // mpz_class::set_str
+  {
+    mpz_class  z;
+    int        ret;
+    ret = z.set_str ("123", 10);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str ("7b",  16);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str ("7B",  16);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str ("0x7B", 0);  ASSERT_ALWAYS (ret == 0 && z == 123);
+
+    ret = z.set_str (string("123"), 10);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str (string("7b"),  16);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str (string("7B"),  16);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str (string("0x7B"), 0);  ASSERT_ALWAYS (ret == 0 && z == 123);
+  }
+}
+
+void
+check_mpq (void)
+{
+  // mpq_class::canonicalize
+  {
+    mpq_class  q(12,9);
+    q.canonicalize();
+    ASSERT_ALWAYS (q.get_num() == 4);
+    ASSERT_ALWAYS (q.get_den() == 3);
+  }
+
+  // mpq_class::get_d
+  {
+    mpq_class  q(123);
+    { double d = q.get_d();  ASSERT_ALWAYS (d == 123.0); }
+  }
+  {
+    mpq_class  q(-123);
+    { double d = q.get_d();  ASSERT_ALWAYS (d == -123.0); }
+  }
+
+  // mpq_class::get_mpq_t
+  {
+    mpq_class  q(0);
+    mpq_ptr    p = q.get_mpq_t();
+    ASSERT_ALWAYS (mpq_cmp_ui (p, 0, 1) == 0);
+  }
+  {
+    mpq_class  q(0);
+    mpq_srcptr p = q.get_mpq_t();
+    ASSERT_ALWAYS (mpq_cmp_ui (p, 0, 1) == 0);
+  }
+
+  // mpq_class::get_num, mpq_class::get_den
+  {
+    const mpq_class  q(4,5);
+    mpz_class  z;
+    z = q.get_num(); ASSERT_ALWAYS (z == 4);
+    z = q.get_den(); ASSERT_ALWAYS (z == 5);
+  }
+
+  // mpq_class::get_num_mpz_t, mpq_class::get_den_mpz_t
+  {
+    mpq_class  q(4,5);
+    mpz_ptr    p;
+    p = q.get_num_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 4) == 0);
+    p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);
+  }
+  {
+    const mpq_class  q(4,5);
+    mpz_srcptr p;
+    p = q.get_num_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 4) == 0);
+    p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);
+  }
+
+  // mpq_class::get_str
+  {
+    mpq_class  q(17,11);
+    string     s;
+    s = q.get_str();    ASSERT_ALWAYS (s == "17/11");
+    s = q.get_str(10);  ASSERT_ALWAYS (s == "17/11");
+    s = q.get_str(16);  ASSERT_ALWAYS (s == "11/b");
+    s = q.get_str(-16); ASSERT_ALWAYS (s == "11/B");
+  }
+
+  // mpq_class::set_str
+  {
+    mpq_class  q;
+    int        ret;
+    ret = q.set_str ("123", 10);     ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str ("4/5", 10);     ASSERT_ALWAYS (ret == 0 && q == mpq_class(4,5));
+    ret = q.set_str ("7b",  16);     ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str ("7B",  16);     ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str ("0x7B", 0);     ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str ("0x10/17", 0);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(16,17));
+
+    ret = q.set_str (string("4/5"), 10);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(4,5));
+    ret = q.set_str (string("123"), 10);  ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str (string("7b"),  16);  ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str (string("7B"),  16);  ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str (string("0x7B"), 0);  ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str (string("0x10/17"), 0);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(16,17));
+  }
+}
+
+void
+check_mpf (void)
+{
+  // mpf_class::fits_sint_p
+  {
+    bool       fits;
+    mpf_class  f (0, 2*8*sizeof(int));
+    f = INT_MIN; fits = f.fits_sint_p(); ASSERT_ALWAYS (fits);
+    f--;         fits = f.fits_sint_p(); ASSERT_ALWAYS (! fits);
+    f = INT_MAX; fits = f.fits_sint_p(); ASSERT_ALWAYS (fits);
+    f++;         fits = f.fits_sint_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_uint_p
+  {
+    bool       fits;
+    mpf_class  f (0, 2*8*sizeof(int));
+    f = 0;        fits = f.fits_uint_p(); ASSERT_ALWAYS (fits);
+    f--;          fits = f.fits_uint_p(); ASSERT_ALWAYS (! fits);
+    f = UINT_MAX; fits = f.fits_uint_p(); ASSERT_ALWAYS (fits);
+    f++;          fits = f.fits_uint_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_slong_p
+  {
+    bool       fits;
+    mpf_class  f (0, 2*8*sizeof(long));
+    f = LONG_MIN; fits = f.fits_slong_p(); ASSERT_ALWAYS (fits);
+    f--;          fits = f.fits_slong_p(); ASSERT_ALWAYS (! fits);
+    f = LONG_MAX; fits = f.fits_slong_p(); ASSERT_ALWAYS (fits);
+    f++;          fits = f.fits_slong_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_ulong_p
+  {
+    bool       fits;
+    mpf_class  f (0, 2*8*sizeof(long));
+    f = 0;         fits = f.fits_ulong_p(); ASSERT_ALWAYS (fits);
+    f--;           fits = f.fits_ulong_p(); ASSERT_ALWAYS (! fits);
+    f = ULONG_MAX; fits = f.fits_ulong_p(); ASSERT_ALWAYS (fits);
+    f++;           fits = f.fits_ulong_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_sshort_p
+  {
+    bool       fits;
+    mpf_class  f (0, 2*8*sizeof(short));
+    f = SHRT_MIN; fits = f.fits_sshort_p(); ASSERT_ALWAYS (fits);
+    f--;          fits = f.fits_sshort_p(); ASSERT_ALWAYS (! fits);
+    f = SHRT_MAX; fits = f.fits_sshort_p(); ASSERT_ALWAYS (fits);
+    f++;          fits = f.fits_sshort_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_ushort_p
+  {
+    bool       fits;
+    mpf_class  f (0, 2*8*sizeof(short));
+    f = 0;         fits = f.fits_ushort_p(); ASSERT_ALWAYS (fits);
+    f--;           fits = f.fits_ushort_p(); ASSERT_ALWAYS (! fits);
+    f = USHRT_MAX; fits = f.fits_ushort_p(); ASSERT_ALWAYS (fits);
+    f++;           fits = f.fits_ushort_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::get_d
+  // mpf_class::get_si
+  // mpf_class::get_ui
+  {
+    mpf_class  f(123);
+    { double d = f.get_d();  ASSERT_ALWAYS (d == 123.0); }
+    { long   l = f.get_si(); ASSERT_ALWAYS (l == 123L); }
+    { long   u = f.get_ui(); ASSERT_ALWAYS (u == 123L); }
+  }
+  {
+    mpf_class  f(-123);
+    { double d = f.get_d();  ASSERT_ALWAYS (d == -123.0); }
+    { long   l = f.get_si(); ASSERT_ALWAYS (l == -123L); }
+  }
+
+  // mpf_class::get_prec
+  {
+    mpf_class  f;
+    ASSERT_ALWAYS (f.get_prec() == mpf_get_default_prec());
+  }
+
+  // mpf_class::get_str
+  {
+    mpf_class  f(123);
+    string     s;
+    mp_exp_t   e;
+    s = f.get_str(e);        ASSERT_ALWAYS (s == "123" && e == 3);
+    s = f.get_str(e,  16);   ASSERT_ALWAYS (s == "7b"  && e == 2);
+    s = f.get_str(e, -16);   ASSERT_ALWAYS (s == "7B"  && e == 2);
+    s = f.get_str(e, 10, 2); ASSERT_ALWAYS (s == "12"  && e == 3);
+    s = f.get_str(e, 10, 1); ASSERT_ALWAYS (s == "1"   && e == 3);
+  }
+
+  // mpf_class::set_str
+  {
+    mpf_class  f;
+    int        ret;
+    ret = f.set_str ("123",     10);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str ("123e1",   10);  ASSERT_ALWAYS (ret == 0 && f == 1230);
+    ret = f.set_str ("1230e-1", 10);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str ("7b",      16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str ("7B",      16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str ("7B@1",    16);  ASSERT_ALWAYS (ret == 0 && f == 1968);
+    ret = f.set_str ("7B0@-1",  16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+
+    ret = f.set_str (string("123"),     10);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str (string("123e1"),   10);  ASSERT_ALWAYS (ret == 0 && f == 1230);
+    ret = f.set_str (string("1230e-1"), 10);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str (string("7b"),      16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str (string("7B"),      16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str (string("7B@1"),    16);  ASSERT_ALWAYS (ret == 0 && f == 1968);
+    ret = f.set_str (string("7B0@-1"),  16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+  }
+
+  // mpf_class::set_prec
+  {
+    mpf_class  f;
+    f.set_prec (256);
+    ASSERT_ALWAYS (f.get_prec () >= 256);
+  }
+
+  // mpf_class::set_prec_raw
+  {
+    mpf_class  f (0, 100 * GMP_NUMB_BITS);
+    f.set_prec_raw (5 * GMP_NUMB_BITS);
+    ASSERT_ALWAYS (f.get_prec () >= 5 * GMP_NUMB_BITS);
+    ASSERT_ALWAYS (f.get_prec () < 100 * GMP_NUMB_BITS);
+    f.set_prec_raw (100 * GMP_NUMB_BITS);
+  }
+}
+
+// std::numeric_limits
+void
+check_limits (void)
+{
+  // Check that the content is not private.
+  ASSERT_ALWAYS ( std::numeric_limits<mpz_class>::is_integer);
+  ASSERT_ALWAYS (!std::numeric_limits<mpf_class>::is_integer);
+
+  // Check that symbols are emitted.
+  ASSERT_ALWAYS (&std::numeric_limits<mpz_class>::is_integer
+              != &std::numeric_limits<mpq_class>::is_integer);
+}
+
+int
+main (void)
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+  check_limits();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-mix.cc b/tests/cxx/t-mix.cc
new file mode 100644
index 0000000..cb6733b
--- /dev/null
+++ b/tests/cxx/t-mix.cc
@@ -0,0 +1,82 @@
+/* Test legality of conversion between the different mp*_class
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+int f_z  (mpz_class){return 0;}
+int f_q  (mpq_class){return 1;}
+int f_f  (mpf_class){return 2;}
+int f_zq (mpz_class){return 0;}
+int f_zq (mpq_class){return 1;}
+int f_zf (mpz_class){return 0;}
+int f_zf (mpf_class){return 2;}
+int f_qf (mpq_class){return 1;}
+int f_qf (mpf_class){return 2;}
+int f_zqf(mpz_class){return 0;}
+int f_zqf(mpq_class){return 1;}
+int f_zqf(mpf_class){return 2;}
+
+void
+check (void)
+{
+  mpz_class z=42;
+  mpq_class q=33;
+  mpf_class f=18;
+
+  ASSERT_ALWAYS(f_z  (z)==0); ASSERT_ALWAYS(f_z  (-z)==0);
+  ASSERT_ALWAYS(f_q  (z)==1); ASSERT_ALWAYS(f_q  (-z)==1);
+  ASSERT_ALWAYS(f_q  (q)==1); ASSERT_ALWAYS(f_q  (-q)==1);
+  ASSERT_ALWAYS(f_f  (z)==2); ASSERT_ALWAYS(f_f  (-z)==2);
+  ASSERT_ALWAYS(f_f  (q)==2); ASSERT_ALWAYS(f_f  (-q)==2);
+  ASSERT_ALWAYS(f_f  (f)==2); ASSERT_ALWAYS(f_f  (-f)==2);
+  ASSERT_ALWAYS(f_zq (z)==0);
+  ASSERT_ALWAYS(f_zq (q)==1); ASSERT_ALWAYS(f_zq (-q)==1);
+  ASSERT_ALWAYS(f_zf (z)==0);
+  ASSERT_ALWAYS(f_zf (f)==2); ASSERT_ALWAYS(f_zf (-f)==2);
+  ASSERT_ALWAYS(f_qf (q)==1);
+  ASSERT_ALWAYS(f_qf (f)==2); ASSERT_ALWAYS(f_qf (-f)==2);
+  ASSERT_ALWAYS(f_zqf(z)==0);
+  ASSERT_ALWAYS(f_zqf(q)==1);
+  ASSERT_ALWAYS(f_zqf(f)==2); ASSERT_ALWAYS(f_zqf(-f)==2);
+
+  ASSERT_ALWAYS(f_zqf(mpz_class(z))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-z))==0);
+  ASSERT_ALWAYS(f_zqf(mpz_class(q))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-q))==0);
+  ASSERT_ALWAYS(f_zqf(mpz_class(f))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-f))==0);
+  ASSERT_ALWAYS(f_zqf(mpq_class(z))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-z))==1);
+  ASSERT_ALWAYS(f_zqf(mpq_class(q))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-q))==1);
+  ASSERT_ALWAYS(f_zqf(mpq_class(f))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-f))==1);
+  ASSERT_ALWAYS(f_zqf(mpf_class(z))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-z))==2);
+  ASSERT_ALWAYS(f_zqf(mpf_class(q))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-q))==2);
+  ASSERT_ALWAYS(f_zqf(mpf_class(f))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-f))==2);
+}
+
+int
+main (void)
+{
+  tests_start();
+
+  check();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ops.cc b/tests/cxx/t-ops.cc
new file mode 100644
index 0000000..ecc6bd0
--- /dev/null
+++ b/tests/cxx/t-ops.cc
@@ -0,0 +1,753 @@
+/* Test mp*_class operators and functions.
+
+Copyright 2001-2003, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // unary operators and functions
+
+  // operator+
+  {
+    mpz_class a(1);
+    mpz_class b;
+    b = +a; ASSERT_ALWAYS(b == 1);
+  }
+
+  // operator-
+  {
+    mpz_class a(2);
+    mpz_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);
+  }
+
+  // operator~
+  {
+    mpz_class a(3);
+    mpz_class b;
+    b = ~a; ASSERT_ALWAYS(b == -4);
+  }
+
+  // abs
+  {
+    mpz_class a(-123);
+    mpz_class b;
+    b = abs(a); ASSERT_ALWAYS(b == 123);
+    a <<= 300;
+    b = abs(a); ASSERT_ALWAYS(a + b == 0);
+  }
+
+  // sqrt
+  {
+    mpz_class a(25);
+    mpz_class b;
+    b = sqrt(a); ASSERT_ALWAYS(b == 5);
+  }
+  {
+    mpz_class a(125);
+    mpz_class b;
+    b = sqrt(a); ASSERT_ALWAYS(b == 11); // round toward zero
+  }
+
+  // sgn
+  {
+    mpz_class a(123);
+    int b = sgn(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpz_class a(0);
+    int b = sgn(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    mpz_class a(-123);
+    int b = sgn(a); ASSERT_ALWAYS(b == -1);
+  }
+
+
+  // binary operators and functions
+
+  // operator+
+  {
+    mpz_class a(1), b(2);
+    mpz_class c;
+    c = a + b; ASSERT_ALWAYS(c == 3);
+  }
+  {
+    mpz_class a(3);
+    signed int b = 4;
+    mpz_class c;
+    c = a + b; ASSERT_ALWAYS(c == 7);
+  }
+  {
+    mpz_class a(5);
+    double b = 6.0;
+    mpz_class c;
+    c = b + a; ASSERT_ALWAYS(c == 11);
+  }
+
+  // operator-
+  {
+    mpz_class a(3), b(6);
+    mpz_class c;
+    c = a - b; ASSERT_ALWAYS(c == -3);
+  }
+
+  // operator*
+  {
+    mpz_class a(-2), b(4);
+    mpz_class c;
+    c = a * b; ASSERT_ALWAYS(c == -8);
+  }
+  {
+    mpz_class a(2);
+    long b = -4;
+    mpz_class c;
+    c = a * b; ASSERT_ALWAYS(c == -8);
+    c = b * a; ASSERT_ALWAYS(c == -8);
+  }
+  {
+    mpz_class a(-2);
+    unsigned long b = 4;
+    mpz_class c;
+    c = a * b; ASSERT_ALWAYS(c == -8);
+    c = b * a; ASSERT_ALWAYS(c == -8);
+  }
+
+  // operator/ and operator%
+  {
+    mpz_class a(12), b(4);
+    mpz_class c;
+    c = a / b; ASSERT_ALWAYS(c == 3);
+    c = a % b; ASSERT_ALWAYS(c == 0);
+  }
+  {
+    mpz_class a(7), b(5);
+    mpz_class c;
+    c = a / b; ASSERT_ALWAYS(c == 1);
+    c = a % b; ASSERT_ALWAYS(c == 2);
+  }
+  {
+    mpz_class a(-10);
+    signed int ai = -10;
+    mpz_class b(3);
+    signed int bi = 3;
+    mpz_class c;
+    c = a / b;  ASSERT_ALWAYS(c == -3);
+    c = a % b;  ASSERT_ALWAYS(c == -1);
+    c = a / bi; ASSERT_ALWAYS(c == -3);
+    c = a % bi; ASSERT_ALWAYS(c == -1);
+    c = ai / b; ASSERT_ALWAYS(c == -3);
+    c = ai % b; ASSERT_ALWAYS(c == -1);
+  }
+  {
+    mpz_class a(-10);
+    signed int ai = -10;
+    mpz_class b(-3);
+    signed int bi = -3;
+    mpz_class c;
+    c = a / b;  ASSERT_ALWAYS(c == 3);
+    c = a % b;  ASSERT_ALWAYS(c == -1);
+    c = a / bi; ASSERT_ALWAYS(c == 3);
+    c = a % bi; ASSERT_ALWAYS(c == -1);
+    c = ai / b; ASSERT_ALWAYS(c == 3);
+    c = ai % b; ASSERT_ALWAYS(c == -1);
+  }
+  {
+    mpz_class a (LONG_MIN);
+    signed long ai = LONG_MIN;
+    mpz_class b = - mpz_class (LONG_MIN);
+    mpz_class c;
+    c = a / b;  ASSERT_ALWAYS(c == -1);
+    c = a % b;  ASSERT_ALWAYS(c == 0);
+    c = ai / b; ASSERT_ALWAYS(c == -1);
+    c = ai % b; ASSERT_ALWAYS(c == 0);
+  }
+
+  // operator&
+  // operator|
+  // operator^
+
+  // operator<<
+  {
+    mpz_class a(3);
+    unsigned int b = 4;
+    mpz_class c;
+    c = a << b; ASSERT_ALWAYS(c == 48);
+  }
+
+  // operator>>
+  {
+    mpz_class a(127);
+    unsigned int b = 4;
+    mpz_class c;
+    c = a >> b; ASSERT_ALWAYS(c == 7);
+  }
+
+  // operator==
+  // operator!=
+  // operator<
+  // operator<=
+  // operator>
+  // operator>=
+
+  // cmp
+  {
+    mpz_class a(123), b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpz_class a(123);
+    unsigned long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpz_class a(123);
+    long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpz_class a(123);
+    double b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+
+
+  // ternary operators
+
+  // mpz_addmul
+  {
+    mpz_class a(1), b(2), c(3);
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(2);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(3);
+    unsigned int c = 2;
+    mpz_class d;
+    d = a + c * b; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3;
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(3);
+    signed int c = 2;
+    mpz_class d;
+    d = a + c * b; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(2);
+    double c = 3.0;
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(3);
+    double c = 2.0;
+    mpz_class d;
+    d = a + c * b; ASSERT_ALWAYS(d == 7);
+  }
+
+  {
+    mpz_class a(2), b(3), c(4);
+    mpz_class d;
+    d = a * b + c; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(2), b(4);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a * c + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(3), b(4);
+    unsigned int c = 2;
+    mpz_class d;
+    d = c * a + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(2), b(4);
+    signed int c = 3;
+    mpz_class d;
+    d = a * c + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(3), b(4);
+    signed int c = 2;
+    mpz_class d;
+    d = c * a + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(2), b(4);
+    double c = 3.0;
+    mpz_class d;
+    d = a * c + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(3), b(4);
+    double c = 2.0;
+    mpz_class d;
+    d = c * a + b; ASSERT_ALWAYS(d == 10);
+  }
+
+  // mpz_submul
+  {
+    mpz_class a(1), b(2), c(3);
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(2);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(3);
+    unsigned int c = 2;
+    mpz_class d;
+    d = a - c * b; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3;
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(3);
+    signed int c = 2;
+    mpz_class d;
+    d = a - c * b; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(2);
+    double c = 3.0;
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(3);
+    double c = 2.0;
+    mpz_class d;
+    d = a - c * b; ASSERT_ALWAYS(d == -5);
+  }
+
+  {
+    mpz_class a(2), b(3), c(4);
+    mpz_class d;
+    d = a * b - c; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(2), b(4);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a * c - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(3), b(4);
+    unsigned int c = 2;
+    mpz_class d;
+    d = c * a - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(2), b(4);
+    signed int c = 3;
+    mpz_class d;
+    d = a * c - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(3), b(4);
+    signed int c = 2;
+    mpz_class d;
+    d = c * a - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(2), b(4);
+    double c = 3.0;
+    mpz_class d;
+    d = a * c - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(3), b(4);
+    double c = 2.0;
+    mpz_class d;
+    d = c * a - b; ASSERT_ALWAYS(d == 2);
+  }
+}
+
+void
+check_mpq (void)
+{
+  // unary operators and functions
+
+  // operator+
+  {
+    mpq_class a(1, 2);
+    mpq_class b;
+    b = +a; ASSERT_ALWAYS(b == 0.5);
+  }
+
+  // operator-
+  {
+    mpq_class a(3, 4);
+    mpq_class b;
+    b = -a; ASSERT_ALWAYS(b == -0.75);
+  }
+
+  // abs
+  {
+    mpq_class a(-123);
+    mpq_class b;
+    b = abs(a); ASSERT_ALWAYS(b == 123);
+  }
+
+  // sgn
+  {
+    mpq_class a(123);
+    int b = sgn(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpq_class a(0);
+    int b = sgn(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    mpq_class a(-123);
+    int b = sgn(a); ASSERT_ALWAYS(b == -1);
+  }
+
+
+  // binary operators and functions
+
+  // operator+
+  {
+    mpq_class a(1, 2), b(3, 4);
+    mpq_class c;
+    c = a + b; ASSERT_ALWAYS(c == 1.25);
+  }
+  {
+    mpq_class a(1, 2);
+    signed int b = 2;
+    mpq_class c;
+    c = a + b; ASSERT_ALWAYS(c == 2.5);
+  }
+  {
+    mpq_class a(1, 2);
+    double b = 1.5;
+    mpq_class c;
+    c = b + a; ASSERT_ALWAYS(c == 2);
+  }
+
+  // operator-
+  {
+    mpq_class a(1, 2), b(3, 4);
+    mpq_class c;
+    c = a - b; ASSERT_ALWAYS(c == -0.25);
+  }
+
+  // operator*
+  {
+    mpq_class a(1, 3), b(3, 4);
+    mpq_class c;
+    c = a * b; ASSERT_ALWAYS(c == 0.25);
+    c = b * b; ASSERT_ALWAYS(c == 0.5625);
+  }
+
+  // operator/
+  {
+    mpq_class a(1, 2), b(2, 3);
+    mpq_class c;
+    c = a / b; ASSERT_ALWAYS(c == 0.75);
+  }
+  {
+    mpq_class one = 1;
+    mpq_class x(2, 5);
+    ASSERT_ALWAYS(1 / x == one / x);
+    ASSERT_ALWAYS(1u / x == one / x);
+    x = (-1) / x;
+    ASSERT_ALWAYS(x == -2.5);
+    ASSERT_ALWAYS(0 / x == 0);
+    ASSERT_ALWAYS(0u / x == 0);
+  }
+
+  // operator<<
+  // operator>>
+  // operator==
+  // operator!=
+  // operator<
+  // operator<=
+  // operator>
+  // operator>=
+
+  // cmp
+  {
+    mpq_class a(123), b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpq_class a(123);
+    unsigned long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpq_class a(123);
+    long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpq_class a(123);
+    double b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpq_class a(123);
+    mpz_class b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+}
+
+void
+check_mpf (void)
+{
+  // unary operators and functions
+
+  // operator+
+  {
+    mpf_class a(1);
+    mpf_class b;
+    b = +a; ASSERT_ALWAYS(b == 1);
+  }
+
+  // operator-
+  {
+    mpf_class a(2);
+    mpf_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);
+  }
+
+  // abs
+  {
+    mpf_class a(-123);
+    mpf_class b;
+    b = abs(a); ASSERT_ALWAYS(b == 123);
+  }
+
+  // trunc
+  {
+    mpf_class a(1.5);
+    mpf_class b;
+    b = trunc(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpf_class a(-1.5);
+    mpf_class b;
+    b = trunc(a); ASSERT_ALWAYS(b == -1);
+  }
+
+  // floor
+  {
+    mpf_class a(1.9);
+    mpf_class b;
+    b = floor(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpf_class a(-1.1);
+    mpf_class b;
+    b = floor(a); ASSERT_ALWAYS(b == -2);
+  }
+
+  // ceil
+  {
+    mpf_class a(1.1);
+    mpf_class b;
+    b = ceil(a); ASSERT_ALWAYS(b == 2);
+  }
+  {
+    mpf_class a(-1.9);
+    mpf_class b;
+    b = ceil(a); ASSERT_ALWAYS(b == -1);
+  }
+
+  // sqrt
+  {
+    mpf_class a(25);
+    mpf_class b;
+    b = sqrt(a); ASSERT_ALWAYS(b == 5);
+  }
+  {
+    mpf_class a(2.25);
+    mpf_class b;
+    b = sqrt(a); ASSERT_ALWAYS(b == 1.5);
+  }
+
+  // sgn
+  {
+    mpf_class a(123);
+    int b = sgn(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpf_class a(0);
+    int b = sgn(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    mpf_class a(-123);
+    int b = sgn(a); ASSERT_ALWAYS(b == -1);
+  }
+
+
+  // binary operators and functions
+
+  // operator+
+  {
+    mpf_class a(1), b(2);
+    mpf_class c;
+    c = a + b; ASSERT_ALWAYS(c == 3);
+  }
+
+  // operator-
+  {
+    mpf_class a(3), b(4);
+    mpf_class c;
+    c = a - b; ASSERT_ALWAYS(c == -1);
+  }
+
+  // operator*
+  {
+    mpf_class a(2), b(5);
+    mpf_class c;
+    c = a * b; ASSERT_ALWAYS(c == 10);
+  }
+
+  // operator/
+  {
+    mpf_class a(7), b(4);
+    mpf_class c;
+    c = a / b; ASSERT_ALWAYS(c == 1.75);
+  }
+
+  // operator<<
+  // operator>>
+  // operator==
+  // operator!=
+  // operator<
+  // operator<=
+  // operator>
+  // operator>=
+
+  // hypot
+  {
+    mpf_class a(3), b(4);
+    mpf_class c;
+    c = hypot(a, b); ASSERT_ALWAYS(c == 5);
+  }
+
+  // cmp
+  {
+    mpf_class a(123), b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpf_class a(123);
+    unsigned long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpf_class a(123);
+    long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpf_class a(123);
+    double b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpf_class a(123);
+    mpz_class b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpf_class a(123);
+    mpq_class b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+}
+
+
+int
+main (void)
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ops2.h b/tests/cxx/t-ops2.h
new file mode 100644
index 0000000..f8898ee
--- /dev/null
+++ b/tests/cxx/t-ops2.h
@@ -0,0 +1,82 @@
+/* Test mp*_class operators and functions.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <math.h>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define CHECK1(Type,a,fun) \
+  ASSERT_ALWAYS(fun((Type)(a))==fun(a))
+#define CHECK(Type1,Type2,a,b,op) \
+  ASSERT_ALWAYS(((Type1)(a) op (Type2)(b))==((a) op (b)))
+#define CHECK_G(Type,a,b,op) \
+  CHECK(Type,Type,a,b,op)
+#define CHECK_UI(Type,a,b,op) \
+  CHECK(Type,unsigned long,a,b,op); \
+  CHECK(unsigned long,Type,a,b,op)
+#define CHECK_SI(Type,a,b,op) \
+  CHECK(Type,long,a,b,op); \
+  CHECK(long,Type,a,b,op)
+#define CHECK_D(Type,a,b,op) \
+  CHECK(Type,double,a,b,op); \
+  CHECK(double,Type,a,b,op)
+#define CHECK_MPZ(Type,a,b,op) \
+  CHECK(Type,mpz_class,a,b,op); \
+  CHECK(mpz_class,Type,a,b,op)
+#define CHECK_MPQ(Type,a,b,op) \
+  CHECK(Type,mpq_class,a,b,op); \
+  CHECK(mpq_class,Type,a,b,op)
+#define CHECK_ALL_SIGNED(Type,a,b,op) \
+  CHECK_G(Type,a,b,op); \
+  CHECK_SI(Type,a,b,op); \
+  CHECK_D(Type,a,b,op)
+#define CHECK_ALL_SIGNS(Type,a,b,op) \
+  CHECK_ALL_SIGNED(Type,a,b,op); \
+  CHECK_ALL_SIGNED(Type,-(a),b,op); \
+  CHECK_ALL_SIGNED(Type,a,-(b),op); \
+  CHECK_ALL_SIGNED(Type,-(a),-(b),op)
+#define CHECK_ALL(Type,a,b,op) \
+  CHECK_ALL_SIGNED(Type,a,b,op); \
+  CHECK_UI(Type,a,b,op)
+#define CHECK_ALL_SIGNED_COMPARISONS(Type,a,b) \
+  CHECK_ALL_SIGNED(Type,a,b,<); \
+  CHECK_ALL_SIGNED(Type,a,b,>); \
+  CHECK_ALL_SIGNED(Type,a,b,<=); \
+  CHECK_ALL_SIGNED(Type,a,b,>=); \
+  CHECK_ALL_SIGNED(Type,a,b,==); \
+  CHECK_ALL_SIGNED(Type,a,b,!=)
+#define CHECK_ALL_SIGNS_COMPARISONS(Type,a,b) \
+  CHECK_ALL_SIGNS(Type,a,b,<); \
+  CHECK_ALL_SIGNS(Type,a,b,>); \
+  CHECK_ALL_SIGNS(Type,a,b,<=); \
+  CHECK_ALL_SIGNS(Type,a,b,>=); \
+  CHECK_ALL_SIGNS(Type,a,b,==); \
+  CHECK_ALL_SIGNS(Type,a,b,!=)
+#define CHECK_ALL_COMPARISONS(Type,a,b) \
+  CHECK_ALL(Type,a,b,<); \
+  CHECK_ALL(Type,a,b,>); \
+  CHECK_ALL(Type,a,b,<=); \
+  CHECK_ALL(Type,a,b,>=); \
+  CHECK_ALL(Type,a,b,==); \
+  CHECK_ALL(Type,a,b,!=)
diff --git a/tests/cxx/t-ops2f.cc b/tests/cxx/t-ops2f.cc
new file mode 100644
index 0000000..71c9e10
--- /dev/null
+++ b/tests/cxx/t-ops2f.cc
@@ -0,0 +1,87 @@
+/* Test mp*_class operators and functions.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "t-ops2.h"
+
+void checkf (){
+  ASSERT_ALWAYS(sqrt(mpf_class(7))>2.64);
+  ASSERT_ALWAYS(sqrt(mpf_class(7))<2.65);
+  ASSERT_ALWAYS(sqrt(mpf_class(0))==0);
+  // TODO: add some consistency checks, as described in
+  // https://gmplib.org/list-archives/gmp-bugs/2013-February/002940.html
+  CHECK1(mpf_class,1.9,trunc);
+  CHECK1(mpf_class,1.9,floor);
+  CHECK1(mpf_class,1.9,ceil);
+  CHECK1(mpf_class,4.3,trunc);
+  CHECK1(mpf_class,4.3,floor);
+  CHECK1(mpf_class,4.3,ceil);
+  CHECK1(mpf_class,-7.1,trunc);
+  CHECK1(mpf_class,-7.1,floor);
+  CHECK1(mpf_class,-7.1,ceil);
+  CHECK1(mpf_class,-2.8,trunc);
+  CHECK1(mpf_class,-2.8,floor);
+  CHECK1(mpf_class,-2.8,ceil);
+  CHECK1(mpf_class,-1.5,trunc);
+  CHECK1(mpf_class,-1.5,floor);
+  CHECK1(mpf_class,-1.5,ceil);
+  CHECK1(mpf_class,2.5,trunc);
+  CHECK1(mpf_class,2.5,floor);
+  CHECK1(mpf_class,2.5,ceil);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),mpf_class(4))>4.9);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),mpf_class(4))<5.1);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),4.)>4.9);
+  ASSERT_ALWAYS(hypot(-3.,mpf_class(4))<5.1);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),4l)>4.9);
+  ASSERT_ALWAYS(hypot(-3l,mpf_class(4))<5.1);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),4ul)>4.9);
+  ASSERT_ALWAYS(hypot(3ul,mpf_class(4))<5.1);
+  CHECK(mpf_class,mpq_class,1.5,2.25,+);
+  CHECK(mpf_class,mpq_class,1.5,2.25,-);
+  CHECK(mpf_class,mpq_class,1.5,-2.25,*);
+  CHECK(mpf_class,mpq_class,1.5,-2,/);
+  CHECK_MPQ(mpf_class,-5.5,-2.25,+);
+  CHECK_MPQ(mpf_class,-5.5,-2.25,-);
+  CHECK_MPQ(mpf_class,-5.5,-2.25,*);
+  CHECK_MPQ(mpf_class,-5.25,-0.5,/);
+  CHECK_MPQ(mpf_class,5,-2,<);
+  CHECK_MPQ(mpf_class,5,-2,>);
+  CHECK_MPQ(mpf_class,5,-2,<=);
+  CHECK_MPQ(mpf_class,5,-2,>=);
+  CHECK_MPQ(mpf_class,5,-2,==);
+  CHECK_MPQ(mpf_class,5,-2,!=);
+  CHECK_MPQ(mpf_class,0,0,<);
+  CHECK_MPQ(mpf_class,0,0,>);
+  CHECK_MPQ(mpf_class,0,0,<=);
+  CHECK_MPQ(mpf_class,0,0,>=);
+  CHECK_MPQ(mpf_class,0,0,==);
+  CHECK_MPQ(mpf_class,0,0,!=);
+}
+
+int
+main (void)
+{
+  tests_start();
+
+  // Enough precision for 1 + denorm_min
+  mpf_set_default_prec(DBL_MANT_DIG-DBL_MIN_EXP+42);
+  checkf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ops2qf.cc b/tests/cxx/t-ops2qf.cc
new file mode 100644
index 0000000..bd96f61
--- /dev/null
+++ b/tests/cxx/t-ops2qf.cc
@@ -0,0 +1,89 @@
+/* Test mp*_class operators and functions.
+
+Copyright 2011, 2012, 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "t-ops2.h"
+
+template<class T>
+void checkqf (){
+  CHECK_ALL(T,5.,0,+);
+  CHECK_ALL(T,5.,0,-);
+  CHECK_ALL(T,5.,2,+); CHECK_MPZ(T,5.,2,+);
+  CHECK_ALL(T,5.,2,-); CHECK_MPZ(T,5.,2,-);
+  CHECK_ALL(T,5.,2,*); CHECK_MPZ(T,5.,2,*);
+  CHECK_ALL(T,5.,2,/); CHECK_MPZ(T,5.,2,/);
+  CHECK_ALL(T,0.,2,/);
+  CHECK_ALL_SIGNS(T,11.,3,+);
+  CHECK_ALL_SIGNS(T,11.,3,-);
+  CHECK_ALL_SIGNS(T,13.,1,+);
+  CHECK_ALL_SIGNS(T,13.,1,-);
+  CHECK_ALL_SIGNS(T,11.,3,*);
+  CHECK_ALL_SIGNS(T,11.,4,/);
+  CHECK_SI(T,LONG_MIN,1,*);
+  CHECK_SI(T,0,3,*);
+  CHECK_ALL_COMPARISONS(T,5.,2);
+  CHECK_ALL_SIGNS_COMPARISONS(T,11.,3);
+  CHECK_MPZ(T,5,-2,<);
+  CHECK_MPZ(T,5,-2,>);
+  CHECK_MPZ(T,5,-2,<=);
+  CHECK_MPZ(T,5,-2,>=);
+  CHECK_MPZ(T,5,-2,==);
+  CHECK_MPZ(T,5,-2,!=);
+  CHECK_MPZ(T,0,0,<);
+  CHECK_MPZ(T,0,0,>);
+  CHECK_MPZ(T,0,0,<=);
+  CHECK_MPZ(T,0,0,>=);
+  CHECK_MPZ(T,0,0,==);
+  CHECK_MPZ(T,0,0,!=);
+  ASSERT_ALWAYS(T(6)<<2==6.*4);
+  ASSERT_ALWAYS(T(6)>>2==6./4);
+  ASSERT_ALWAYS(T(-13)<<2==-13.*4);
+  ASSERT_ALWAYS(T(-13)>>2==-13./4);
+  ASSERT_ALWAYS(++T(7)==8);
+  ASSERT_ALWAYS(++T(-8)==-7);
+  ASSERT_ALWAYS(--T(8)==7);
+  ASSERT_ALWAYS(--T(-7)==-8);
+  ASSERT_ALWAYS(+T(7)==7);
+  ASSERT_ALWAYS(+T(-8)==-8);
+  ASSERT_ALWAYS(-T(7)==-7);
+  ASSERT_ALWAYS(-T(-8)==8);
+  ASSERT_ALWAYS(abs(T(7))==7);
+  ASSERT_ALWAYS(abs(T(-8))==8);
+  ASSERT_ALWAYS(sgn(T(0))==0);
+  ASSERT_ALWAYS(sgn(T(9))==1);
+  ASSERT_ALWAYS(sgn(T(-17))==-1);
+  ASSERT_ALWAYS(T(1)+DBL_MAX>2);
+  ASSERT_ALWAYS(T(1)+DBL_MIN>1);
+  ASSERT_ALWAYS(T(1)+DBL_MIN<1.001);
+  ASSERT_ALWAYS(T(1)+std::numeric_limits<double>::denorm_min()>1);
+  ASSERT_ALWAYS(T(1)+std::numeric_limits<double>::denorm_min()<1.001);
+}
+
+int
+main (void)
+{
+  tests_start();
+
+  // Enough precision for 1 + denorm_min
+  mpf_set_default_prec(DBL_MANT_DIG-DBL_MIN_EXP+42);
+  checkqf<mpq_class>();
+  checkqf<mpf_class>();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ops2z.cc b/tests/cxx/t-ops2z.cc
new file mode 100644
index 0000000..78694b7
--- /dev/null
+++ b/tests/cxx/t-ops2z.cc
@@ -0,0 +1,126 @@
+/* Test mp*_class operators and functions.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "t-ops2.h"
+
+void checkz (){
+  CHECK_ALL(mpz_class,5,2,+);
+  CHECK_ALL(mpz_class,5,2,-);
+  CHECK_ALL(mpz_class,5,2,*);
+  CHECK_ALL(mpz_class,5,2,/);
+  CHECK_ALL(mpz_class,5,2,%);
+  CHECK_ALL_COMPARISONS(mpz_class,5,2);
+  CHECK_ALL_SIGNS(mpz_class,11,3,+);
+  CHECK_ALL_SIGNS(mpz_class,11,3,-);
+  CHECK_ALL_SIGNS(mpz_class,11,3,*);
+  CHECK_ALL_SIGNS(mpz_class,11,3,/);
+  CHECK_ALL_SIGNS(mpz_class,11,3,%);
+  CHECK_ALL_SIGNS(mpz_class,17,2,*);
+  CHECK_ALL_SIGNS(mpz_class,17,2,/);
+  CHECK_ALL_SIGNS(mpz_class,17,2,%);
+  CHECK(unsigned long,mpz_class,5,-2,/);
+  CHECK(unsigned long,mpz_class,5,-2,%);
+  ASSERT_ALWAYS(7ul/mpz_class(1e35)==0);
+  ASSERT_ALWAYS(7ul%mpz_class(1e35)==7);
+  ASSERT_ALWAYS(7ul/mpz_class(-1e35)==0);
+  ASSERT_ALWAYS(7ul%mpz_class(-1e35)==7);
+  CHECK_ALL_SIGNS_COMPARISONS(mpz_class,11,3);
+  CHECK_ALL(mpz_class,6,3,&);
+  CHECK_ALL(mpz_class,6,3,|);
+  CHECK_ALL(mpz_class,6,3,^);
+  CHECK(mpz_class,unsigned long,6,2,<<);
+  CHECK(mpz_class,unsigned long,6,2,>>);
+  ASSERT_ALWAYS((mpz_class(-13)<<(unsigned long)2) == (-13)*4);
+  CHECK(mpz_class,unsigned long,-13,2,>>);
+  ASSERT_ALWAYS(++mpz_class(7)==8);
+  ASSERT_ALWAYS(++mpz_class(-8)==-7);
+  ASSERT_ALWAYS(--mpz_class(8)==7);
+  ASSERT_ALWAYS(--mpz_class(-7)==-8);
+  ASSERT_ALWAYS(~mpz_class(7)==-8);
+  ASSERT_ALWAYS(~mpz_class(-8)==7);
+  ASSERT_ALWAYS(+mpz_class(7)==7);
+  ASSERT_ALWAYS(+mpz_class(-8)==-8);
+  ASSERT_ALWAYS(-mpz_class(7)==-7);
+  ASSERT_ALWAYS(-mpz_class(-8)==8);
+  ASSERT_ALWAYS(abs(mpz_class(7))==7);
+  ASSERT_ALWAYS(abs(mpz_class(-8))==8);
+  ASSERT_ALWAYS(sqrt(mpz_class(7))==2);
+  ASSERT_ALWAYS(sqrt(mpz_class(0))==0);
+  ASSERT_ALWAYS(sgn(mpz_class(0))==0);
+  ASSERT_ALWAYS(sgn(mpz_class(9))==1);
+  ASSERT_ALWAYS(sgn(mpz_class(-17))==-1);
+  ASSERT_ALWAYS(mpz_class(1)+DBL_MAX>2);
+  ASSERT_ALWAYS(mpz_class(1)+DBL_MIN<2);
+  ASSERT_ALWAYS(mpz_class(1)+std::numeric_limits<double>::denorm_min()<2);
+  ASSERT_ALWAYS(gcd(mpz_class(6),mpz_class(8))==2);
+  ASSERT_ALWAYS(gcd(-mpz_class(6),mpz_class(8))==2);
+  ASSERT_ALWAYS(gcd(-mpz_class(6),-mpz_class(8))==2);
+  ASSERT_ALWAYS(gcd(mpz_class(6),8.f)==2);
+  ASSERT_ALWAYS(gcd(-mpz_class(6),static_cast<unsigned char>(8))==2);
+  ASSERT_ALWAYS(gcd(static_cast<long>(-6),mpz_class(5)+3)==2);
+  ASSERT_ALWAYS(lcm(mpz_class(6),mpz_class(8))==24);
+  ASSERT_ALWAYS(lcm(-mpz_class(6),mpz_class(8))==24);
+  ASSERT_ALWAYS(lcm(-mpz_class(6),-mpz_class(8))==24);
+  ASSERT_ALWAYS(lcm(mpz_class(6),static_cast<short>(8))==24);
+  ASSERT_ALWAYS(lcm(-mpz_class(6),static_cast<unsigned char>(8))==24);
+  ASSERT_ALWAYS(lcm(-6.,mpz_class(5)+3)==24);
+  ASSERT_ALWAYS(factorial(mpz_class(3))==6);
+  ASSERT_ALWAYS(factorial(mpz_class(5)-1)==24);
+  ASSERT_ALWAYS(mpz_class::factorial(mpz_class(3))==6);
+  ASSERT_ALWAYS(mpz_class::factorial(mpz_class(2)*2)==24);
+  ASSERT_ALWAYS(mpz_class::factorial(3)==6);
+  ASSERT_ALWAYS(mpz_class::factorial(3ul)==6);
+  ASSERT_ALWAYS(mpz_class::factorial(3.f)==6);
+  mpz_class ret;
+  try { ret=factorial(-mpz_class(3)); ASSERT_ALWAYS(0); }
+  catch (std::domain_error&) {}
+  try { ret=mpz_class::factorial(-2); ASSERT_ALWAYS(0); }
+  catch (std::domain_error&) {}
+  try { ret=factorial(mpz_class(1)<<300); ASSERT_ALWAYS(0); }
+  catch (std::bad_alloc&) {}
+  ASSERT_ALWAYS(mpz_class::primorial(mpz_class(3))==6);
+  ASSERT_ALWAYS(mpz_class::primorial(mpz_class(2)*2)==6);
+  ASSERT_ALWAYS(mpz_class::primorial(3)==6);
+  ASSERT_ALWAYS(mpz_class::primorial(3ul)==6);
+  ASSERT_ALWAYS(mpz_class::primorial(3.f)==6);
+  try { ret=primorial(-mpz_class(3)); ASSERT_ALWAYS(0); }
+  catch (std::domain_error&) {}
+  try { ret=mpz_class::primorial(-5); ASSERT_ALWAYS(0); }
+  catch (std::domain_error&) {}
+  try { ret=primorial(mpz_class(1)<<300); ASSERT_ALWAYS(0); }
+  catch (std::bad_alloc&) {}
+  ASSERT_ALWAYS(mpz_class::fibonacci(mpz_class(6))==8);
+  ASSERT_ALWAYS(mpz_class::fibonacci(mpz_class(2)*2)==3);
+  ASSERT_ALWAYS(mpz_class::fibonacci(3)==2);
+  ASSERT_ALWAYS(mpz_class::fibonacci(3ul)==2);
+  ASSERT_ALWAYS(mpz_class::fibonacci(3.f)==2);
+  ASSERT_ALWAYS(fibonacci(-mpz_class(6))==-8);
+  ASSERT_ALWAYS(mpz_class::fibonacci(-3)==2);
+  try { ret=fibonacci(mpz_class(1)<<300); ASSERT_ALWAYS(0); }
+  catch (std::bad_alloc&) {}
+}
+
+int
+main (void)
+{
+  tests_start();
+  checkz();
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ops3.cc b/tests/cxx/t-ops3.cc
new file mode 100644
index 0000000..baf49e1
--- /dev/null
+++ b/tests/cxx/t-ops3.cc
@@ -0,0 +1,132 @@
+/* Test mp*_class assignment operators (+=, -=, etc)
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+#define FOR_ALL_SIGNED_BUILTIN(F) \
+	F(signed char) \
+	F(signed short) \
+	F(signed int) \
+	F(signed long) \
+	F(float) \
+	F(double)
+
+#define FOR_ALL_BUILTIN(F) \
+	FOR_ALL_SIGNED_BUILTIN(F) \
+	F(char) \
+	F(unsigned char) \
+	F(unsigned short) \
+	F(unsigned int) \
+	F(unsigned long)
+
+#define FOR_ALL_GMPXX(F) \
+	F(mpz_class) \
+	F(mpq_class) \
+	F(mpf_class)
+
+template<class T,class U> void f(T t, U u){
+  T a=t;
+  ASSERT_ALWAYS((a+=u)==(t+u)); ASSERT_ALWAYS(a==(t+u));
+  ASSERT_ALWAYS((a-=u)==t); ASSERT_ALWAYS(a==t);
+  ASSERT_ALWAYS((a*=u)==(t*u)); ASSERT_ALWAYS(a==(t*u));
+  ASSERT_ALWAYS((a/=u)==t); ASSERT_ALWAYS(a==t);
+  ASSERT_ALWAYS((a<<=5)==(t<<5)); ASSERT_ALWAYS(a==(t<<5));
+  ASSERT_ALWAYS((a>>=5)==t); ASSERT_ALWAYS(a==t);
+}
+
+template<class T,class U> void g(T t, U u){
+  T a=t;
+  ASSERT_ALWAYS((a%=u)==(t%u)); ASSERT_ALWAYS(a==(t%u));
+  a=t;
+  ASSERT_ALWAYS((a&=u)==(t&u)); ASSERT_ALWAYS(a==(t&u));
+  a=t;
+  ASSERT_ALWAYS((a|=u)==(t|u)); ASSERT_ALWAYS(a==(t|u));
+  a=t;
+  ASSERT_ALWAYS((a^=u)==(t^u)); ASSERT_ALWAYS(a==(t^u));
+}
+
+template<class T> void h(T t){
+  T a=t;
+  ASSERT_ALWAYS((a<<=5)==(t<<5)); ASSERT_ALWAYS(a==(t<<5));
+  ASSERT_ALWAYS((a>>=5)==t); ASSERT_ALWAYS(a==t);
+}
+
+template<class T, class U> void ffs(T t, U u){
+#define F(V) f(t,(V)u);
+	FOR_ALL_SIGNED_BUILTIN(F)
+	FOR_ALL_GMPXX(F)
+#undef F
+#define F(V) f(t,-(V)u);
+	FOR_ALL_GMPXX(F)
+#undef F
+}
+
+template<class T, class U> void ff(T t, U u){
+#define F(V) f(t,(V)u);
+	FOR_ALL_BUILTIN(F)
+	FOR_ALL_GMPXX(F)
+#undef F
+#define F(V) f(t,-(V)u);
+	FOR_ALL_GMPXX(F)
+#undef F
+}
+
+template<class U> void ggs(mpz_class t, U u){
+#define F(V) g(t,(V)u);
+	FOR_ALL_SIGNED_BUILTIN(F)
+#undef F
+	g(t,(mpz_class)u);
+	g(t,-(mpz_class)u);
+}
+
+template<class U> void gg(mpz_class t, U u){
+#define F(V) g(t,(V)u);
+	FOR_ALL_BUILTIN(F)
+#undef F
+	g(t,(mpz_class)u);
+	g(t,-(mpz_class)u);
+}
+
+void check(){
+	mpz_class z=18;
+	mpq_class q(7,2);
+	mpf_class d=3.375;
+	h(z); h(q); h(d);
+	ff(z,13); ff(q,13); ff(d,13);
+	ffs(z,-42); ffs(q,-42); ffs(d,-42);
+	gg(z,33); ggs(z,-22);
+}
+
+
+int
+main (void)
+{
+  tests_start();
+
+  check();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ostream.cc b/tests/cxx/t-ostream.cc
new file mode 100644
index 0000000..8550f67
--- /dev/null
+++ b/tests/cxx/t-ostream.cc
@@ -0,0 +1,449 @@
+/* Test ostream formatted output.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+/* When true (enabled by "-s" on the command line), values that fit a
+   built-in type are additionally formatted through the standard ostream
+   inserters and compared against the expected strings.  */
+bool option_check_standard = false;
+
+
+/* Apply data[i]'s flags, width, precision and fill to the ostringstream
+   "got" (both "data" and "got" are expected in the caller's scope), then
+   evaluate the insertion expression.  Aborts if the stream reports an
+   error, or if the inserter failed to reset width to 0 as the standard
+   requires after formatted output.  */
+#define CALL(expr)							\
+  do {									\
+    got.flags (data[i].flags);						\
+    got.width (data[i].width);						\
+    got.precision (data[i].precision);					\
+    if (data[i].fill == '\0')						\
+      got.fill (' ');							\
+    else								\
+      got.fill (data[i].fill);						\
+									\
+    if (! (expr))							\
+      {									\
+	cout << "\"got\" output error\n";				\
+	abort ();							\
+      }									\
+    if (got.width() != 0)						\
+      {									\
+	cout << "\"got\" width not reset to 0\n";			\
+	abort ();							\
+      }									\
+									\
+  } while (0)
+
+
+/* Print the expected and actual strings together with the formatting
+   parameters in effect, to diagnose a mismatch for data[i].  */
+#define DUMP()								\
+  do {									\
+    cout << "  want:  |" << data[i].want << "|\n";			\
+    cout << "  got:   |" << got.str() << "|\n";				\
+    cout << "  width: " << data[i].width << "\n";			\
+    cout << "  prec:  " << got.precision() << "\n";			\
+    cout << "  flags: " << hex << (unsigned long) got.flags() << "\n";	\
+  } while (0)
+
+/* Dump the failing case's details, then terminate the test.  */
+#define ABORT() \
+  do {          \
+    DUMP ();    \
+    abort ();   \
+  } while (0)
+
+/* Check mpz_t operator<< against a table of expected strings covering the
+   dec/oct/hex bases, showbase/showpos/uppercase flags, and the three
+   width-padding alignments.  When option_check_standard is set and the
+   value fits a long, the same formatting is also run through the standard
+   ostream long inserter and compared (skipping negative or showpos cases
+   in hex/oct, where the standard treats the value as unsigned).  */
+void
+check_mpz (void)
+{
+  static const struct {
+    const char     *z;
+    const char     *want;
+    ios::fmtflags  flags;
+    int            width;
+    int            precision;
+    char           fill;
+
+  } data[] = {
+
+    { "0", "0", ios::dec },
+
+    { "0", "0", ios::oct },
+    { "0", "0", ios::oct | ios::showbase },
+
+    { "0", "0", ios::hex },
+    { "0", "0x0", ios::hex | ios::showbase },
+    { "0", "0X0", ios::hex | ios::showbase | ios::uppercase },
+
+    { "1", "****1", ios::dec, 5, 0, '*' },
+
+    { "-1", "   -1",  ios::dec | ios::right,    5 },
+    { "-1", "-   1",  ios::dec | ios::internal, 5 },
+    { "-1", "-1   ",  ios::dec | ios::left,     5 },
+
+    { "1", "   0x1", ios::hex | ios::showbase | ios::right,    6 },
+    { "1", "0x   1", ios::hex | ios::showbase | ios::internal, 6 },
+    { "1", "0x1   ", ios::hex | ios::showbase | ios::left,     6 },
+
+    { "1", "   +0x1", ios::hex | ios::showbase | ios::showpos | ios::right,
+      7 },
+    { "1", "+0x   1", ios::hex | ios::showbase | ios::showpos | ios::internal,
+      7 },
+    { "1", "+0x1   ", ios::hex | ios::showbase | ios::showpos | ios::left,
+      7 },
+
+    {  "123",    "7b", ios::hex },
+    {  "123",    "7B", ios::hex | ios::uppercase },
+    {  "123",  "0x7b", ios::hex | ios::showbase },
+    {  "123",  "0X7B", ios::hex | ios::showbase | ios::uppercase },
+    { "-123", "-0x7b", ios::hex | ios::showbase },
+    { "-123", "-0X7B", ios::hex | ios::showbase | ios::uppercase },
+
+    {  "123",   "173", ios::oct },
+    {  "123",   "173", ios::oct | ios::uppercase },
+    {  "123",  "0173", ios::oct | ios::showbase },
+    {  "123",  "0173", ios::oct | ios::showbase | ios::uppercase },
+    { "-123", "-0173", ios::oct | ios::showbase },
+    { "-123", "-0173", ios::oct | ios::showbase | ios::uppercase },
+
+  };
+
+  size_t  i;
+  mpz_t   z;
+
+  mpz_init (z);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (z, data[i].z, 0);
+
+      if (option_check_standard
+	  && mpz_fits_slong_p (z)
+
+	  // no negatives or showpos in hex or oct
+	  && (((data[i].flags & ios::basefield) == ios::hex
+	       || (data[i].flags & ios::basefield) == ios::oct)
+	      ? (mpz_sgn (z) >= 0
+		 && ! (data[i].flags & ios::showpos))
+	      : 1)
+	  )
+	{
+	  ostringstream  got;
+	  long  n = mpz_get_si (z);
+	  CALL (got << n);
+	  if (got.str().compare (data[i].want) != 0)
+	    {
+	      cout << "check_mpz data[" << i
+		   << "] doesn't match standard ostream output\n";
+	      cout << "  z:     " << data[i].z << "\n";
+	      cout << "  n:     " << n << "\n";
+	      DUMP ();
+	    }
+	}
+
+      {
+	ostringstream  got;
+	CALL (got << z);
+	if (got.str().compare (data[i].want) != 0)
+	  {
+	    cout << "mpz operator<< wrong, data[" << i << "]\n";
+	    cout << "  z:     " << data[i].z << "\n";
+	    ABORT ();
+	  }
+      }
+    }
+
+  mpz_clear (z);
+}
+
+/* Check mpq_t operator<< against a table of expected "num/den" strings,
+   including the showbase treatment of zero numerators and denominators.
+   When option_check_standard is set and the value is an integer fitting a
+   long, the numerator is also run through the standard long inserter and
+   compared.  */
+void
+check_mpq (void)
+{
+  static const struct {
+    const char     *q;
+    const char     *want;
+    ios::fmtflags  flags;
+    int            width;
+    int            precision;
+    char           fill;
+
+  } data[] = {
+
+    { "0", "0", ios::dec },
+    { "0", "0", ios::hex },
+    { "0", "0x0", ios::hex | ios::showbase },
+    { "0", "0X0", ios::hex | ios::showbase | ios::uppercase },
+
+    { "5/8", "5/8", ios::dec },
+    { "5/8", "0X5/0X8", ios::hex | ios::showbase | ios::uppercase },
+
+    // zero denominator with showbase
+    { "0/0",   "       0/0", ios::oct | ios::showbase, 10 },
+    { "0/0",   "       0/0", ios::dec | ios::showbase, 10 },
+    { "0/0",   "   0x0/0x0", ios::hex | ios::showbase, 10 },
+    { "123/0", "    0173/0", ios::oct | ios::showbase, 10 },
+    { "123/0", "     123/0", ios::dec | ios::showbase, 10 },
+    { "123/0", "  0x7b/0x0", ios::hex | ios::showbase, 10 },
+    { "123/0", "  0X7B/0X0", ios::hex | ios::showbase | ios::uppercase, 10 },
+    { "0/123", "    0/0173", ios::oct | ios::showbase, 10 },
+    { "0/123", "     0/123", ios::dec | ios::showbase, 10 },
+    { "0/123", "  0x0/0x7b", ios::hex | ios::showbase, 10 },
+    { "0/123", "  0X0/0X7B", ios::hex | ios::showbase | ios::uppercase, 10 },
+  };
+
+  size_t  i;
+  mpq_t   q;
+
+  mpq_init (q);
+
+  /* true when the denominator is exactly 1 */
+#define mpq_integer_p(q)  (mpz_cmp_ui (mpq_denref(q), 1L) == 0)
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpq_set_str_or_abort (q, data[i].q, 0);
+      MPZ_CHECK_FORMAT (mpq_numref (q));
+      MPZ_CHECK_FORMAT (mpq_denref (q));
+
+      if (option_check_standard
+	  && mpz_fits_slong_p (mpq_numref(q))
+	  && mpq_integer_p (q))
+	{
+	  ostringstream  got;
+	  long  n = mpz_get_si (mpq_numref(q));
+	  CALL (got << n);
+	  if (got.str().compare (data[i].want) != 0)
+	    {
+	      cout << "check_mpq data[" << i
+		   << "] doesn't match standard ostream output\n";
+	      cout << "  q:     " << data[i].q << "\n";
+	      cout << "  n:     " << n << "\n";
+	      DUMP ();
+	    }
+	}
+
+      {
+	ostringstream  got;
+	CALL (got << q);
+	if (got.str().compare (data[i].want) != 0)
+	  {
+	    cout << "mpq operator<< wrong, data[" << i << "]\n";
+	    cout << "  q:     " << data[i].q << "\n";
+	    ABORT ();
+	  }
+      }
+    }
+
+  mpq_clear (q);
+}
+
+
+/* Check mpf_t operator<< against a table of expected strings covering
+   fixed/scientific/default float formats, precision handling and rounding,
+   and the hex ("@" exponent marker) and octal bases.  When
+   option_check_standard is set and the value converts exactly to a double
+   (and no hex/oct/showbase flags are involved), the double is also run
+   through the standard inserter and compared.  */
+void
+check_mpf (void)
+{
+  static const struct {
+    const char     *f;
+    const char     *want;
+    ios::fmtflags  flags;
+    int            width;
+    int            precision;
+    char           fill;
+
+  } data[] = {
+
+    { "0", "0",            ios::dec },
+    { "0", "+0",           ios::dec | ios::showpos },
+    { "0", "0.00000",      ios::dec | ios::showpoint },
+    { "0", "0",            ios::dec | ios::fixed },
+    { "0", "0.",           ios::dec | ios::fixed | ios::showpoint },
+    { "0", "0.000000e+00", ios::dec | ios::scientific },
+    { "0", "0.000000e+00", ios::dec | ios::scientific | ios::showpoint },
+
+    { "0", "0",          ios::dec, 0, 4 },
+    { "0", "0.000",      ios::dec | ios::showpoint, 0, 4 },
+    { "0", "0.0000",     ios::dec | ios::fixed, 0, 4 },
+    { "0", "0.0000",     ios::dec | ios::fixed | ios::showpoint, 0, 4 },
+    { "0", "0.0000e+00", ios::dec | ios::scientific, 0, 4 },
+    { "0", "0.0000e+00", ios::dec | ios::scientific | ios::showpoint, 0, 4 },
+
+    { "1", "1",       ios::dec },
+    { "1", "+1",      ios::dec | ios::showpos },
+    { "1", "1.00000", ios::dec | ios::showpoint },
+    { "1", "1",       ios::dec | ios::fixed },
+    { "1", "1.",      ios::dec | ios::fixed | ios::showpoint },
+    { "1", "1.000000e+00",   ios::dec | ios::scientific },
+    { "1", "1.000000e+00",  ios::dec | ios::scientific | ios::showpoint },
+
+    { "1", "1",          ios::dec,                   0, 4 },
+    { "1", "1.000",      ios::dec | ios::showpoint,  0, 4 },
+    { "1", "1.0000",     ios::dec | ios::fixed,      0, 4 },
+    { "1", "1.0000",     ios::dec | ios::fixed | ios::showpoint, 0, 4 },
+    { "1", "1.0000e+00", ios::dec | ios::scientific, 0, 4 },
+    { "1", "1.0000e+00", ios::dec | ios::scientific | ios::showpoint, 0, 4 },
+
+    { "-1", "-1",        ios::dec | ios::showpos },
+
+    { "-1", "  -1",      ios::dec, 4 },
+    { "-1", "-  1",      ios::dec | ios::internal, 4 },
+    { "-1", "-1  ",      ios::dec | ios::left, 4 },
+
+    { "-1", "  -0x1",    ios::hex | ios::showbase, 6 },
+    { "-1", "-0x  1",    ios::hex | ios::showbase | ios::internal, 6 },
+    { "-1", "-0x1  ",    ios::hex | ios::showbase | ios::left, 6 },
+
+    {    "1", "*********1", ios::dec, 10, 4, '*' },
+    { "1234", "******1234", ios::dec, 10, 4, '*' },
+    { "1234", "*****1234.", ios::dec | ios::showpoint, 10, 4, '*' },
+
+    { "12345", "1.23e+04", ios::dec, 0, 3 },
+
+    { "12345", "12345.", ios::dec | ios::fixed | ios::showpoint },
+
+    { "1.9999999",    "2",     ios::dec, 0, 1 },
+    { "1.0009999999", "1.001", ios::dec, 0, 4 },
+    { "1.0001",       "1",     ios::dec, 0, 4 },
+    { "1.0004",       "1",     ios::dec, 0, 4 },
+    { "1.000555",     "1.001", ios::dec, 0, 4 },
+
+    { "1.0002",       "1.000", ios::dec | ios::fixed, 0, 3 },
+    { "1.0008",       "1.001", ios::dec | ios::fixed, 0, 3 },
+
+    { "0", "0", ios::hex },
+    { "0", "0x0", ios::hex | ios::showbase },
+    { "0", "0X0", ios::hex | ios::showbase | ios::uppercase },
+    { "123",   "7b", ios::hex },
+    { "123", "0x7b", ios::hex | ios::showbase },
+    { "123", "0X7B", ios::hex | ios::showbase | ios::uppercase },
+
+    { "0", "0.000@+00", ios::hex | ios::scientific, 0, 3 },
+    { "256", "1.000@+02", ios::hex | ios::scientific, 0, 3 },
+
+    { "123",   "7.b@+01", ios::hex | ios::scientific, 0, 1 },
+    { "123",   "7.B@+01", ios::hex | ios::scientific | ios::uppercase, 0, 1 },
+    { "123", "0x7.b@+01", ios::hex | ios::scientific | ios::showbase, 0, 1 },
+    { "123", "0X7.B@+01",
+      ios::hex | ios::scientific | ios::showbase | ios::uppercase, 0, 1 },
+
+    { "1099511627776", "1.0@+10", ios::hex | ios::scientific, 0, 1 },
+    { "1099511627776", "1.0@+10",
+      ios::hex | ios::scientific | ios::uppercase, 0, 1 },
+
+    { "0.0625", "1.00@-01", ios::hex | ios::scientific, 0, 2 },
+
+    { "0", "0", ios::oct },
+    { "123",  "173", ios::oct },
+    { "123", "0173", ios::oct | ios::showbase },
+
+    // octal showbase suppressed for 0
+    { "0", "0", ios::oct | ios::showbase },
+    { ".125",    "00.1",  ios::oct | ios::showbase, 0, 1 },
+    { ".015625", "00.01", ios::oct | ios::showbase, 0, 2 },
+    { ".125",    "00.1",  ios::fixed | ios::oct | ios::showbase, 0, 1 },
+    { ".015625", "0.0",   ios::fixed | ios::oct | ios::showbase, 0, 1 },
+    { ".015625", "00.01", ios::fixed | ios::oct | ios::showbase, 0, 2 },
+
+    {  "0.125",  "1.000000e-01", ios::oct | ios::scientific },
+    {  "0.125", "+1.000000e-01", ios::oct | ios::scientific | ios::showpos },
+    { "-0.125", "-1.000000e-01", ios::oct | ios::scientific },
+    { "-0.125", "-1.000000e-01", ios::oct | ios::scientific | ios::showpos },
+
+    { "0", "0.000e+00", ios::oct | ios::scientific, 0, 3 },
+    { "256",  "4.000e+02", ios::oct | ios::scientific, 0, 3 },
+    { "256", "04.000e+02", ios::oct | ios::scientific | ios::showbase, 0, 3 },
+    { "256",  "4.000E+02", ios::oct | ios::scientific | ios::uppercase, 0, 3 },
+    { "256", "04.000E+02",
+      ios::oct | ios::scientific | ios::showbase | ios::uppercase, 0, 3 },
+
+    { "16777216",    "1.000000e+08", ios::oct | ios::scientific },
+    { "16777216",    "1.000000E+08",
+      ios::oct | ios::scientific | ios::uppercase },
+    { "16777216",   "01.000000e+08",
+      ios::oct | ios::scientific | ios::showbase },
+    { "16777216",   "01.000000E+08",
+      ios::oct | ios::scientific | ios::showbase | ios::uppercase },
+    { "16777216",  "+01.000000e+08",
+      ios::oct | ios::scientific | ios::showbase | ios::showpos },
+    { "16777216",  "+01.000000E+08", ios::oct | ios::scientific
+      | ios::showbase | ios::showpos | ios::uppercase },
+    { "-16777216", "-01.000000e+08",
+      ios::oct | ios::scientific | ios::showbase | ios::showpos },
+    { "-16777216", "-01.000000E+08", ios::oct | ios::scientific
+      | ios::showbase | ios::showpos | ios::uppercase },
+
+  };
+
+  size_t  i;
+  mpf_t   f, f2;
+  double  d;
+
+  mpf_init (f);
+  mpf_init (f2);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_str_or_abort (f, data[i].f, 0);
+
+      // only cross-check against the standard inserter when the value
+      // round-trips exactly through double
+      d = mpf_get_d (f);
+      mpf_set_d (f2, d);
+      if (option_check_standard && mpf_cmp (f, f2) == 0
+	  && ! (data[i].flags & (ios::hex | ios::oct | ios::showbase)))
+	{
+	  ostringstream  got;
+	  CALL (got << d);
+	  if (got.str().compare (data[i].want) != 0)
+	    {
+	      cout << "check_mpf data[" << i
+		   << "] doesn't match standard ostream output\n";
+	      cout << "  f:     " << data[i].f << "\n";
+	      cout << "  d:     " << d << "\n";
+	      DUMP ();
+	    }
+	}
+
+      {
+	ostringstream  got;
+	CALL (got << f);
+	if (got.str().compare (data[i].want) != 0)
+	  {
+	    cout << "mpf operator<< wrong, data[" << i << "]\n";
+	    cout << "  f:     " << data[i].f << "\n";
+	    ABORT ();
+	  }
+      }
+    }
+
+  mpf_clear (f);
+  mpf_clear (f2);
+}
+
+
+
+/* Usage: t-ostream [-s]
+   With -s, output is additionally cross-checked against the standard
+   ostream inserters for values where that comparison is meaningful.  */
+int
+main (int argc, char *argv[])
+{
+  option_check_standard = (argc > 1 && strcmp (argv[1], "-s") == 0);
+
+  tests_start ();
+  check_mpz ();
+  check_mpq ();
+  check_mpf ();
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-prec.cc b/tests/cxx/t-prec.cc
new file mode 100644
index 0000000..72fca72
--- /dev/null
+++ b/tests/cxx/t-prec.cc
@@ -0,0 +1,216 @@
+/* Test precision of mpf_class expressions.
+
+Copyright 2001-2003, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+/* Precision levels (in bits) used to exercise how precision propagates
+   through mpf_class expressions of mixed-precision operands.  */
+const int
+small_prec = 64, medium_prec = 128, large_prec = 192, very_large_prec = 256;
+
+#define ASSERT_ALWAYS_PREC(a, s, prec) \
+{                                      \
+  mpf_srcptr _a = a.get_mpf_t();       \
+  mpf_class _b(s, prec);               \
+  mpf_srcptr _c = _b.get_mpf_t();      \
+  ASSERT_ALWAYS(mpf_eq(_a, _c, prec)); \
+}
+
+
+
+/* Verify the precision with which mpf_class expressions are evaluated:
+   assignment to an mpf_class computes at the target's precision, while an
+   explicit mpf_class(expr) temporary takes the largest precision of the
+   operands (or the precision passed to its constructor).  Each result is
+   compared against a reference string at the relevant precision.  */
+void
+check_mpf (void)
+{
+  mpf_set_default_prec(medium_prec);
+
+  // simple expressions
+  {
+    mpf_class f(3.0, small_prec);
+    mpf_class g(1 / f, very_large_prec);
+    ASSERT_ALWAYS_PREC
+      (g, "0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "     33333 33333 33333 33333 33333 333", very_large_prec);
+  }
+  {
+    mpf_class f(9.0, medium_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = 1 / f;
+    ASSERT_ALWAYS_PREC
+      (g, "0.11111 11111 11111 11111 11111 11111 11111 11111 11111 11111"
+       "     11111 11111 11111 11111 11111 111", very_large_prec);
+  }
+  {
+    mpf_class f(15.0, large_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = 1 / f;
+    ASSERT_ALWAYS_PREC
+      (g, "0.06666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 66666 66666 66666 66666 667", very_large_prec);
+  }
+
+  // compound expressions
+  {
+    mpf_class f(3.0, small_prec);
+    mpf_class g(-(-(-1 / f)), very_large_prec);
+    ASSERT_ALWAYS_PREC
+      (g, "-0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "      33333 33333 33333 33333 33333 333", very_large_prec);
+  }
+  {
+    mpf_class f(3.0, small_prec), g(9.0, medium_prec);
+    mpf_class h(0.0, very_large_prec);
+    h = 1/f + 1/g;
+    ASSERT_ALWAYS_PREC
+      (h, "0.44444 44444 44444 44444 44444 44444 44444 44444 44444 44444"
+       "     44444 44444 44444 44444 44444 444", very_large_prec);
+  }
+  {
+    mpf_class f(3.0, small_prec), g(9.0, medium_prec), h(15.0, large_prec);
+    mpf_class i(0.0, very_large_prec);
+    i = f / g + h;
+    ASSERT_ALWAYS_PREC
+      (i, "15.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "      33333 33333 33333 33333 33333 3", very_large_prec);
+  }
+  {
+    mpf_class f(3.0, small_prec);
+    mpf_class g(-(1 + f) / 3, very_large_prec);
+    ASSERT_ALWAYS_PREC
+      (g, "-1.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "      33333 33333 33333 33333 33333 33", very_large_prec);
+  }
+  {
+    mpf_class f(9.0, medium_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = sqrt(1 / f);
+    ASSERT_ALWAYS_PREC
+      (g, "0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "     33333 33333 33333 33333 33333 333", very_large_prec);
+  }
+  {
+    mpf_class f(15.0, large_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = hypot(1 + 5 / f, 1.0);
+    ASSERT_ALWAYS_PREC
+      (g, "1.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 66666 66666 66666 66666 67", very_large_prec);
+  }
+
+  // compound assignments
+  {
+    mpf_class f(3.0, small_prec), g(9.0, medium_prec);
+    mpf_class h(1.0, very_large_prec);
+    h -= f / g;
+    ASSERT_ALWAYS_PREC
+      (h, "0.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 66666 66666 66666 66666 667", very_large_prec);
+  }
+
+  // construction from expressions
+  {
+    mpf_class f(3.0, small_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(1 / f);
+    ASSERT_ALWAYS_PREC(g, "0.33333 33333 33333 33333", small_prec);
+  }
+  {
+    mpf_class f(9.0, medium_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(1 / f);
+    ASSERT_ALWAYS_PREC
+      (g, "0.11111 11111 11111 11111 11111 11111 11111 1111", medium_prec);
+  }
+  {
+    mpf_class f(15.0, large_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(1 / f);
+    ASSERT_ALWAYS_PREC
+      (g, "0.06666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 6667", large_prec);
+  }
+
+  {
+    mpf_class f(3.0, small_prec), g(9.0, medium_prec);
+    mpf_class h(0.0, very_large_prec);
+    h = mpf_class(f / g + 1, large_prec);
+    ASSERT_ALWAYS_PREC
+      (h, "1.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "     33333 333",
+       large_prec);
+  }
+
+  // mixed mpf/mpq expressions
+  {
+    mpf_class f(3.0, small_prec);
+    mpq_class q(1, 3);
+    mpf_class g(0.0, very_large_prec);
+    g = f - q;
+    ASSERT_ALWAYS_PREC
+      (g, "2.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 66666 66666 66666 66666 67", very_large_prec);
+  }
+
+  {
+    mpf_class f(3.0, small_prec);
+    mpq_class q(1, 3);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(f - q, large_prec);
+    ASSERT_ALWAYS_PREC
+      (g, "2.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 667",
+       large_prec);
+  }
+  {
+    mpf_class f(3.0, small_prec);
+    mpq_class q(1, 3);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(f - q);
+    ASSERT_ALWAYS_PREC
+      (g, "2.66666 66666 66666 66666 66666 66666 66666 667", medium_prec);
+  }
+  {
+    mpf_class f(15.0, large_prec);
+    mpq_class q(1, 3);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(f + q);
+    ASSERT_ALWAYS_PREC
+      (g, "15.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "      33333 33",
+       large_prec);
+  }
+}
+
+
+/* Test driver: run the precision checks between the standard test-suite
+   setup and teardown calls.  */
+int
+main (void)
+{
+  tests_start ();
+  check_mpf ();
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-rand.cc b/tests/cxx/t-rand.cc
new file mode 100644
index 0000000..d336d08
--- /dev/null
+++ b/tests/cxx/t-rand.cc
@@ -0,0 +1,148 @@
+/* Test gmp_randclass.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+/* all flavours of initialization */
+/* Construct a gmp_randclass through every supported initialization
+   flavour, including the obsolete algorithm-enum forms, and verify that an
+   oversized lc_2exp_size request throws std::length_error.  Each object is
+   scoped so its destructor also runs.  */
+void
+check_randinit (void)
+{
+  {
+    gmp_randclass r(gmp_randinit_default);
+  }
+
+  {
+    mpz_class a(0);
+    unsigned long c = 0, m2exp = 8;
+    gmp_randclass r(gmp_randinit_lc_2exp, a, c, m2exp);
+  }
+
+  {
+    unsigned long m2exp = 64;
+    gmp_randclass r(gmp_randinit_lc_2exp_size, m2exp);
+  }
+
+  /* gmp_randinit_lc_2exp_size, with excessive size */
+  {
+    try {
+      unsigned long m2exp = ULONG_MAX;
+      gmp_randclass r(gmp_randinit_lc_2exp_size, m2exp);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (length_error&) {
+    }
+  }
+
+  {
+    gmp_randclass r(gmp_randinit_mt);
+  }
+
+  /* obsolete, but still available */
+  {
+    gmp_randalg_t alg = GMP_RAND_ALG_LC;
+    unsigned long m2exp = 64;
+    gmp_randclass r(alg, m2exp);
+  }
+  {
+    gmp_randalg_t alg = GMP_RAND_ALG_DEFAULT;
+    unsigned long m2exp = 64;
+    gmp_randclass r(alg, m2exp);
+  }
+  {
+    gmp_randalg_t alg = (gmp_randalg_t) 0;
+    unsigned long m2exp = 64;
+    gmp_randclass r(alg, m2exp);
+  }
+}
+
+/* Exercise seeding (from mpz_class and from unsigned long) combined with
+   drawing random mpz values, both by bit count (get_z_bits, taking either
+   an unsigned or an mpz_class count) and by range (get_z_range).  */
+void
+check_mpz (void)
+{
+  {
+    gmp_randclass r(gmp_randinit_default);
+    mpz_class a(123);
+    unsigned int b = 256;
+    mpz_class c;
+    r.seed(a);
+    c = r.get_z_bits(b);
+  }
+  {
+    gmp_randclass r(gmp_randinit_default);
+    mpz_class a(256);
+    unsigned long b = 123;
+    mpz_class c;
+    r.seed(b);
+    c = r.get_z_bits(a);
+  }
+  {
+    gmp_randclass r(gmp_randinit_default);
+    mpz_class a(123), b(256);
+    mpz_class c;
+    r.seed(a);
+    c = r.get_z_range(b);
+  }
+}
+
+/* Exercise get_f() with default and explicit bit counts, checking the
+   precision of mpf_class objects built from the results: assignment keeps
+   the target's (default) precision, while construction from a get_f()
+   expression takes at least the requested number of bits, or the
+   precision passed explicitly to the constructor.  */
+void
+check_mpf (void)
+{
+  {
+    gmp_randclass r(gmp_randinit_default);
+    mpz_class a(123);
+    r.seed(a);
+    mpf_class b;
+    b = r.get_f();
+    mpf_class c(r.get_f());
+    ASSERT_ALWAYS (c.get_prec() == mpf_get_default_prec());
+    mpf_class d(r.get_f(),212);
+    ASSERT_ALWAYS (d.get_prec() >= 212);
+  }
+  {
+    gmp_randclass r(gmp_randinit_default);
+    int a = 123, b = 198;
+    r.seed(a);
+    mpf_class c;
+    c = r.get_f(b);
+    ASSERT_ALWAYS (c.get_prec() == mpf_get_default_prec());
+    mpf_class d(r.get_f(b));
+    ASSERT_ALWAYS (d.get_prec() >= 198);
+    mpf_class e(r.get_f(b)-r.get_f());
+    ASSERT_ALWAYS (e.get_prec() >= 198);
+    mpf_class f(r.get_f(60),300);
+    ASSERT_ALWAYS (f.get_prec() >= 300);
+  }
+}
+
+
+/* Test driver: run all gmp_randclass checks between the standard
+   test-suite setup and teardown calls.  */
+int
+main (void)
+{
+  tests_start ();
+  check_randinit ();
+  check_mpz ();
+  check_mpf ();
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-ternary.cc b/tests/cxx/t-ternary.cc
new file mode 100644
index 0000000..8d087fb
--- /dev/null
+++ b/tests/cxx/t-ternary.cc
@@ -0,0 +1,734 @@
+/* Test mp*_class ternary expressions.
+
+Copyright 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+/* The various test cases are broken up into separate functions to keep down
+   compiler memory use.  They're static so that any mistakenly omitted from
+   main() will provoke warnings (under gcc -Wall at least).  */
+
+static void
+check_mpz_1 (void)
+{
+  // ternary form: mpz +/- (mpz * mpz), all operands plain mpz_class
+  {
+    mpz_class x(1), y(2), z(3), res;
+    res = x + y * z;
+    ASSERT_ALWAYS(res == 7);
+  }
+  {
+    mpz_class x(1), y(2), z(3), res;
+    res = x - y * z;
+    ASSERT_ALWAYS(res == -5);
+  }
+}
+
+static void
+check_mpz_2 (void)
+{
+  // ternary form: mpz +/- (mpz * builtin), signed builtin on the right
+  {
+    mpz_class x(1), y(2), res;
+    signed int k = 3;
+    res = x + y * k;
+    ASSERT_ALWAYS(res == 7);
+  }
+  {
+    mpz_class x(1), y(2), res;
+    signed int k = 3;
+    res = x - y * k;
+    ASSERT_ALWAYS(res == -5);
+  }
+}
+
+static void
+check_mpz_3 (void)
+{
+  // ternary form: mpz +/- (builtin * mpz), unsigned builtin on the left
+  {
+    mpz_class x(1), y(2), res;
+    unsigned int k = 3;
+    res = x + k * y;
+    ASSERT_ALWAYS(res == 7);
+  }
+  {
+    mpz_class x(1), y(2), res;
+    unsigned int k = 3;
+    res = x - k * y;
+    ASSERT_ALWAYS(res == -5);
+  }
+}
+
+static void
+check_mpz_4 (void)
+{
+  // ternary form: mpz +/- (mpz * subexpr), where subexpr is mpz + double
+  {
+    mpz_class x(1), y(2), z(3), res;
+    double w = 4.0;
+    res = x + y * (z + w);
+    ASSERT_ALWAYS(res == 15);
+  }
+  {
+    mpz_class x(1), y(2), z(3), res;
+    double w = 4.0;
+    res = x - y * (z + w);
+    ASSERT_ALWAYS(res == -13);
+  }
+}
+
+static void
+check_mpz_5 (void)
+{
+  // ternary form: mpz +/- (subexpr * mpz), where subexpr is mpz - signed
+  {
+    mpz_class x(1), y(2), z(3), res;
+    signed int k = 4;
+    res = x + (y - k) * z;
+    ASSERT_ALWAYS(res == -5);
+  }
+  {
+    mpz_class x(1), y(2), z(3), res;
+    signed int k = 4;
+    res = x - (y - k) * z;
+    ASSERT_ALWAYS(res == 7);
+  }
+}
+
+static void
+check_mpz_6 (void)
+{
+  // ternary form: mpz +/- ((mpz + unsigned) * unsigned)
+  {
+    mpz_class x(1), y(2), res;
+    unsigned int j = 3, k = 4;
+    res = x + (y + j) * k;
+    ASSERT_ALWAYS(res == 21);
+  }
+  {
+    mpz_class x(1), y(2), res;
+    unsigned int j = 3, k = 4;
+    res = x - (y + j) * k;
+    ASSERT_ALWAYS(res == -19);
+  }
+}
+
+static void
+check_mpz_7 (void)
+{
+  // ternary form: mpz +/- (double * (mpz + double))
+  {
+    mpz_class x(1), y(2), res;
+    double j = 3.0, k = 4.0;
+    res = x + j * (y + k);
+    ASSERT_ALWAYS(res == 19);
+  }
+  {
+    mpz_class x(1), y(2), res;
+    double j = 3.0, k = 4.0;
+    res = x - j * (y + k);
+    ASSERT_ALWAYS(res == -17);
+  }
+}
+
+static void
+check_mpz_8 (void)
+{
+  // ternary form: mpz +/- ((mpz - signed) * (mpz + signed))
+  {
+    mpz_class x(1), y(2), z(3), res;
+    signed int j = 4, k = 5;
+    res = x + (y - j) * (z + k);
+    ASSERT_ALWAYS(res == -15);
+  }
+  {
+    mpz_class x(1), y(2), z(3), res;
+    signed int j = 4, k = 5;
+    res = x - (y - j) * (z + k);
+    ASSERT_ALWAYS(res == 17);
+  }
+}
+
+static void
+check_mpz_9 (void)
+{
+  // ternary form: (mpz + unsigned) +/- (mpz * mpz)
+  {
+    mpz_class x(1), y(2), z(3), res;
+    unsigned int k = 4;
+    res = (x + k) + y * z;
+    ASSERT_ALWAYS(res == 11);
+  }
+  {
+    mpz_class x(1), y(2), z(3), res;
+    unsigned int k = 4;
+    res = (x + k) - y * z;
+    ASSERT_ALWAYS(res == -1);
+  }
+}
+
+static void
+check_mpz_10 (void)
+{
+  // ternary form: (mpz - double) +/- (mpz * double)
+  {
+    mpz_class x(1), y(2), res;
+    double j = 3.0, k = 4.0;
+    res = (x - j) + y * k;
+    ASSERT_ALWAYS(res == 6);
+  }
+  {
+    mpz_class x(1), y(2), res;
+    double j = 3.0, k = 4.0;
+    res = (x - j) - y * k;
+    ASSERT_ALWAYS(res == -10);
+  }
+}
+
+static void
+check_mpz_11 (void)
+{
+  // ternary form: (mpz - signed) +/- (signed * mpz)
+  {
+    mpz_class x(1), y(2), res;
+    signed int j = 3, k = 4;
+    res = (x - j) + k * y;
+    ASSERT_ALWAYS(res == 6);
+  }
+  {
+    mpz_class x(1), y(2), res;
+    signed int j = 3, k = 4;
+    res = (x - j) - k * y;
+    ASSERT_ALWAYS(res == -10);
+  }
+}
+
+static void
+check_mpz_12 (void)
+{
+  // ternary form: (mpz + unsigned) +/- (mpz * (mpz - unsigned))
+  {
+    mpz_class x(1), y(2), z(3), res;
+    unsigned int j = 4, k = 5;
+    res = (x + j) + y * (z - k);
+    ASSERT_ALWAYS(res == 1);
+  }
+  {
+    mpz_class x(1), y(2), z(3), res;
+    unsigned int j = 4, k = 5;
+    res = (x + j) - y * (z - k);
+    ASSERT_ALWAYS(res == 9);
+  }
+}
+
+static void
+check_mpz_13 (void)
+{
+  // ternary form: (mpz - double) +/- ((mpz + double) * mpz)
+  {
+    mpz_class x(1), y(2), z(3), res;
+    double j = 4.0, k = 5.0;
+    res = (x - j) + (y + k) * z;
+    ASSERT_ALWAYS(res == 18);
+  }
+  {
+    mpz_class x(1), y(2), z(3), res;
+    double j = 4.0, k = 5.0;
+    res = (x - j) - (y + k) * z;
+    ASSERT_ALWAYS(res == -24);
+  }
+}
+
+static void
+check_mpz_14 (void)
+{
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, U>, V, Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3, d = 4, e = 5;
+    mpz_class f;
+    f = (a + c) + (b + d) * e; ASSERT_ALWAYS(f == 34);
+  }
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3, d = 4, e = 5;
+    mpz_class f;
+    f = (a + c) - (b + d) * e; ASSERT_ALWAYS(f == -26);
+  }
+}
+
+static void
+check_mpz_15 (void)
+{
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<U, __gmp_expr<mpz_t, V>, Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2);
+    unsigned int c = 3, d = 4, e = 5;
+    mpz_class f;
+    f = (a - c) + d * (b - e); ASSERT_ALWAYS(f == -14);
+  }
+  {
+    mpz_class a(1), b(2);
+    unsigned int c = 3, d = 4, e = 5;
+    mpz_class f;
+    f = (a - c) - d * (b - e); ASSERT_ALWAYS(f == 10);
+  }
+
+}
+
+static void
+check_mpz_16 (void)
+{
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, U>, __gmp_expr<mpz_t, V>,
+  // Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2), c(3);
+    double d = 4.0, e = 5.0, f = 6.0;
+    mpz_class g;
+    g = (a + d) + (b - e) * (c + f); ASSERT_ALWAYS(g == -22);
+  }
+  {
+    mpz_class a(1), b(2), c(3);
+    double d = 4.0, e = 5.0, f = 6.0;
+    mpz_class g;
+    g = (a + d) - (b - e) * (c + f); ASSERT_ALWAYS(g == 32);
+  }
+}
+
+static void
+check_mpz_17 (void)
+{
+  // template <class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr
+  // <mpz_t, __gmp_binary_expr<mpz_class, mpz_class, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    mpz_class d;
+    d = a * b + c; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    mpz_class d;
+    d = a * b - c; ASSERT_ALWAYS(d == 2);
+  }
+}
+
+static void
+check_mpz_18 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr
+  // <mpz_t, __gmp_binary_expr<mpz_class, T, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4;
+    mpz_class d;
+    d = a * c + b; ASSERT_ALWAYS(d == 11);
+  }
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4;
+    mpz_class d;
+    d = a * c - b; ASSERT_ALWAYS(d == 5);
+  }
+
+}
+
+static void
+check_mpz_19 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr
+  // <mpz_t, __gmp_binary_expr<T, mpz_class, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4;
+    mpz_class d;
+    d = c * a + b; ASSERT_ALWAYS(d == 11);
+  }
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4;
+    mpz_class d;
+    d = c * a - b; ASSERT_ALWAYS(d == 5);
+  }
+}
+
+static void
+check_mpz_20 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <mpz_class, __gmp_expr<mpz_t, T>, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0;
+    mpz_class e;
+    e = a * (b + d) + c; ASSERT_ALWAYS(e == 20);
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0;
+    mpz_class e;
+    e = a * (b + d) - c; ASSERT_ALWAYS(e == 12);
+  }
+}
+
+static void
+check_mpz_21 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, mpz_class, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    signed int d = 5;
+    mpz_class e;
+    e = (a - d) * b + c; ASSERT_ALWAYS(e == -5);
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    signed int d = 5;
+    mpz_class e;
+    e = (a - d) * b - c; ASSERT_ALWAYS(e == -13);
+  }
+}
+
+static void
+check_mpz_22 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, U, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4, d = 5;
+    mpz_class e;
+    e = (a + c) * d + b; ASSERT_ALWAYS(e == 33);
+  }
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4, d = 5;
+    mpz_class e;
+    e = (a + c) * d - b; ASSERT_ALWAYS(e == 27);
+  }
+}
+
+static void
+check_mpz_23 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <T, __gmp_expr<mpz_t, U>, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3);
+    double c = 4.0, d = 5.0;
+    mpz_class e;
+    e = c * (a + d) + b; ASSERT_ALWAYS(e == 31);
+  }
+  {
+    mpz_class a(2), b(3);
+    double c = 4.0, d = 5.0;
+    mpz_class e;
+    e = c * (a + d) - b; ASSERT_ALWAYS(e == 25);
+  }
+
+}
+
+static void
+check_mpz_24 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, __gmp_expr<mpz_t, U>, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    signed int d = 5, e = 6;
+    mpz_class f;
+    f = (a - d) * (b + e) + c; ASSERT_ALWAYS(f == -23);
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    signed int d = 5, e = 6;
+    mpz_class f;
+    f = (a - d) * (b + e) - c; ASSERT_ALWAYS(f == -31);
+  }
+}
+
+static void
+check_mpz_25 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <mpz_class, mpz_class, Op1> >, __gmp_expr<mpz_t, T>, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    unsigned int d = 5;
+    mpz_class e;
+    e = a * b + (c - d); ASSERT_ALWAYS(e == 5);
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    unsigned int d = 5;
+    mpz_class e;
+    e = a * b - (c - d); ASSERT_ALWAYS(e == 7);
+  }
+}
+
+static void
+check_mpz_26 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <mpz_class, T, Op1> >, __gmp_expr<mpz_t, U>, Op2> >
+  {
+    mpz_class a(2), b(3);
+    double c = 4.0, d = 5.0;
+    mpz_class e;
+    e = a * c + (b + d); ASSERT_ALWAYS(e == 16);
+  }
+  {
+    mpz_class a(2), b(3);
+    double c = 4.0, d = 5.0;
+    mpz_class e;
+    e = a * c - (b + d); ASSERT_ALWAYS(e == 0);
+  }
+}
+
+static void
+check_mpz_27 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <T, mpz_class, Op1> >, __gmp_expr<mpz_t, U>, Op2> >
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4, d = 5;
+    mpz_class e;
+    e = c * a + (b - d); ASSERT_ALWAYS(e == 6);
+  }
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4, d = 5;
+    mpz_class e;
+    e = c * a - (b - d); ASSERT_ALWAYS(e == 10);
+  }
+}
+
+static void
+check_mpz_28 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <mpz_class, __gmp_expr<mpz_t, T>, Op1> >, __gmp_expr<mpz_t, U>, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    unsigned int d = 5, e = 6;
+    mpz_class f;
+    f = a * (b - d) + (c + e); ASSERT_ALWAYS(f == 6);
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    unsigned int d = 5, e = 6;
+    mpz_class f;
+    f = a * (b - d) - (c + e); ASSERT_ALWAYS(f == -14);
+  }
+}
+
+static void
+check_mpz_29 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, mpz_class, Op1> >, __gmp_expr<mpz_t, U>, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0, e = 6.0;
+    mpz_class f;
+    f = (a + d) * b + (c - e); ASSERT_ALWAYS(f == 19);
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0, e = 6.0;
+    mpz_class f;
+    f = (a + d) * b - (c - e); ASSERT_ALWAYS(f == 23);
+  }
+}
+
+static void
+check_mpz_30 (void)
+{
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, U, Op1> >, __gmp_expr<mpz_t, V>, Op2> >
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4, d = 5, e = 6;
+    mpz_class f;
+    f = (a + c) * d + (b + e); ASSERT_ALWAYS(f == 39);
+  }
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4, d = 5, e = 6;
+    mpz_class f;
+    f = (a + c) * d - (b + e); ASSERT_ALWAYS(f == 21);
+  }
+}
+
+static void
+check_mpz_31 (void)
+{
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <T, __gmp_expr<mpz_t, U>, Op1> >, __gmp_expr<mpz_t, V>, Op2> >
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4, d = 5, e = 6;
+    mpz_class f;
+    f = c * (a + d) + (b - e); ASSERT_ALWAYS(f == 25);
+  }
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4, d = 5, e = 6;
+    mpz_class f;
+    f = c * (a + d) - (b - e); ASSERT_ALWAYS(f == 31);
+  }
+}
+
+static void
+check_mpz_32 (void)
+{
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, __gmp_expr<mpz_t, U>, Op1> >,
+  // __gmp_expr<mpz_t, V>, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0, e = 6.0, f = 7.0;
+    mpz_class g;
+    g = (a + d) * (b - e) + (c + f); ASSERT_ALWAYS(g == -10);
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0, e = 6.0, f = 7.0;
+    mpz_class g;
+    g = (a + d) * (b - e) - (c + f); ASSERT_ALWAYS(g == -32);
+  }
+}
+
+void
+check_mpq (void)
+{
+  // currently there's no ternary mpq operation
+}
+
+void
+check_mpf (void)
+{
+  // currently there's no ternary mpf operation
+}
+
+
+int
+main (void)
+{
+  tests_start();
+
+  check_mpz_1 ();
+  check_mpz_2 ();
+  check_mpz_3 ();
+  check_mpz_4 ();
+  check_mpz_5 ();
+  check_mpz_6 ();
+  check_mpz_7 ();
+  check_mpz_8 ();
+  check_mpz_9 ();
+  check_mpz_10 ();
+  check_mpz_11 ();
+  check_mpz_12 ();
+  check_mpz_13 ();
+  check_mpz_14 ();
+  check_mpz_15 ();
+  check_mpz_16 ();
+  check_mpz_17 ();
+  check_mpz_18 ();
+  check_mpz_19 ();
+  check_mpz_20 ();
+  check_mpz_21 ();
+  check_mpz_22 ();
+  check_mpz_23 ();
+  check_mpz_24 ();
+  check_mpz_25 ();
+  check_mpz_26 ();
+  check_mpz_27 ();
+  check_mpz_28 ();
+  check_mpz_29 ();
+  check_mpz_30 ();
+  check_mpz_31 ();
+  check_mpz_32 ();
+
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-unary.cc b/tests/cxx/t-unary.cc
new file mode 100644
index 0000000..c7d8bf6
--- /dev/null
+++ b/tests/cxx/t-unary.cc
@@ -0,0 +1,132 @@
+/* Test mp*_class unary expressions.
+
+Copyright 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >
+  {
+    mpz_class a(1);
+    mpz_class b(+a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpz_class a(2);
+    mpz_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);
+  }
+  {
+    mpz_class a(3);
+    mpz_class b;
+    b = ~a; ASSERT_ALWAYS(b == -4);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >
+  {
+    mpz_class a(1);
+    mpz_class b(-(-a)); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpz_class a(2);
+    mpz_class b;
+    b = -(-(-a)); ASSERT_ALWAYS(b == -2);
+  }
+}
+
+void
+check_mpq (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(1);
+    mpq_class b(+a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpq_class a(2);
+    mpq_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >
+  {
+    mpq_class a(1);
+    mpq_class b(-(-a)); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpq_class a(2);
+    mpq_class b;
+    b = -(-(-a)); ASSERT_ALWAYS(b == -2);
+  }
+}
+
+void
+check_mpf (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(1);
+    mpf_class b(+a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpf_class a(2);
+    mpf_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >
+  {
+    mpf_class a(1);
+    mpf_class b(-(-a)); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpf_class a(2);
+    mpf_class b;
+    b = -(-(-a)); ASSERT_ALWAYS(b == -2);
+  }
+}
+
+
+int
+main (void)
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/devel/gen-test-longlong_h.c b/tests/devel/gen-test-longlong_h.c
new file mode 100644
index 0000000..939c3f2
--- /dev/null
+++ b/tests/devel/gen-test-longlong_h.c
@@ -0,0 +1,140 @@
+/*
+Copyright 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+typedef unsigned long mp_limb_t; /* neat */
+
+void
+one (const char *op, size_t ind, mp_limb_t m0, mp_limb_t s0)
+{
+  printf ("static void f%zu(mp_limb_t*r1p,mp_limb_t*r0p){", ind);
+  printf ("mp_limb_t r1,r0;");
+  printf ("%s(r1,r0,0,%ld,0,%ld);", op, (long) m0, (long) s0);
+  printf ("*r1p=r1;*r0p=r0;");
+  printf ("}\n");
+}
+
+mp_limb_t ops[1000];
+
+enum what_t {ADD, SUB};
+
+int
+main (int argc, char **argv)
+{
+  size_t n_operands = 0;
+  size_t n_functions = 0;
+  const char *op;
+  enum what_t what;
+
+  if (argc == 2 && strcmp (argv[1], "add") == 0)
+    {
+      op = "add_ssaaaa";
+      what = ADD;
+    }
+  else if (argc == 2 && strcmp (argv[1], "sub") == 0)
+    {
+      op = "sub_ddmmss";
+      what = SUB;
+    }
+  else
+    {
+      fprintf (stderr, "what do you want me to do?\n");
+      exit (1);
+    }
+  /* Operand set: +-2^i and +-2^i -/+ 1, for i = 0 .. 15.  */
+  for (int i = 0; i < 16; i++)
+    {
+      ops[n_operands++] = 1 << i;
+      ops[n_operands++] = -(1 << i);
+      ops[n_operands++] = (1 << i) - 1;
+      ops[n_operands++] = -(1 << i) - 1;
+    }
+
+  printf ("#include <stdlib.h>\n");
+  printf ("#include <stdio.h>\n");
+  printf ("#include \"gmp-impl.h\"\n");
+  printf ("#include \"longlong.h\"\n");
+
+  /* Print out ops[] definition.  */
+  printf ("static const int ops[%zu] = {\n", n_operands);
+  for (size_t i = 0; i < n_operands; i++)
+    {
+      printf ("%ld,", (long) ops[i]);
+      if ((i + 1) % 4 == 0)
+	puts ("");
+    }
+  printf ("};\n");
+
+  /* Generate functions and print them.  */
+  for (size_t i = 0; i < n_operands; i++)
+    {
+      for (size_t j = 0; j < n_operands; j++)
+	{
+	  one (op, n_functions++, ops[i], ops[j]);
+	}
+    }
+
+  /* Print out function pointer table.  */
+  printf ("typedef void (*func_t) (mp_limb_t*, mp_limb_t*);\n");
+  printf ("static const func_t funcs[%zu] = {\n", n_functions);
+  for (size_t i = 0; i < n_functions; i++)
+    {
+      printf ("f%zu,", i);
+      if ((i + 1) % 16 == 0)
+	puts ("");
+    }
+  printf ("};\n");
+
+  /* Print out table of reference results.  */
+  printf ("static const int ref[%zu][2] = {\n", n_functions);
+  for (size_t i = 0; i < n_operands; i++)
+    {
+      for (size_t j = 0; j < n_operands; j++)
+	{
+	  if (what == ADD)
+	    printf ("{%6ld,%2ld},", (long) ( ops[i] + ops[j]), (long) ((mp_limb_t) ((ops[i] + ops[j]) < ops[i])));
+	  else     /* SUB */
+	    printf ("{%6ld,%2ld},", (long) ( ops[i] - ops[j]), (long) (-(mp_limb_t) (ops[i] < ops[j])));
+	  if ((i * n_operands + j) % 8 == 0)
+	    puts ("");
+	}
+    }
+  printf ("};\n");
+
+  printf ("int main ()\n{\n");
+  printf ("  mp_limb_t r1, r0;\n");
+  printf ("  int err = 0;\n");
+  printf ("  size_t ind = 0;\n");
+  printf ("  for (size_t i = 0; i < %zu; i++)\n", n_functions);
+  printf ("    {\n");
+  printf ("      int ii = i / %zu, jj = i %% %zu;\n", n_operands, n_operands);
+  printf ("      funcs[i](&r1, &r0);\n");
+  printf ("      if (r0 != (mp_limb_signed_t) ref[ind][0] || r1 != (mp_limb_signed_t) ref[ind][1]) {\n");
+  printf ("         printf (\"error for f%%zu(%%d,%%d): want (%%d,%%d) got (%%d,%%d)\\n\", i, (int) ops[ii], (int) ops[jj], ref[ind][1], ref[ind][0], (int) r1, (int) r0);\n");
+  printf ("         err++;\n");
+  printf ("       }\n");
+  printf ("      ind++;\n");
+  printf ("    }\n");
+
+  printf ("  return err != 0;\n");
+  printf ("}\n");
+  return 0;
+}
diff --git a/tests/devel/primes.c b/tests/devel/primes.c
new file mode 100644
index 0000000..8e58962
--- /dev/null
+++ b/tests/devel/primes.c
@@ -0,0 +1,424 @@
+/*
+Copyright 2018-2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+/* Usage:
+
+   ./primes [p|c] [n0] <nMax>
+
+     Checks mpz_probab_prime_p(n, r) exhaustively, starting from n=n0
+     up to nMax.
+     If n0 * n0 > nMax, the interval is sieved piecewise, else the
+     full interval [0..nMax] is sieved at once.
+     With the parameter "p" (or nothing), tests all numbers. With "c"
+     only composites are tested.
+
+   ./primes n|N [n0] <nMax>
+
+     Checks mpz_nextprime() exhaustively, starting from n=n0 up to
+     nMax. With "n", only the sequence of primes is checked, with "N"
+     the function is tested on every number in the interval.
+
+     WARNING: The full interval [0..nMax] is sieved at once, even if
+     only a piece is needed. This may require a lot of memory!
+
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+#define STOP(x) return (x)
+/* #define STOP(x) x */
+#define REPS 10
+/* #define TRACE(x,n) if ((n)>1) {x;} */
+#define TRACE(x,n)
+
+/* The full primesieve.c is included, just for block_resieve, that
+   is not exported ... */
+#undef gmp_primesieve
+#include "../../primesieve.c"
+
+#ifndef BLOCK_SIZE
+#define BLOCK_SIZE 2048
+#endif
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_fto_bit(n) / GMP_LIMB_BITS + 1; }
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end)			\
+    __max_i = (end);						\
+								\
+    do {							\
+      ++__i;							\
+      if ((*__sieve & __mask) == 0)				\
+	{							\
+	  mp_limb_t prime;					\
+	  prime = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
+  do {								\
+    mp_limb_t __mask, *__sieve, __max_i, __i;			\
+								\
+    __i = (start)-(off);					\
+    __sieve = (sieve) + __i / GMP_LIMB_BITS;			\
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
+    __i += (off);						\
+								\
+    LOOP_ON_SIEVE_CONTINUE(prime,end)
+
+#define LOOP_ON_SIEVE_STOP					\
+	}							\
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
+      __sieve += __mask & 1;					\
+    }  while (__i <= __max_i)
+
+#define LOOP_ON_SIEVE_END					\
+    LOOP_ON_SIEVE_STOP;						\
+  } while (0)
+
+mpz_t g;
+
+int something_wrong (mpz_t er, int exp)
+{
+  fprintf (stderr, "value = %lu , expected = %i\n", mpz_get_ui (er), exp);
+  return -1;
+}
+
+int
+check_pprime (unsigned long begin, unsigned long end, int composites)
+{
+  begin = (begin / 6U) * 6U;
+  for (;(begin < 2) & (begin <= end); ++begin)
+    {
+      *(g->_mp_d) = begin;
+      TRACE(printf ("-%li ", begin),1);
+      if (mpz_probab_prime_p (g, REPS))
+	STOP (something_wrong (g, 0));
+    }
+  for (;(begin < 4) & (begin <= end); ++begin)
+    {
+      *(g->_mp_d) = begin;
+      TRACE(printf ("+%li ", begin),2);
+      if (!composites && !mpz_probab_prime_p (g, REPS))
+	STOP (something_wrong (g, 1));
+    }
+  if (end > 4) {
+    if ((end > 10000) && (begin > end / begin))
+      {
+	mp_limb_t *sieve, *primes;
+	mp_size_t size_s, size_p, off;
+	unsigned long start;
+
+	mpz_set_ui (g, end);
+	mpz_sqrt (g, g);
+	start = mpz_get_ui (g) + GMP_LIMB_BITS;
+	size_p = primesieve_size (start);
+
+	primes = __GMP_ALLOCATE_FUNC_LIMBS (size_p);
+	gmp_primesieve (primes, start);
+
+	size_s = BLOCK_SIZE * 2;
+	sieve = __GMP_ALLOCATE_FUNC_LIMBS (size_s);
+	off = n_cto_bit(begin);
+
+	do {
+	  TRACE (printf ("off =%li\n", off),3);
+	  block_resieve (sieve, BLOCK_SIZE, off, primes);
+	  TRACE (printf ("LOOP =%li - %li\n", id_to_n (off+1), id_to_n (off + BLOCK_SIZE * GMP_LIMB_BITS)),3);
+	  LOOP_ON_SIEVE_BEGIN (prime, off, off + BLOCK_SIZE * GMP_LIMB_BITS - 1,
+			       off, sieve);
+
+	  do {
+	    *(g->_mp_d) = begin;
+	    TRACE(printf ("-%li ", begin),1);
+	    if (mpz_probab_prime_p (g, REPS))
+	      STOP (something_wrong (g, 0));
+	    if ((begin & 0xff) == 0)
+	      {
+		spinner();
+		if ((begin & 0xfffffff) == 0)
+		  printf ("%li (0x%lx)\n", begin, begin);
+	      }
+	  } while (++begin < prime);
+
+	  *(g->_mp_d) = begin;
+	  TRACE(printf ("+%li ", begin),2);
+	  if (!composites && ! mpz_probab_prime_p (g, REPS))
+	    STOP (something_wrong (g, 1));
+	  ++begin;
+
+	  LOOP_ON_SIEVE_END;
+	  off += BLOCK_SIZE * GMP_LIMB_BITS;
+	} while (begin < end);
+
+	__GMP_FREE_FUNC_LIMBS (sieve, size_s);
+	__GMP_FREE_FUNC_LIMBS (primes, size_p);
+      }
+    else
+      {
+	mp_limb_t *sieve;
+	mp_size_t size;
+	unsigned long start;
+
+	size = primesieve_size (end);
+
+	sieve = __GMP_ALLOCATE_FUNC_LIMBS (size);
+	gmp_primesieve (sieve, end);
+	start = MAX (begin, 5) | 1;
+	LOOP_ON_SIEVE_BEGIN (prime, n_cto_bit(start),
+			     n_fto_bit (end), 0, sieve);
+
+	do {
+	  *(g->_mp_d) = begin;
+	  TRACE(printf ("-%li ", begin),1);
+	  if (mpz_probab_prime_p (g, REPS))
+	    STOP (something_wrong (g, 0));
+	  if ((begin & 0xff) == 0)
+	    {
+	      spinner();
+	      if ((begin & 0xfffffff) == 0)
+		printf ("%li (0x%lx)\n", begin, begin);
+	    }
+	} while (++begin < prime);
+
+	*(g->_mp_d) = begin;
+	TRACE(printf ("+%li ", begin),2);
+	if (!composites && ! mpz_probab_prime_p (g, REPS))
+	  STOP (something_wrong (g, 1));
+	++begin;
+
+	LOOP_ON_SIEVE_END;
+
+	__GMP_FREE_FUNC_LIMBS (sieve, size);
+      }
+  }
+
+  for (;begin < end; ++begin)
+    {
+      *(g->_mp_d) = begin;
+      TRACE(printf ("-%li ", begin),1);
+      if (mpz_probab_prime_p (g, REPS))
+	STOP (something_wrong (g, 0));
+    }
+
+  gmp_printf ("%Zd\n", g);
+  return 0;
+}
+
+int
+check_nprime (unsigned long begin, unsigned long end)
+{
+  if (begin < 2)
+    {
+      *(g->_mp_d) = begin;
+      g->_mp_size = begin;
+      TRACE(printf ("%li ", begin),1);
+      mpz_nextprime (g, g);
+      if (mpz_cmp_ui (g, 2) != 0)
+	STOP (something_wrong (g, 2));
+      begin = mpz_get_ui (g);
+    }
+  if (begin < 3)
+    {
+      *(g->_mp_d) = begin;
+      TRACE(printf ("%li ", begin),1);
+      mpz_nextprime (g, g);
+      if (mpz_cmp_ui (g, 3) != 0)
+	STOP (something_wrong (g, 3));
+      begin = mpz_get_ui (g);
+    }
+  if (end > 4)
+      {
+	mp_limb_t *sieve;
+	mp_size_t size;
+	unsigned long start;
+
+	size = primesieve_size (end);
+
+	sieve = __GMP_ALLOCATE_FUNC_LIMBS (size);
+	gmp_primesieve (sieve, end);
+	start = MAX (begin, 5) | 1;
+	*(g->_mp_d) = begin;
+	LOOP_ON_SIEVE_BEGIN (prime, n_cto_bit(start),
+			     n_fto_bit (end), 0, sieve);
+
+	mpz_nextprime (g, g);
+	if (mpz_cmp_ui (g, prime) != 0)
+	  STOP (something_wrong (g, prime));
+
+	if (prime - start > 200)
+	  {
+	    start = prime;
+	    spinner();
+	    if (prime - begin > 0xfffffff)
+	      {
+		begin = prime;
+		printf ("%li (0x%lx)\n", begin, begin);
+	      }
+	  }
+
+	LOOP_ON_SIEVE_END;
+
+	__GMP_FREE_FUNC_LIMBS (sieve, size);
+      }
+
+  if (mpz_cmp_ui (g, end) < 0)
+    {
+      mpz_nextprime (g, g);
+      if (mpz_cmp_ui (g, end) <= 0)
+	STOP (something_wrong (g, -1));
+    }
+
+  gmp_printf ("%Zd\n", g);
+  return 0;
+}
+
+int
+check_Nprime (unsigned long begin, unsigned long end)
+{
+  mpz_t op;
+  mpz_init_set_ui (op, end);
+
+  for (;begin < 2; ++begin)
+    {
+      *(op->_mp_d) = begin;
+      op->_mp_size = begin;
+      TRACE(printf ("%li ", begin),1);
+      mpz_nextprime (g, op);
+      if (mpz_cmp_ui (g, 2) != 0)
+	STOP (something_wrong (g, 2));
+    }
+  if (begin < 3)
+    {
+      *(op->_mp_d) = begin;
+      TRACE(printf ("%li ", begin),1);
+      mpz_nextprime (g, op);
+      if (mpz_cmp_ui (g, 3) != 0)
+	STOP (something_wrong (g, 3));
+      begin = 3;
+    }
+  if (end > 4)
+      {
+	mp_limb_t *sieve;
+	mp_size_t size;
+	unsigned long start;
+	unsigned long opl;
+
+	size = primesieve_size (end);
+
+	sieve = __GMP_ALLOCATE_FUNC_LIMBS (size);
+	gmp_primesieve (sieve, end);
+	start = MAX (begin, 5) | 1;
+	opl = begin;
+	LOOP_ON_SIEVE_BEGIN (prime, n_cto_bit(start),
+			     n_fto_bit (end), 0, sieve);
+
+	do {
+	  *(op->_mp_d) = opl;
+	  mpz_nextprime (g, op);
+	  if (mpz_cmp_ui (g, prime) != 0)
+	    STOP (something_wrong (g, prime));
+	  ++opl;
+	} while (opl < prime);
+
+	if (prime - start > 200)
+	  {
+	    start = prime;
+	    spinner();
+	    if (prime - begin > 0xfffffff)
+	      {
+		begin = prime;
+		printf ("%li (0x%lx)\n", begin, begin);
+	      }
+	  }
+
+	LOOP_ON_SIEVE_END;
+
+	__GMP_FREE_FUNC_LIMBS (sieve, size);
+      }
+
+  if (mpz_cmp_ui (g, end) < 0)
+    {
+      mpz_nextprime (g, g);
+      if (mpz_cmp_ui (g, end) <= 0)
+	STOP (something_wrong (g, -1));
+    }
+
+  gmp_printf ("%Zd\n", g);
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  int ret, mode = 0;
+  unsigned long begin = 0, end = 0;
+
+  for (;argc > 1;--argc,++argv)
+    switch (*argv[1]) {
+    case 'p':
+      mode = 0;
+      break;
+    case 'c':
+      mode = 2;
+      break;
+    case 'n':
+      mode = 1;
+      break;
+    case 'N':
+      mode = 3;
+      break;
+    default:
+      begin = end;
+      end = atol (argv[1]);
+    }
+
+  if (begin >= end)
+    {
+      fprintf (stderr, "usage: primes [N|n|p|c] [n0] <nMax>\n");
+      exit (1);
+    }
+
+  mpz_init_set_ui (g, ULONG_MAX);
+
+  switch (mode) {
+  case 1:
+    ret = check_nprime (begin, end);
+    break;
+  case 3:
+    ret = check_Nprime (begin, end);
+    break;
+  default:
+    ret = check_pprime (begin, end, mode);
+  }
+
+  mpz_clear (g);
+
+  if (ret == 0)
+    printf ("Prime tests checked in [%lu - %lu] [0x%lx - 0x%lx].\n", begin, end, begin, end);
+  return ret;
+}
diff --git a/tests/devel/sqrtrem_1_2.c b/tests/devel/sqrtrem_1_2.c
new file mode 100644
index 0000000..3951191
--- /dev/null
+++ b/tests/devel/sqrtrem_1_2.c
@@ -0,0 +1,401 @@
+/*
+Copyright 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+/* Usage:
+
+   ./sqrtrem_1_2 x
+
+     Checks mpn_sqrtrem() exhaustively, starting from 0, incrementing
+     the operand by a single unit, until all values handled by
+     mpn_sqrtrem{1,2} are tested. SLOW.
+
+   ./sqrtrem_1_2 s 1
+
+     Checks some special cases for mpn_sqrtrem(). I.e. values of the form
+     2^k*i and 2^k*(i+1)-1, with k=2^n and 0<i<2^k, until all such values,
+     handled by mpn_sqrtrem{1,2}, are tested.
+     Currently supports only the test of values that fit in one limb.
+     Less slow than the exhaustive test.
+
+   ./sqrtrem_1_2 c
+
+     Checks all corner cases for mpn_sqrtrem(). I.e. values of the form
+     i*i and (i+1)*(i+1)-1, for each value of i, until all such values,
+     handled by mpn_sqrtrem{1,2}, are tested.
+     Slightly faster than the special cases test.
+
+   For larger values, use
+   ./try mpn_sqrtrem
+
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+/* STOP: bail out of the current checker with the given error value.
+   The commented alternative keeps running after a failure instead.  */
+#define STOP(x) return (x)
+/* #define STOP(x) x */
+/* Progress report: derive a bit count from the current operand q of v
+   limbs and call spinner() — spinner_count/spinner presumably come
+   from the tests support library; confirm against tests/spinner.c.  */
+#define SPINNER(v)					\
+  do {							\
+    MPN_SIZEINBASE_2EXP (spinner_count, q, v, 1);	\
+    --spinner_count;					\
+    spinner();						\
+  } while (0)
+
+/* Print the failing root/remainder triple on stderr and yield the
+   error code -1, for use as `STOP (something_wrong (...))'.  */
+int
+something_wrong (mp_limb_t er, mp_limb_t ec, mp_limb_t es)
+{
+  fprintf (stderr, "root = %lu , rem = {%lu , %lu}\n",
+	   (long unsigned) es, (long unsigned) ec, (long unsigned) er);
+  return -1;
+}
+
+/* Exhaustive check of mpn_sqrtrem: walk the operand upward one unit at
+   a time, tracking the expected root (es) and remainder (er)
+   incrementally, first over single-limb operands then over two-limb
+   operands while the remainder still fits in one limb.
+   justone != 0 stops after the single-limb pass; quick != 0 starts the
+   scan near the top of the single-limb range.
+   Returns 0 on success, -1 (via STOP) on any mismatch.
+   Fixes vs. previous revision: removed the unused local `bits' and the
+   typos "reminder"/"jet" in the progress messages.  */
+int
+check_all_values (int justone, int quick)
+{
+  mp_limb_t es, mer, er, s[1], r[2], q[2];
+  mp_size_t x;
+
+  es=1;
+  if (quick) {
+    printf ("Quick, skipping some... (%u)\n", GMP_NUMB_BITS - 2);
+    es <<= GMP_NUMB_BITS / 2 - 1;
+  }
+  er=0;
+  mer= es << 1; /* mer = es * 2 = largest remainder for root es */
+  *q = es * es;
+  printf ("All values tested, up to bits:\n");
+  do {
+    x = mpn_sqrtrem (s, r, q, 1);
+    if (UNLIKELY (x != (er != 0)) || UNLIKELY (*s != es)
+	|| UNLIKELY ((x == 1) && (er != *r)))
+      STOP (something_wrong (er, 0, es));
+
+    if (UNLIKELY (er == mer)) {
+      /* Reached (es+1)^2: advance the root, remainder restarts at 0.  */
+      ++es;
+      if (UNLIKELY ((es & 0xff) == 0))
+	SPINNER(1);
+      mer +=2; /* mer = es * 2 */
+      er = 0;
+    } else
+      ++er;
+    ++*q;
+  } while (*q != 0);
+  q[1] = 1;
+  SPINNER(2);
+  printf ("\nValues of a single limb, tested.\n");
+  if (justone) return 0;
+  /* Continue into two-limb operands; es/mer/er carry over.  */
+  printf ("All values tested, up to bits:\n");
+  do {
+    x = mpn_sqrtrem (s, r, q, 2);
+    if (UNLIKELY (x != (er != 0)) || UNLIKELY (*s != es)
+	|| UNLIKELY ((x == 1) && (er != *r)))
+      STOP (something_wrong (er, 0, es));
+
+    if (UNLIKELY (er == mer)) {
+      ++es;
+      if (UNLIKELY ((es & 0x7f) == 0))
+	SPINNER(2);
+      mer +=2; /* mer = es * 2 */
+      if (UNLIKELY (mer == 0))
+	break; /* remainder no longer fits a single limb */
+      er = 0;
+    } else
+      ++er;
+    q[1] += (++*q == 0); /* two-limb increment with carry */
+  } while (1);
+  SPINNER(2);
+  printf ("\nValues with at most a limb for remainder, tested.\n");
+  printf ("Testing more values not supported, yet.\n");
+  return 0;
+}
+
+/* Normalise a (root, remainder) pair: while the remainder k exceeds
+   2*(*s), absorb 2*(*s)+1 into the root.  Updates *s in place and
+   returns the reduced remainder.  */
+mp_limb_t
+upd (mp_limb_t *s, mp_limb_t k)
+{
+  mp_limb_t root;
+
+  for (root = *s; k > 2 * root; ++root)
+    k -= 2 * root + 1;
+  *s = root;
+  return k;
+}
+
+/* Step a (root, remainder) pair by one unit of the operand: bump the
+   remainder, or — when it would pass 2*(*s) — bump the root and wrap
+   the remainder instead.  Returns the new remainder.  */
+mp_limb_t
+upd1 (mp_limb_t *s, mp_limb_t k)
+{
+  mp_limb_t root = *s;
+
+  if (UNLIKELY (k >= root * 2))
+    {
+      *s = root + 1;
+      return k - root * 2;
+    }
+  return k + 1;
+}
+
+/* Special-case check of mpn_sqrtrem on values of the form 2^k*i and
+   2^k*(i+1)-1 (k a power of two, 0 < i < 2^k): single-limb operands,
+   then two-limb operands with a one-limb remainder, then two-limb
+   operands with a two-limb remainder (her tracks its high limb).
+   justone != 0 stops after the single-limb pass; quick != 0 skips
+   ahead inside each pass.  Returns 0 on success, -1 via STOP on a
+   mismatch.  Fixes vs. previous revision: removed the unused local
+   `bits' and the "reminder" typo in a progress message.  */
+int
+check_some_values (int justone, int quick)
+{
+  mp_limb_t es, her, er, k, s[1], r[2], q[2];
+  mp_size_t x;
+
+  es = 1 << 1;
+  if (quick) {
+    es <<= GMP_NUMB_BITS / 4 - 1;
+    printf ("Quick, skipping some... (%u)\n", GMP_NUMB_BITS / 2);
+  }
+  er = 0;
+  *q = es * es;
+  printf ("High-half values tested, up to bits:\n");
+  do {
+    k  = *q - 1; /* mask of low bits below the current power of two */
+    do {
+      x = mpn_sqrtrem (s, r, q, 1);
+      if (UNLIKELY (x != (er != 0)) || UNLIKELY (*s != es)
+	  || UNLIKELY ((x == 1) && (er != *r)))
+	STOP (something_wrong (er, 0, es));
+
+      if (UNLIKELY ((es & 0xffff) == 0))
+	SPINNER(1);
+      if ((*q & k) == 0) {
+	/* q is 2^k*i: jump to 2^k*(i+1)-1 and renormalise.  */
+	*q |= k;
+	er = upd (&es, k + er);
+      } else {
+	/* q is 2^k*(i+1)-1: step to the next multiple of 2^k.  */
+	++*q;
+	er = upd1 (&es, er);
+      }
+    } while (es & k);
+  } while (*q != 0);
+  q[1] = 1;
+  SPINNER(2);
+  printf ("\nValues of a single limb, tested.\n");
+  if (justone) return 0;
+  if (quick) {
+    es <<= GMP_NUMB_BITS / 2 - 1;
+    q[1] <<= GMP_NUMB_BITS - 2;
+    printf ("Quick, skipping some... (%u)\n", GMP_NUMB_BITS - 2);
+  }
+  /* Two-limb operands, remainder still fits one limb: alternate
+     between q[1]:GMP_NUMB_MAX and the next q[1]+1:0.  */
+  printf ("High-half values tested, up to bits:\n");
+  do {
+    x = mpn_sqrtrem (s, r, q, 2);
+    if (UNLIKELY (x != (er != 0)) || UNLIKELY (*s != es)
+	|| UNLIKELY ((x == 1) && (er != *r)))
+      STOP (something_wrong (er, 0, es));
+
+    if (*q == 0) {
+      *q = GMP_NUMB_MAX;
+      if (UNLIKELY ((es & 0xffff) == 0)) {
+	if (UNLIKELY (es == GMP_NUMB_HIGHBIT))
+	  break;
+	SPINNER(2);
+      }
+      /* er = er + GMP_NUMB_MAX - 1 - es*2 // postponed */
+      ++es;
+      /* er = er + GMP_NUMB_MAX - 1 - 2*(es-1) =
+            = er +(GMP_NUMB_MAX + 1)- 2* es = er - 2*es */
+      er = upd (&es, er - 2 * es);
+    } else {
+      *q = 0;
+      ++q[1];
+      er = upd1 (&es, er);
+    }
+  } while (1);
+  SPINNER(2);
+  printf ("\nValues with at most a limb for remainder, tested.\n");
+  er = GMP_NUMB_MAX; her = 0; /* her = high limb of the remainder */
+
+  printf ("High-half values tested, up to bits:\n");
+  do {
+    x = mpn_sqrtrem (s, r, q, 2);
+    if (UNLIKELY (x != (her?2:(er != 0))) || UNLIKELY (*s != es)
+	|| UNLIKELY ((x != 0) && ((er != *r) || ((x == 2) && (r[1] != 1)))))
+      STOP (something_wrong (er, her, es));
+
+    if (*q == 0) {
+      *q = GMP_NUMB_MAX;
+      if (UNLIKELY ((es & 0xffff) == 0)) {
+	SPINNER(2);
+      }
+      if (her) {
+	++es;
+	her = 0;
+	er = er - 2 * es;
+      } else {
+	her = --er != GMP_NUMB_MAX; /* borrow into the high limb */
+	if (her & (er > es * 2)) {
+	  er -= es * 2 + 1;
+	  her = 0;
+	  ++es;
+	}
+      }
+    } else {
+      *q = 0;
+      if (++q[1] == 0) break;
+      if ((her == 0) | (er < es * 2)) {
+	her += ++er == 0; /* carry into the high limb */
+      } else {
+	  er -= es * 2;
+	  her = 0;
+	  ++es;
+      }
+    }
+  } while (1);
+  printf ("| %u\nValues of at most two limbs, tested.\n", GMP_NUMB_BITS*2);
+  return 0;
+}
+
+/* Corner-case check of mpn_sqrtrem: test only the extreme operands
+   i*i (remainder 0) and (i+1)*(i+1)-1 (maximal remainder 2*i) for
+   every root i; first single-limb operands, then two-limb operands
+   with a one-limb remainder, finally two-limb operands whose maximal
+   remainder needs a second limb (r[1] == 1).
+   justone != 0 stops after the single-limb pass; quick != 0 skips
+   ahead inside each pass.  Returns 0 on success, -1 via STOP on a
+   mismatch.  Fixes vs. previous revision: removed the unused local
+   `bits' and the "reminder" typo in a progress message.  */
+int
+check_corner_cases (int justone, int quick)
+{
+  mp_limb_t es, er, s[1], r[2], q[2];
+  mp_size_t x;
+
+  es = 1;
+  if (quick) {
+    es <<= GMP_NUMB_BITS / 2 - 1;
+    printf ("Quick, skipping some... (%u)\n", GMP_NUMB_BITS - 2);
+  }
+  er = 0;
+  *q = es*es;
+  printf ("Corner cases tested, up to bits:\n");
+  do {
+    x = mpn_sqrtrem (s, r, q, 1);
+    if (UNLIKELY (x != (er != 0)) || UNLIKELY (*s != es)
+	|| UNLIKELY ((x == 1) && (er != *r)))
+      STOP (something_wrong (er, 0, es));
+
+    if (er != 0) {
+      /* Was at (es+1)^2-1: step to the next perfect square.  */
+      ++es;
+      if (UNLIKELY ((es & 0xffff) == 0))
+	SPINNER(1);
+      er = 0;
+      ++*q;
+    } else {
+      /* Was at es^2: jump to (es+1)^2-1, the maximal remainder.  */
+      er = es * 2;
+      *q += er;
+    }
+  } while (*q != 0);
+  q[1] = 1;
+  SPINNER(2);
+  printf ("\nValues of a single limb, tested.\n");
+  if (justone) return 0;
+  if (quick) {
+    es <<= GMP_NUMB_BITS / 2 - 1;
+    q[1] <<= GMP_NUMB_BITS - 2;
+    printf ("Quick, skipping some... (%u)\n", GMP_NUMB_BITS - 2);
+    --es;
+    --q[1];
+    q[0] -= es*2+1;
+  }
+  printf ("Corner cases tested, up to bits:\n");
+  do {
+    x = mpn_sqrtrem (s, r, q, 2);
+    if (UNLIKELY (x != (er != 0)) || UNLIKELY (*s != es)
+	|| UNLIKELY ((x == 1) && (er != *r)))
+      STOP (something_wrong (er, 0, es));
+
+    if (er != 0) {
+      ++es;
+      if (UNLIKELY ((es & 0xff) == 0))
+	SPINNER(2);
+      er = 0;
+      q[1] += (++*q == 0); /* two-limb increment with carry */
+      if (UNLIKELY (es == GMP_NUMB_HIGHBIT))
+	break;
+    } else {
+      er = es * 2;
+      add_ssaaaa (q[1], *q, q[1], *q, 0, er);
+    }
+  } while (1);
+  SPINNER(2);
+  printf ("\nValues with at most a limb for remainder, tested.\nCorner cases tested, up to bits:\n");
+  /* Hand-check the boundary where the remainder first needs two limbs.  */
+  x = mpn_sqrtrem (s, r, q, 2);
+  if ((*s != es) || (x != 0))
+    STOP (something_wrong (0, 0, es));
+  q[1] += 1;
+  x = mpn_sqrtrem (s, r, q, 2);
+  if ((*s != es) || (x != 2) || (*r != 0) || (r[1] != 1))
+    STOP (something_wrong (0, 1, es));
+  ++es;
+  q[1] += (++*q == 0);
+  do {
+    x = mpn_sqrtrem (s, r, q, 2);
+    if (UNLIKELY (x != (er != 0) * 2) || UNLIKELY (*s != es)
+	|| UNLIKELY ((x == 2) && ((er != *r) || (r[1] != 1))))
+      STOP (something_wrong (er, er != 0, es));
+
+    if (er != 0) {
+      ++es;
+      if (UNLIKELY (es == 0))
+	break; /* root wrapped: whole two-limb range covered */
+      if (UNLIKELY ((es & 0xff) == 0))
+	SPINNER(2);
+      er = 0;
+      q[1] += (++*q == 0);
+    } else {
+      er = es * 2;
+      add_ssaaaa (q[1], *q, q[1], *q, 1, er);
+    }
+  } while (1);
+  printf ("| %u\nValues of at most two limbs, tested.\n", GMP_NUMB_BITS*2);
+  return 0;
+}
+
+/* Entry point: each argument's first character selects the test driver
+   (x = exhaustive, c = corner cases, s = special cases), limits the
+   run to single-limb values (1) or not (2), or asks for the quick
+   variant (q).  Anything else prints usage and exits.  */
+int
+main (int argc, char **argv)
+{
+  int mode = 0, justone = 0, quick = 0;
+
+  while (argc > 1)
+    {
+      switch (argv[1][0]) {
+      case 'x': mode = 0; break;
+      case 'c': mode = 1; break;
+      case 's': mode = 2; break;
+      case 'q': quick = 1; break;
+      case '1': justone = 1; break;
+      case '2': justone = 0; break;
+      default:
+	fprintf (stderr, "usage: sqrtrem_1_2 [x|c|s] [1|2] [q]\n");
+	exit (1);
+      }
+      --argc;
+      ++argv;
+    }
+
+  if (mode == 1)
+    return check_corner_cases (justone, quick);
+  if (mode == 2)
+    return check_some_values (justone, quick);
+  return check_all_values (justone, quick);
+}
diff --git a/tests/devel/test-add_ssaaaa.c b/tests/devel/test-add_ssaaaa.c
new file mode 100644
index 0000000..b5714f2
--- /dev/null
+++ b/tests/devel/test-add_ssaaaa.c
@@ -0,0 +1,4908 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+/* 64 signed test operands: for each k = 0..15 the four boundary values
+   2^k, -2^k, 2^k-1 and -(2^k+1), probing carry/borrow behaviour around
+   every bit position; the generated fN functions below use the same
+   constants as their second addend.  */
+static const int ops[64] = {
+1,-1,0,-2,
+2,-2,1,-3,
+4,-4,3,-5,
+8,-8,7,-9,
+16,-16,15,-17,
+32,-32,31,-33,
+64,-64,63,-65,
+128,-128,127,-129,
+256,-256,255,-257,
+512,-512,511,-513,
+1024,-1024,1023,-1025,
+2048,-2048,2047,-2049,
+4096,-4096,4095,-4097,
+8192,-8192,8191,-8193,
+16384,-16384,16383,-16385,
+32768,-32768,32767,-32769,
+};
+/* Machine-generated probes: each fN computes, with add_ssaaaa, the
+   two-limb sum of the fixed operands (0:A) + (0:B) — A cycling through
+   1, -1, 0, -2 every 64 functions, B walking the ops[] table — and
+   stores the result into (*r1p, *r0p).  */
+static void f0(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,1);*r1p=r1;*r0p=r0;}
+static void f1(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-1);*r1p=r1;*r0p=r0;}
+static void f2(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,0);*r1p=r1;*r0p=r0;}
+static void f3(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-2);*r1p=r1;*r0p=r0;}
+static void f4(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,2);*r1p=r1;*r0p=r0;}
+static void f5(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-2);*r1p=r1;*r0p=r0;}
+static void f6(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,1);*r1p=r1;*r0p=r0;}
+static void f7(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-3);*r1p=r1;*r0p=r0;}
+static void f8(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,4);*r1p=r1;*r0p=r0;}
+static void f9(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-4);*r1p=r1;*r0p=r0;}
+static void f10(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,3);*r1p=r1;*r0p=r0;}
+static void f11(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-5);*r1p=r1;*r0p=r0;}
+static void f12(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,8);*r1p=r1;*r0p=r0;}
+static void f13(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-8);*r1p=r1;*r0p=r0;}
+static void f14(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,7);*r1p=r1;*r0p=r0;}
+static void f15(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-9);*r1p=r1;*r0p=r0;}
+static void f16(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,16);*r1p=r1;*r0p=r0;}
+static void f17(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-16);*r1p=r1;*r0p=r0;}
+static void f18(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,15);*r1p=r1;*r0p=r0;}
+static void f19(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-17);*r1p=r1;*r0p=r0;}
+static void f20(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,32);*r1p=r1;*r0p=r0;}
+static void f21(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-32);*r1p=r1;*r0p=r0;}
+static void f22(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,31);*r1p=r1;*r0p=r0;}
+static void f23(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-33);*r1p=r1;*r0p=r0;}
+static void f24(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,64);*r1p=r1;*r0p=r0;}
+static void f25(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-64);*r1p=r1;*r0p=r0;}
+static void f26(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,63);*r1p=r1;*r0p=r0;}
+static void f27(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-65);*r1p=r1;*r0p=r0;}
+static void f28(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,128);*r1p=r1;*r0p=r0;}
+static void f29(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-128);*r1p=r1;*r0p=r0;}
+static void f30(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,127);*r1p=r1;*r0p=r0;}
+static void f31(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-129);*r1p=r1;*r0p=r0;}
+static void f32(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,256);*r1p=r1;*r0p=r0;}
+static void f33(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-256);*r1p=r1;*r0p=r0;}
+static void f34(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,255);*r1p=r1;*r0p=r0;}
+static void f35(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-257);*r1p=r1;*r0p=r0;}
+static void f36(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,512);*r1p=r1;*r0p=r0;}
+static void f37(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-512);*r1p=r1;*r0p=r0;}
+static void f38(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,511);*r1p=r1;*r0p=r0;}
+static void f39(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-513);*r1p=r1;*r0p=r0;}
+static void f40(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,1024);*r1p=r1;*r0p=r0;}
+static void f41(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-1024);*r1p=r1;*r0p=r0;}
+static void f42(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,1023);*r1p=r1;*r0p=r0;}
+static void f43(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-1025);*r1p=r1;*r0p=r0;}
+static void f44(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,2048);*r1p=r1;*r0p=r0;}
+static void f45(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-2048);*r1p=r1;*r0p=r0;}
+static void f46(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,2047);*r1p=r1;*r0p=r0;}
+static void f47(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-2049);*r1p=r1;*r0p=r0;}
+static void f48(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,4096);*r1p=r1;*r0p=r0;}
+static void f49(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-4096);*r1p=r1;*r0p=r0;}
+static void f50(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,4095);*r1p=r1;*r0p=r0;}
+static void f51(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-4097);*r1p=r1;*r0p=r0;}
+static void f52(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,8192);*r1p=r1;*r0p=r0;}
+static void f53(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-8192);*r1p=r1;*r0p=r0;}
+static void f54(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,8191);*r1p=r1;*r0p=r0;}
+static void f55(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-8193);*r1p=r1;*r0p=r0;}
+static void f56(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,16384);*r1p=r1;*r0p=r0;}
+static void f57(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-16384);*r1p=r1;*r0p=r0;}
+static void f58(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,16383);*r1p=r1;*r0p=r0;}
+static void f59(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-16385);*r1p=r1;*r0p=r0;}
+static void f60(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,32768);*r1p=r1;*r0p=r0;}
+static void f61(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-32768);*r1p=r1;*r0p=r0;}
+static void f62(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,32767);*r1p=r1;*r0p=r0;}
+static void f63(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-32769);*r1p=r1;*r0p=r0;}
+static void f64(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,1);*r1p=r1;*r0p=r0;}
+static void f65(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-1);*r1p=r1;*r0p=r0;}
+static void f66(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,0);*r1p=r1;*r0p=r0;}
+static void f67(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-2);*r1p=r1;*r0p=r0;}
+static void f68(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,2);*r1p=r1;*r0p=r0;}
+static void f69(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-2);*r1p=r1;*r0p=r0;}
+static void f70(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,1);*r1p=r1;*r0p=r0;}
+static void f71(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-3);*r1p=r1;*r0p=r0;}
+static void f72(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,4);*r1p=r1;*r0p=r0;}
+static void f73(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-4);*r1p=r1;*r0p=r0;}
+static void f74(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,3);*r1p=r1;*r0p=r0;}
+static void f75(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-5);*r1p=r1;*r0p=r0;}
+static void f76(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,8);*r1p=r1;*r0p=r0;}
+static void f77(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-8);*r1p=r1;*r0p=r0;}
+static void f78(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,7);*r1p=r1;*r0p=r0;}
+static void f79(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-9);*r1p=r1;*r0p=r0;}
+static void f80(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,16);*r1p=r1;*r0p=r0;}
+static void f81(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-16);*r1p=r1;*r0p=r0;}
+static void f82(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,15);*r1p=r1;*r0p=r0;}
+static void f83(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-17);*r1p=r1;*r0p=r0;}
+static void f84(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,32);*r1p=r1;*r0p=r0;}
+static void f85(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-32);*r1p=r1;*r0p=r0;}
+static void f86(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,31);*r1p=r1;*r0p=r0;}
+static void f87(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-33);*r1p=r1;*r0p=r0;}
+static void f88(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,64);*r1p=r1;*r0p=r0;}
+static void f89(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-64);*r1p=r1;*r0p=r0;}
+static void f90(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,63);*r1p=r1;*r0p=r0;}
+static void f91(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-65);*r1p=r1;*r0p=r0;}
+static void f92(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,128);*r1p=r1;*r0p=r0;}
+static void f93(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-128);*r1p=r1;*r0p=r0;}
+static void f94(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,127);*r1p=r1;*r0p=r0;}
+static void f95(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-129);*r1p=r1;*r0p=r0;}
+static void f96(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,256);*r1p=r1;*r0p=r0;}
+static void f97(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-256);*r1p=r1;*r0p=r0;}
+static void f98(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,255);*r1p=r1;*r0p=r0;}
+static void f99(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-257);*r1p=r1;*r0p=r0;}
+static void f100(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,512);*r1p=r1;*r0p=r0;}
+static void f101(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-512);*r1p=r1;*r0p=r0;}
+static void f102(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,511);*r1p=r1;*r0p=r0;}
+static void f103(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-513);*r1p=r1;*r0p=r0;}
+static void f104(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,1024);*r1p=r1;*r0p=r0;}
+static void f105(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-1024);*r1p=r1;*r0p=r0;}
+static void f106(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,1023);*r1p=r1;*r0p=r0;}
+static void f107(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-1025);*r1p=r1;*r0p=r0;}
+static void f108(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,2048);*r1p=r1;*r0p=r0;}
+static void f109(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-2048);*r1p=r1;*r0p=r0;}
+static void f110(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,2047);*r1p=r1;*r0p=r0;}
+static void f111(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-2049);*r1p=r1;*r0p=r0;}
+static void f112(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,4096);*r1p=r1;*r0p=r0;}
+static void f113(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-4096);*r1p=r1;*r0p=r0;}
+static void f114(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,4095);*r1p=r1;*r0p=r0;}
+static void f115(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-4097);*r1p=r1;*r0p=r0;}
+static void f116(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,8192);*r1p=r1;*r0p=r0;}
+static void f117(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-8192);*r1p=r1;*r0p=r0;}
+static void f118(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,8191);*r1p=r1;*r0p=r0;}
+static void f119(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-8193);*r1p=r1;*r0p=r0;}
+static void f120(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,16384);*r1p=r1;*r0p=r0;}
+static void f121(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-16384);*r1p=r1;*r0p=r0;}
+static void f122(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,16383);*r1p=r1;*r0p=r0;}
+static void f123(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-16385);*r1p=r1;*r0p=r0;}
+static void f124(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,32768);*r1p=r1;*r0p=r0;}
+static void f125(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-32768);*r1p=r1;*r0p=r0;}
+static void f126(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,32767);*r1p=r1;*r0p=r0;}
+static void f127(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1,0,-32769);*r1p=r1;*r0p=r0;}
+static void f128(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,1);*r1p=r1;*r0p=r0;}
+static void f129(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-1);*r1p=r1;*r0p=r0;}
+static void f130(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,0);*r1p=r1;*r0p=r0;}
+static void f131(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-2);*r1p=r1;*r0p=r0;}
+static void f132(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,2);*r1p=r1;*r0p=r0;}
+static void f133(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-2);*r1p=r1;*r0p=r0;}
+static void f134(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,1);*r1p=r1;*r0p=r0;}
+static void f135(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-3);*r1p=r1;*r0p=r0;}
+static void f136(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,4);*r1p=r1;*r0p=r0;}
+static void f137(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-4);*r1p=r1;*r0p=r0;}
+static void f138(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,3);*r1p=r1;*r0p=r0;}
+static void f139(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-5);*r1p=r1;*r0p=r0;}
+static void f140(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,8);*r1p=r1;*r0p=r0;}
+static void f141(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-8);*r1p=r1;*r0p=r0;}
+static void f142(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,7);*r1p=r1;*r0p=r0;}
+static void f143(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-9);*r1p=r1;*r0p=r0;}
+static void f144(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,16);*r1p=r1;*r0p=r0;}
+static void f145(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-16);*r1p=r1;*r0p=r0;}
+static void f146(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,15);*r1p=r1;*r0p=r0;}
+static void f147(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-17);*r1p=r1;*r0p=r0;}
+static void f148(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,32);*r1p=r1;*r0p=r0;}
+static void f149(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-32);*r1p=r1;*r0p=r0;}
+static void f150(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,31);*r1p=r1;*r0p=r0;}
+static void f151(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-33);*r1p=r1;*r0p=r0;}
+static void f152(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,64);*r1p=r1;*r0p=r0;}
+static void f153(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-64);*r1p=r1;*r0p=r0;}
+static void f154(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,63);*r1p=r1;*r0p=r0;}
+static void f155(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-65);*r1p=r1;*r0p=r0;}
+static void f156(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,128);*r1p=r1;*r0p=r0;}
+static void f157(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-128);*r1p=r1;*r0p=r0;}
+static void f158(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,127);*r1p=r1;*r0p=r0;}
+static void f159(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-129);*r1p=r1;*r0p=r0;}
+static void f160(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,256);*r1p=r1;*r0p=r0;}
+static void f161(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-256);*r1p=r1;*r0p=r0;}
+static void f162(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,255);*r1p=r1;*r0p=r0;}
+static void f163(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-257);*r1p=r1;*r0p=r0;}
+static void f164(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,512);*r1p=r1;*r0p=r0;}
+static void f165(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-512);*r1p=r1;*r0p=r0;}
+static void f166(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,511);*r1p=r1;*r0p=r0;}
+static void f167(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-513);*r1p=r1;*r0p=r0;}
+static void f168(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,1024);*r1p=r1;*r0p=r0;}
+static void f169(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-1024);*r1p=r1;*r0p=r0;}
+static void f170(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,1023);*r1p=r1;*r0p=r0;}
+static void f171(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-1025);*r1p=r1;*r0p=r0;}
+static void f172(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,2048);*r1p=r1;*r0p=r0;}
+static void f173(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-2048);*r1p=r1;*r0p=r0;}
+static void f174(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,2047);*r1p=r1;*r0p=r0;}
+static void f175(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-2049);*r1p=r1;*r0p=r0;}
+static void f176(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,4096);*r1p=r1;*r0p=r0;}
+static void f177(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-4096);*r1p=r1;*r0p=r0;}
+static void f178(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,4095);*r1p=r1;*r0p=r0;}
+static void f179(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-4097);*r1p=r1;*r0p=r0;}
+static void f180(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,8192);*r1p=r1;*r0p=r0;}
+static void f181(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-8192);*r1p=r1;*r0p=r0;}
+static void f182(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,8191);*r1p=r1;*r0p=r0;}
+static void f183(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-8193);*r1p=r1;*r0p=r0;}
+static void f184(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,16384);*r1p=r1;*r0p=r0;}
+static void f185(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-16384);*r1p=r1;*r0p=r0;}
+static void f186(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,16383);*r1p=r1;*r0p=r0;}
+static void f187(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-16385);*r1p=r1;*r0p=r0;}
+static void f188(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,32768);*r1p=r1;*r0p=r0;}
+static void f189(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-32768);*r1p=r1;*r0p=r0;}
+static void f190(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,32767);*r1p=r1;*r0p=r0;}
+static void f191(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,0,0,-32769);*r1p=r1;*r0p=r0;}
+static void f192(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,1);*r1p=r1;*r0p=r0;}
+static void f193(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-1);*r1p=r1;*r0p=r0;}
+static void f194(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,0);*r1p=r1;*r0p=r0;}
+static void f195(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-2);*r1p=r1;*r0p=r0;}
+static void f196(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,2);*r1p=r1;*r0p=r0;}
+static void f197(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-2);*r1p=r1;*r0p=r0;}
+static void f198(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,1);*r1p=r1;*r0p=r0;}
+static void f199(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-3);*r1p=r1;*r0p=r0;}
+static void f200(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,4);*r1p=r1;*r0p=r0;}
+static void f201(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-4);*r1p=r1;*r0p=r0;}
+static void f202(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,3);*r1p=r1;*r0p=r0;}
+static void f203(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-5);*r1p=r1;*r0p=r0;}
+static void f204(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,8);*r1p=r1;*r0p=r0;}
+static void f205(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-8);*r1p=r1;*r0p=r0;}
+static void f206(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,7);*r1p=r1;*r0p=r0;}
+static void f207(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-9);*r1p=r1;*r0p=r0;}
+static void f208(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,16);*r1p=r1;*r0p=r0;}
+static void f209(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-16);*r1p=r1;*r0p=r0;}
+static void f210(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,15);*r1p=r1;*r0p=r0;}
+static void f211(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-17);*r1p=r1;*r0p=r0;}
+static void f212(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,32);*r1p=r1;*r0p=r0;}
+static void f213(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-32);*r1p=r1;*r0p=r0;}
+static void f214(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,31);*r1p=r1;*r0p=r0;}
+static void f215(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-33);*r1p=r1;*r0p=r0;}
+static void f216(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,64);*r1p=r1;*r0p=r0;}
+static void f217(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-64);*r1p=r1;*r0p=r0;}
+static void f218(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,63);*r1p=r1;*r0p=r0;}
+static void f219(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-65);*r1p=r1;*r0p=r0;}
+static void f220(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,128);*r1p=r1;*r0p=r0;}
+static void f221(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-128);*r1p=r1;*r0p=r0;}
+static void f222(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,127);*r1p=r1;*r0p=r0;}
+static void f223(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-129);*r1p=r1;*r0p=r0;}
+static void f224(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,256);*r1p=r1;*r0p=r0;}
+static void f225(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-256);*r1p=r1;*r0p=r0;}
+static void f226(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,255);*r1p=r1;*r0p=r0;}
+static void f227(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-257);*r1p=r1;*r0p=r0;}
+static void f228(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,512);*r1p=r1;*r0p=r0;}
+static void f229(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-512);*r1p=r1;*r0p=r0;}
+static void f230(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,511);*r1p=r1;*r0p=r0;}
+static void f231(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-513);*r1p=r1;*r0p=r0;}
+static void f232(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,1024);*r1p=r1;*r0p=r0;}
+static void f233(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-1024);*r1p=r1;*r0p=r0;}
+static void f234(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,1023);*r1p=r1;*r0p=r0;}
+static void f235(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-1025);*r1p=r1;*r0p=r0;}
+static void f236(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,2048);*r1p=r1;*r0p=r0;}
+static void f237(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-2048);*r1p=r1;*r0p=r0;}
+static void f238(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,2047);*r1p=r1;*r0p=r0;}
+static void f239(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-2049);*r1p=r1;*r0p=r0;}
+static void f240(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,4096);*r1p=r1;*r0p=r0;}
+static void f241(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-4096);*r1p=r1;*r0p=r0;}
+static void f242(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,4095);*r1p=r1;*r0p=r0;}
+static void f243(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-4097);*r1p=r1;*r0p=r0;}
+static void f244(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,8192);*r1p=r1;*r0p=r0;}
+static void f245(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-8192);*r1p=r1;*r0p=r0;}
+static void f246(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,8191);*r1p=r1;*r0p=r0;}
+static void f247(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-8193);*r1p=r1;*r0p=r0;}
+static void f248(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,16384);*r1p=r1;*r0p=r0;}
+static void f249(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-16384);*r1p=r1;*r0p=r0;}
+static void f250(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,16383);*r1p=r1;*r0p=r0;}
+static void f251(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-16385);*r1p=r1;*r0p=r0;}
+static void f252(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,32768);*r1p=r1;*r0p=r0;}
+static void f253(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-32768);*r1p=r1;*r0p=r0;}
+static void f254(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,32767);*r1p=r1;*r0p=r0;}
+static void f255(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-32769);*r1p=r1;*r0p=r0;}
+static void f256(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,1);*r1p=r1;*r0p=r0;}
+static void f257(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-1);*r1p=r1;*r0p=r0;}
+static void f258(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,0);*r1p=r1;*r0p=r0;}
+static void f259(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-2);*r1p=r1;*r0p=r0;}
+static void f260(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,2);*r1p=r1;*r0p=r0;}
+static void f261(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-2);*r1p=r1;*r0p=r0;}
+static void f262(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,1);*r1p=r1;*r0p=r0;}
+static void f263(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-3);*r1p=r1;*r0p=r0;}
+static void f264(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,4);*r1p=r1;*r0p=r0;}
+static void f265(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-4);*r1p=r1;*r0p=r0;}
+static void f266(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,3);*r1p=r1;*r0p=r0;}
+static void f267(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-5);*r1p=r1;*r0p=r0;}
+static void f268(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,8);*r1p=r1;*r0p=r0;}
+static void f269(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-8);*r1p=r1;*r0p=r0;}
+static void f270(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,7);*r1p=r1;*r0p=r0;}
+static void f271(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-9);*r1p=r1;*r0p=r0;}
+static void f272(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,16);*r1p=r1;*r0p=r0;}
+static void f273(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-16);*r1p=r1;*r0p=r0;}
+static void f274(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,15);*r1p=r1;*r0p=r0;}
+static void f275(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-17);*r1p=r1;*r0p=r0;}
+static void f276(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,32);*r1p=r1;*r0p=r0;}
+static void f277(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-32);*r1p=r1;*r0p=r0;}
+static void f278(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,31);*r1p=r1;*r0p=r0;}
+static void f279(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-33);*r1p=r1;*r0p=r0;}
+static void f280(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,64);*r1p=r1;*r0p=r0;}
+static void f281(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-64);*r1p=r1;*r0p=r0;}
+static void f282(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,63);*r1p=r1;*r0p=r0;}
+static void f283(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-65);*r1p=r1;*r0p=r0;}
+static void f284(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,128);*r1p=r1;*r0p=r0;}
+static void f285(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-128);*r1p=r1;*r0p=r0;}
+static void f286(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,127);*r1p=r1;*r0p=r0;}
+static void f287(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-129);*r1p=r1;*r0p=r0;}
+static void f288(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,256);*r1p=r1;*r0p=r0;}
+static void f289(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-256);*r1p=r1;*r0p=r0;}
+static void f290(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,255);*r1p=r1;*r0p=r0;}
+static void f291(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-257);*r1p=r1;*r0p=r0;}
+static void f292(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,512);*r1p=r1;*r0p=r0;}
+static void f293(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-512);*r1p=r1;*r0p=r0;}
+static void f294(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,511);*r1p=r1;*r0p=r0;}
+static void f295(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-513);*r1p=r1;*r0p=r0;}
+static void f296(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,1024);*r1p=r1;*r0p=r0;}
+static void f297(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-1024);*r1p=r1;*r0p=r0;}
+static void f298(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,1023);*r1p=r1;*r0p=r0;}
+static void f299(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-1025);*r1p=r1;*r0p=r0;}
+static void f300(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,2048);*r1p=r1;*r0p=r0;}
+static void f301(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-2048);*r1p=r1;*r0p=r0;}
+static void f302(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,2047);*r1p=r1;*r0p=r0;}
+static void f303(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-2049);*r1p=r1;*r0p=r0;}
+static void f304(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,4096);*r1p=r1;*r0p=r0;}
+static void f305(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-4096);*r1p=r1;*r0p=r0;}
+static void f306(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,4095);*r1p=r1;*r0p=r0;}
+static void f307(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-4097);*r1p=r1;*r0p=r0;}
+static void f308(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,8192);*r1p=r1;*r0p=r0;}
+static void f309(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-8192);*r1p=r1;*r0p=r0;}
+static void f310(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,8191);*r1p=r1;*r0p=r0;}
+static void f311(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-8193);*r1p=r1;*r0p=r0;}
+static void f312(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,16384);*r1p=r1;*r0p=r0;}
+static void f313(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-16384);*r1p=r1;*r0p=r0;}
+static void f314(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,16383);*r1p=r1;*r0p=r0;}
+static void f315(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-16385);*r1p=r1;*r0p=r0;}
+static void f316(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,32768);*r1p=r1;*r0p=r0;}
+static void f317(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-32768);*r1p=r1;*r0p=r0;}
+static void f318(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,32767);*r1p=r1;*r0p=r0;}
+static void f319(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2,0,-32769);*r1p=r1;*r0p=r0;}
+static void f320(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,1);*r1p=r1;*r0p=r0;}
+static void f321(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-1);*r1p=r1;*r0p=r0;}
+static void f322(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,0);*r1p=r1;*r0p=r0;}
+static void f323(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-2);*r1p=r1;*r0p=r0;}
+static void f324(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,2);*r1p=r1;*r0p=r0;}
+static void f325(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-2);*r1p=r1;*r0p=r0;}
+static void f326(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,1);*r1p=r1;*r0p=r0;}
+static void f327(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-3);*r1p=r1;*r0p=r0;}
+static void f328(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,4);*r1p=r1;*r0p=r0;}
+static void f329(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-4);*r1p=r1;*r0p=r0;}
+static void f330(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,3);*r1p=r1;*r0p=r0;}
+static void f331(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-5);*r1p=r1;*r0p=r0;}
+static void f332(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,8);*r1p=r1;*r0p=r0;}
+static void f333(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-8);*r1p=r1;*r0p=r0;}
+static void f334(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,7);*r1p=r1;*r0p=r0;}
+static void f335(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-9);*r1p=r1;*r0p=r0;}
+static void f336(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,16);*r1p=r1;*r0p=r0;}
+static void f337(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-16);*r1p=r1;*r0p=r0;}
+static void f338(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,15);*r1p=r1;*r0p=r0;}
+static void f339(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-17);*r1p=r1;*r0p=r0;}
+static void f340(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,32);*r1p=r1;*r0p=r0;}
+static void f341(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-32);*r1p=r1;*r0p=r0;}
+static void f342(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,31);*r1p=r1;*r0p=r0;}
+static void f343(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-33);*r1p=r1;*r0p=r0;}
+static void f344(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,64);*r1p=r1;*r0p=r0;}
+static void f345(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-64);*r1p=r1;*r0p=r0;}
+static void f346(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,63);*r1p=r1;*r0p=r0;}
+static void f347(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-65);*r1p=r1;*r0p=r0;}
+static void f348(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,128);*r1p=r1;*r0p=r0;}
+static void f349(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-128);*r1p=r1;*r0p=r0;}
+static void f350(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,127);*r1p=r1;*r0p=r0;}
+static void f351(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-129);*r1p=r1;*r0p=r0;}
+static void f352(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,256);*r1p=r1;*r0p=r0;}
+static void f353(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-256);*r1p=r1;*r0p=r0;}
+static void f354(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,255);*r1p=r1;*r0p=r0;}
+static void f355(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-257);*r1p=r1;*r0p=r0;}
+static void f356(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,512);*r1p=r1;*r0p=r0;}
+static void f357(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-512);*r1p=r1;*r0p=r0;}
+static void f358(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,511);*r1p=r1;*r0p=r0;}
+static void f359(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-513);*r1p=r1;*r0p=r0;}
+static void f360(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,1024);*r1p=r1;*r0p=r0;}
+static void f361(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-1024);*r1p=r1;*r0p=r0;}
+static void f362(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,1023);*r1p=r1;*r0p=r0;}
+static void f363(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-1025);*r1p=r1;*r0p=r0;}
+static void f364(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,2048);*r1p=r1;*r0p=r0;}
+static void f365(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-2048);*r1p=r1;*r0p=r0;}
+static void f366(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,2047);*r1p=r1;*r0p=r0;}
+static void f367(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-2049);*r1p=r1;*r0p=r0;}
+static void f368(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,4096);*r1p=r1;*r0p=r0;}
+static void f369(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-4096);*r1p=r1;*r0p=r0;}
+static void f370(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,4095);*r1p=r1;*r0p=r0;}
+static void f371(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-4097);*r1p=r1;*r0p=r0;}
+static void f372(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,8192);*r1p=r1;*r0p=r0;}
+static void f373(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-8192);*r1p=r1;*r0p=r0;}
+static void f374(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,8191);*r1p=r1;*r0p=r0;}
+static void f375(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-8193);*r1p=r1;*r0p=r0;}
+static void f376(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,16384);*r1p=r1;*r0p=r0;}
+static void f377(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-16384);*r1p=r1;*r0p=r0;}
+static void f378(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,16383);*r1p=r1;*r0p=r0;}
+static void f379(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-16385);*r1p=r1;*r0p=r0;}
+static void f380(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,32768);*r1p=r1;*r0p=r0;}
+static void f381(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-32768);*r1p=r1;*r0p=r0;}
+static void f382(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,32767);*r1p=r1;*r0p=r0;}
+static void f383(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2,0,-32769);*r1p=r1;*r0p=r0;}
+static void f384(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,1);*r1p=r1;*r0p=r0;}
+static void f385(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-1);*r1p=r1;*r0p=r0;}
+static void f386(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,0);*r1p=r1;*r0p=r0;}
+static void f387(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-2);*r1p=r1;*r0p=r0;}
+static void f388(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,2);*r1p=r1;*r0p=r0;}
+static void f389(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-2);*r1p=r1;*r0p=r0;}
+static void f390(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,1);*r1p=r1;*r0p=r0;}
+static void f391(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-3);*r1p=r1;*r0p=r0;}
+static void f392(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,4);*r1p=r1;*r0p=r0;}
+static void f393(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-4);*r1p=r1;*r0p=r0;}
+static void f394(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,3);*r1p=r1;*r0p=r0;}
+static void f395(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-5);*r1p=r1;*r0p=r0;}
+static void f396(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,8);*r1p=r1;*r0p=r0;}
+static void f397(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-8);*r1p=r1;*r0p=r0;}
+static void f398(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,7);*r1p=r1;*r0p=r0;}
+static void f399(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-9);*r1p=r1;*r0p=r0;}
+static void f400(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,16);*r1p=r1;*r0p=r0;}
+static void f401(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-16);*r1p=r1;*r0p=r0;}
+static void f402(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,15);*r1p=r1;*r0p=r0;}
+static void f403(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-17);*r1p=r1;*r0p=r0;}
+static void f404(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,32);*r1p=r1;*r0p=r0;}
+static void f405(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-32);*r1p=r1;*r0p=r0;}
+static void f406(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,31);*r1p=r1;*r0p=r0;}
+static void f407(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-33);*r1p=r1;*r0p=r0;}
+static void f408(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,64);*r1p=r1;*r0p=r0;}
+static void f409(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-64);*r1p=r1;*r0p=r0;}
+static void f410(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,63);*r1p=r1;*r0p=r0;}
+static void f411(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-65);*r1p=r1;*r0p=r0;}
+static void f412(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,128);*r1p=r1;*r0p=r0;}
+static void f413(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-128);*r1p=r1;*r0p=r0;}
+static void f414(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,127);*r1p=r1;*r0p=r0;}
+static void f415(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-129);*r1p=r1;*r0p=r0;}
+static void f416(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,256);*r1p=r1;*r0p=r0;}
+static void f417(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-256);*r1p=r1;*r0p=r0;}
+static void f418(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,255);*r1p=r1;*r0p=r0;}
+static void f419(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-257);*r1p=r1;*r0p=r0;}
+static void f420(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,512);*r1p=r1;*r0p=r0;}
+static void f421(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-512);*r1p=r1;*r0p=r0;}
+static void f422(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,511);*r1p=r1;*r0p=r0;}
+static void f423(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-513);*r1p=r1;*r0p=r0;}
+static void f424(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,1024);*r1p=r1;*r0p=r0;}
+static void f425(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-1024);*r1p=r1;*r0p=r0;}
+static void f426(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,1023);*r1p=r1;*r0p=r0;}
+static void f427(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-1025);*r1p=r1;*r0p=r0;}
+static void f428(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,2048);*r1p=r1;*r0p=r0;}
+static void f429(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-2048);*r1p=r1;*r0p=r0;}
+static void f430(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,2047);*r1p=r1;*r0p=r0;}
+static void f431(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-2049);*r1p=r1;*r0p=r0;}
+static void f432(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,4096);*r1p=r1;*r0p=r0;}
+static void f433(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-4096);*r1p=r1;*r0p=r0;}
+static void f434(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,4095);*r1p=r1;*r0p=r0;}
+static void f435(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-4097);*r1p=r1;*r0p=r0;}
+static void f436(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,8192);*r1p=r1;*r0p=r0;}
+static void f437(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-8192);*r1p=r1;*r0p=r0;}
+static void f438(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,8191);*r1p=r1;*r0p=r0;}
+static void f439(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-8193);*r1p=r1;*r0p=r0;}
+static void f440(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,16384);*r1p=r1;*r0p=r0;}
+static void f441(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-16384);*r1p=r1;*r0p=r0;}
+static void f442(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,16383);*r1p=r1;*r0p=r0;}
+static void f443(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-16385);*r1p=r1;*r0p=r0;}
+static void f444(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,32768);*r1p=r1;*r0p=r0;}
+static void f445(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-32768);*r1p=r1;*r0p=r0;}
+static void f446(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,32767);*r1p=r1;*r0p=r0;}
+static void f447(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1,0,-32769);*r1p=r1;*r0p=r0;}
+static void f448(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,1);*r1p=r1;*r0p=r0;}
+static void f449(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-1);*r1p=r1;*r0p=r0;}
+static void f450(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,0);*r1p=r1;*r0p=r0;}
+static void f451(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-2);*r1p=r1;*r0p=r0;}
+static void f452(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,2);*r1p=r1;*r0p=r0;}
+static void f453(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-2);*r1p=r1;*r0p=r0;}
+static void f454(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,1);*r1p=r1;*r0p=r0;}
+static void f455(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-3);*r1p=r1;*r0p=r0;}
+static void f456(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,4);*r1p=r1;*r0p=r0;}
+static void f457(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-4);*r1p=r1;*r0p=r0;}
+static void f458(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,3);*r1p=r1;*r0p=r0;}
+static void f459(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-5);*r1p=r1;*r0p=r0;}
+static void f460(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,8);*r1p=r1;*r0p=r0;}
+static void f461(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-8);*r1p=r1;*r0p=r0;}
+static void f462(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,7);*r1p=r1;*r0p=r0;}
+static void f463(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-9);*r1p=r1;*r0p=r0;}
+static void f464(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,16);*r1p=r1;*r0p=r0;}
+static void f465(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-16);*r1p=r1;*r0p=r0;}
+static void f466(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,15);*r1p=r1;*r0p=r0;}
+static void f467(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-17);*r1p=r1;*r0p=r0;}
+static void f468(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,32);*r1p=r1;*r0p=r0;}
+/* f469..f511: tail of a machine-generated group of 64 test stubs (group
+   start lies earlier in the file).  Each stub computes the double-limb sum
+   (0,-3) + (0,B) via add_ssaaaa and stores the result through *r1p/*r0p.
+   B sweeps {2^k, -2^k, 2^k-1, -2^k-1} for k = 0..15; adjacent k overlap,
+   so repeated B values (e.g. -2, 1) are intentional generator output.
+   GENERATED CODE — regenerate rather than edit by hand.  */
+static void f469(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-32);*r1p=r1;*r0p=r0;}
+static void f470(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,31);*r1p=r1;*r0p=r0;}
+static void f471(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-33);*r1p=r1;*r0p=r0;}
+static void f472(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,64);*r1p=r1;*r0p=r0;}
+static void f473(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-64);*r1p=r1;*r0p=r0;}
+static void f474(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,63);*r1p=r1;*r0p=r0;}
+static void f475(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-65);*r1p=r1;*r0p=r0;}
+static void f476(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,128);*r1p=r1;*r0p=r0;}
+static void f477(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-128);*r1p=r1;*r0p=r0;}
+static void f478(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,127);*r1p=r1;*r0p=r0;}
+static void f479(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-129);*r1p=r1;*r0p=r0;}
+static void f480(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,256);*r1p=r1;*r0p=r0;}
+static void f481(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-256);*r1p=r1;*r0p=r0;}
+static void f482(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,255);*r1p=r1;*r0p=r0;}
+static void f483(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-257);*r1p=r1;*r0p=r0;}
+static void f484(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,512);*r1p=r1;*r0p=r0;}
+static void f485(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-512);*r1p=r1;*r0p=r0;}
+static void f486(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,511);*r1p=r1;*r0p=r0;}
+static void f487(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-513);*r1p=r1;*r0p=r0;}
+static void f488(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,1024);*r1p=r1;*r0p=r0;}
+static void f489(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-1024);*r1p=r1;*r0p=r0;}
+static void f490(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,1023);*r1p=r1;*r0p=r0;}
+static void f491(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-1025);*r1p=r1;*r0p=r0;}
+static void f492(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,2048);*r1p=r1;*r0p=r0;}
+static void f493(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-2048);*r1p=r1;*r0p=r0;}
+static void f494(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,2047);*r1p=r1;*r0p=r0;}
+static void f495(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-2049);*r1p=r1;*r0p=r0;}
+static void f496(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,4096);*r1p=r1;*r0p=r0;}
+static void f497(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-4096);*r1p=r1;*r0p=r0;}
+static void f498(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,4095);*r1p=r1;*r0p=r0;}
+static void f499(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-4097);*r1p=r1;*r0p=r0;}
+static void f500(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,8192);*r1p=r1;*r0p=r0;}
+static void f501(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-8192);*r1p=r1;*r0p=r0;}
+static void f502(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,8191);*r1p=r1;*r0p=r0;}
+static void f503(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-8193);*r1p=r1;*r0p=r0;}
+static void f504(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,16384);*r1p=r1;*r0p=r0;}
+static void f505(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-16384);*r1p=r1;*r0p=r0;}
+static void f506(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,16383);*r1p=r1;*r0p=r0;}
+static void f507(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-16385);*r1p=r1;*r0p=r0;}
+static void f508(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,32768);*r1p=r1;*r0p=r0;}
+static void f509(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-32768);*r1p=r1;*r0p=r0;}
+static void f510(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,32767);*r1p=r1;*r0p=r0;}
+static void f511(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-3,0,-32769);*r1p=r1;*r0p=r0;}
+/* f512..f575: generated group of 64 stubs computing the double-limb sum
+   (0,4) + (0,B) with add_ssaaaa, result written through *r1p/*r0p.
+   B sweeps {2^k, -2^k, 2^k-1, -2^k-1} for k = 0..15 (duplicates such as
+   B = -2 at f515/f517 come from overlapping k = 0/1 quads and are
+   intentional).  GENERATED CODE — regenerate rather than edit by hand.  */
+static void f512(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,1);*r1p=r1;*r0p=r0;}
+static void f513(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-1);*r1p=r1;*r0p=r0;}
+static void f514(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,0);*r1p=r1;*r0p=r0;}
+static void f515(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-2);*r1p=r1;*r0p=r0;}
+static void f516(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,2);*r1p=r1;*r0p=r0;}
+static void f517(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-2);*r1p=r1;*r0p=r0;}
+static void f518(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,1);*r1p=r1;*r0p=r0;}
+static void f519(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-3);*r1p=r1;*r0p=r0;}
+static void f520(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,4);*r1p=r1;*r0p=r0;}
+static void f521(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-4);*r1p=r1;*r0p=r0;}
+static void f522(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,3);*r1p=r1;*r0p=r0;}
+static void f523(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-5);*r1p=r1;*r0p=r0;}
+static void f524(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,8);*r1p=r1;*r0p=r0;}
+static void f525(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-8);*r1p=r1;*r0p=r0;}
+static void f526(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,7);*r1p=r1;*r0p=r0;}
+static void f527(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-9);*r1p=r1;*r0p=r0;}
+static void f528(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,16);*r1p=r1;*r0p=r0;}
+static void f529(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-16);*r1p=r1;*r0p=r0;}
+static void f530(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,15);*r1p=r1;*r0p=r0;}
+static void f531(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-17);*r1p=r1;*r0p=r0;}
+static void f532(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,32);*r1p=r1;*r0p=r0;}
+static void f533(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-32);*r1p=r1;*r0p=r0;}
+static void f534(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,31);*r1p=r1;*r0p=r0;}
+static void f535(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-33);*r1p=r1;*r0p=r0;}
+static void f536(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,64);*r1p=r1;*r0p=r0;}
+static void f537(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-64);*r1p=r1;*r0p=r0;}
+static void f538(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,63);*r1p=r1;*r0p=r0;}
+static void f539(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-65);*r1p=r1;*r0p=r0;}
+static void f540(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,128);*r1p=r1;*r0p=r0;}
+static void f541(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-128);*r1p=r1;*r0p=r0;}
+static void f542(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,127);*r1p=r1;*r0p=r0;}
+static void f543(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-129);*r1p=r1;*r0p=r0;}
+static void f544(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,256);*r1p=r1;*r0p=r0;}
+static void f545(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-256);*r1p=r1;*r0p=r0;}
+static void f546(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,255);*r1p=r1;*r0p=r0;}
+static void f547(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-257);*r1p=r1;*r0p=r0;}
+static void f548(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,512);*r1p=r1;*r0p=r0;}
+static void f549(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-512);*r1p=r1;*r0p=r0;}
+static void f550(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,511);*r1p=r1;*r0p=r0;}
+static void f551(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-513);*r1p=r1;*r0p=r0;}
+static void f552(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,1024);*r1p=r1;*r0p=r0;}
+static void f553(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-1024);*r1p=r1;*r0p=r0;}
+static void f554(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,1023);*r1p=r1;*r0p=r0;}
+static void f555(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-1025);*r1p=r1;*r0p=r0;}
+static void f556(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,2048);*r1p=r1;*r0p=r0;}
+static void f557(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-2048);*r1p=r1;*r0p=r0;}
+static void f558(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,2047);*r1p=r1;*r0p=r0;}
+static void f559(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-2049);*r1p=r1;*r0p=r0;}
+static void f560(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,4096);*r1p=r1;*r0p=r0;}
+static void f561(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-4096);*r1p=r1;*r0p=r0;}
+static void f562(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,4095);*r1p=r1;*r0p=r0;}
+static void f563(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-4097);*r1p=r1;*r0p=r0;}
+static void f564(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,8192);*r1p=r1;*r0p=r0;}
+static void f565(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-8192);*r1p=r1;*r0p=r0;}
+static void f566(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,8191);*r1p=r1;*r0p=r0;}
+static void f567(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-8193);*r1p=r1;*r0p=r0;}
+static void f568(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,16384);*r1p=r1;*r0p=r0;}
+static void f569(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-16384);*r1p=r1;*r0p=r0;}
+static void f570(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,16383);*r1p=r1;*r0p=r0;}
+static void f571(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-16385);*r1p=r1;*r0p=r0;}
+static void f572(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,32768);*r1p=r1;*r0p=r0;}
+static void f573(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-32768);*r1p=r1;*r0p=r0;}
+static void f574(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,32767);*r1p=r1;*r0p=r0;}
+static void f575(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4,0,-32769);*r1p=r1;*r0p=r0;}
+/* f576..f639: generated group of 64 stubs computing the double-limb sum
+   (0,-4) + (0,B) with add_ssaaaa, result written through *r1p/*r0p.
+   B sweeps {2^k, -2^k, 2^k-1, -2^k-1} for k = 0..15; overlapping quads
+   produce intentional duplicate B values.  GENERATED CODE — regenerate
+   rather than edit by hand.  */
+static void f576(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,1);*r1p=r1;*r0p=r0;}
+static void f577(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-1);*r1p=r1;*r0p=r0;}
+static void f578(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,0);*r1p=r1;*r0p=r0;}
+static void f579(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-2);*r1p=r1;*r0p=r0;}
+static void f580(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,2);*r1p=r1;*r0p=r0;}
+static void f581(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-2);*r1p=r1;*r0p=r0;}
+static void f582(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,1);*r1p=r1;*r0p=r0;}
+static void f583(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-3);*r1p=r1;*r0p=r0;}
+static void f584(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,4);*r1p=r1;*r0p=r0;}
+static void f585(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-4);*r1p=r1;*r0p=r0;}
+static void f586(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,3);*r1p=r1;*r0p=r0;}
+static void f587(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-5);*r1p=r1;*r0p=r0;}
+static void f588(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,8);*r1p=r1;*r0p=r0;}
+static void f589(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-8);*r1p=r1;*r0p=r0;}
+static void f590(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,7);*r1p=r1;*r0p=r0;}
+static void f591(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-9);*r1p=r1;*r0p=r0;}
+static void f592(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,16);*r1p=r1;*r0p=r0;}
+static void f593(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-16);*r1p=r1;*r0p=r0;}
+static void f594(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,15);*r1p=r1;*r0p=r0;}
+static void f595(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-17);*r1p=r1;*r0p=r0;}
+static void f596(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,32);*r1p=r1;*r0p=r0;}
+static void f597(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-32);*r1p=r1;*r0p=r0;}
+static void f598(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,31);*r1p=r1;*r0p=r0;}
+static void f599(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-33);*r1p=r1;*r0p=r0;}
+static void f600(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,64);*r1p=r1;*r0p=r0;}
+static void f601(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-64);*r1p=r1;*r0p=r0;}
+static void f602(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,63);*r1p=r1;*r0p=r0;}
+static void f603(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-65);*r1p=r1;*r0p=r0;}
+static void f604(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,128);*r1p=r1;*r0p=r0;}
+static void f605(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-128);*r1p=r1;*r0p=r0;}
+static void f606(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,127);*r1p=r1;*r0p=r0;}
+static void f607(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-129);*r1p=r1;*r0p=r0;}
+static void f608(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,256);*r1p=r1;*r0p=r0;}
+static void f609(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-256);*r1p=r1;*r0p=r0;}
+static void f610(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,255);*r1p=r1;*r0p=r0;}
+static void f611(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-257);*r1p=r1;*r0p=r0;}
+static void f612(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,512);*r1p=r1;*r0p=r0;}
+static void f613(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-512);*r1p=r1;*r0p=r0;}
+static void f614(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,511);*r1p=r1;*r0p=r0;}
+static void f615(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-513);*r1p=r1;*r0p=r0;}
+static void f616(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,1024);*r1p=r1;*r0p=r0;}
+static void f617(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-1024);*r1p=r1;*r0p=r0;}
+static void f618(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,1023);*r1p=r1;*r0p=r0;}
+static void f619(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-1025);*r1p=r1;*r0p=r0;}
+static void f620(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,2048);*r1p=r1;*r0p=r0;}
+static void f621(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-2048);*r1p=r1;*r0p=r0;}
+static void f622(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,2047);*r1p=r1;*r0p=r0;}
+static void f623(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-2049);*r1p=r1;*r0p=r0;}
+static void f624(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,4096);*r1p=r1;*r0p=r0;}
+static void f625(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-4096);*r1p=r1;*r0p=r0;}
+static void f626(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,4095);*r1p=r1;*r0p=r0;}
+static void f627(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-4097);*r1p=r1;*r0p=r0;}
+static void f628(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,8192);*r1p=r1;*r0p=r0;}
+static void f629(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-8192);*r1p=r1;*r0p=r0;}
+static void f630(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,8191);*r1p=r1;*r0p=r0;}
+static void f631(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-8193);*r1p=r1;*r0p=r0;}
+static void f632(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,16384);*r1p=r1;*r0p=r0;}
+static void f633(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-16384);*r1p=r1;*r0p=r0;}
+static void f634(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,16383);*r1p=r1;*r0p=r0;}
+static void f635(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-16385);*r1p=r1;*r0p=r0;}
+static void f636(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,32768);*r1p=r1;*r0p=r0;}
+static void f637(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-32768);*r1p=r1;*r0p=r0;}
+static void f638(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,32767);*r1p=r1;*r0p=r0;}
+static void f639(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4,0,-32769);*r1p=r1;*r0p=r0;}
+/* f640..f703: generated group of 64 stubs computing the double-limb sum
+   (0,3) + (0,B) with add_ssaaaa, result written through *r1p/*r0p.
+   B sweeps {2^k, -2^k, 2^k-1, -2^k-1} for k = 0..15; overlapping quads
+   produce intentional duplicate B values.  GENERATED CODE — regenerate
+   rather than edit by hand.  */
+static void f640(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,1);*r1p=r1;*r0p=r0;}
+static void f641(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-1);*r1p=r1;*r0p=r0;}
+static void f642(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,0);*r1p=r1;*r0p=r0;}
+static void f643(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-2);*r1p=r1;*r0p=r0;}
+static void f644(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,2);*r1p=r1;*r0p=r0;}
+static void f645(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-2);*r1p=r1;*r0p=r0;}
+static void f646(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,1);*r1p=r1;*r0p=r0;}
+static void f647(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-3);*r1p=r1;*r0p=r0;}
+static void f648(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,4);*r1p=r1;*r0p=r0;}
+static void f649(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-4);*r1p=r1;*r0p=r0;}
+static void f650(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,3);*r1p=r1;*r0p=r0;}
+static void f651(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-5);*r1p=r1;*r0p=r0;}
+static void f652(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,8);*r1p=r1;*r0p=r0;}
+static void f653(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-8);*r1p=r1;*r0p=r0;}
+static void f654(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,7);*r1p=r1;*r0p=r0;}
+static void f655(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-9);*r1p=r1;*r0p=r0;}
+static void f656(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,16);*r1p=r1;*r0p=r0;}
+static void f657(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-16);*r1p=r1;*r0p=r0;}
+static void f658(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,15);*r1p=r1;*r0p=r0;}
+static void f659(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-17);*r1p=r1;*r0p=r0;}
+static void f660(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,32);*r1p=r1;*r0p=r0;}
+static void f661(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-32);*r1p=r1;*r0p=r0;}
+static void f662(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,31);*r1p=r1;*r0p=r0;}
+static void f663(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-33);*r1p=r1;*r0p=r0;}
+static void f664(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,64);*r1p=r1;*r0p=r0;}
+static void f665(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-64);*r1p=r1;*r0p=r0;}
+static void f666(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,63);*r1p=r1;*r0p=r0;}
+static void f667(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-65);*r1p=r1;*r0p=r0;}
+static void f668(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,128);*r1p=r1;*r0p=r0;}
+static void f669(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-128);*r1p=r1;*r0p=r0;}
+static void f670(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,127);*r1p=r1;*r0p=r0;}
+static void f671(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-129);*r1p=r1;*r0p=r0;}
+static void f672(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,256);*r1p=r1;*r0p=r0;}
+static void f673(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-256);*r1p=r1;*r0p=r0;}
+static void f674(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,255);*r1p=r1;*r0p=r0;}
+static void f675(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-257);*r1p=r1;*r0p=r0;}
+static void f676(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,512);*r1p=r1;*r0p=r0;}
+static void f677(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-512);*r1p=r1;*r0p=r0;}
+static void f678(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,511);*r1p=r1;*r0p=r0;}
+static void f679(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-513);*r1p=r1;*r0p=r0;}
+static void f680(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,1024);*r1p=r1;*r0p=r0;}
+static void f681(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-1024);*r1p=r1;*r0p=r0;}
+static void f682(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,1023);*r1p=r1;*r0p=r0;}
+static void f683(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-1025);*r1p=r1;*r0p=r0;}
+static void f684(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,2048);*r1p=r1;*r0p=r0;}
+static void f685(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-2048);*r1p=r1;*r0p=r0;}
+static void f686(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,2047);*r1p=r1;*r0p=r0;}
+static void f687(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-2049);*r1p=r1;*r0p=r0;}
+static void f688(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,4096);*r1p=r1;*r0p=r0;}
+static void f689(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-4096);*r1p=r1;*r0p=r0;}
+static void f690(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,4095);*r1p=r1;*r0p=r0;}
+static void f691(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-4097);*r1p=r1;*r0p=r0;}
+static void f692(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,8192);*r1p=r1;*r0p=r0;}
+static void f693(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-8192);*r1p=r1;*r0p=r0;}
+static void f694(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,8191);*r1p=r1;*r0p=r0;}
+static void f695(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-8193);*r1p=r1;*r0p=r0;}
+static void f696(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,16384);*r1p=r1;*r0p=r0;}
+static void f697(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-16384);*r1p=r1;*r0p=r0;}
+static void f698(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,16383);*r1p=r1;*r0p=r0;}
+static void f699(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-16385);*r1p=r1;*r0p=r0;}
+static void f700(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,32768);*r1p=r1;*r0p=r0;}
+static void f701(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-32768);*r1p=r1;*r0p=r0;}
+static void f702(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,32767);*r1p=r1;*r0p=r0;}
+static void f703(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,3,0,-32769);*r1p=r1;*r0p=r0;}
+/* f704..f767: generated group of 64 stubs computing the double-limb sum
+   (0,-5) + (0,B) with add_ssaaaa, result written through *r1p/*r0p.
+   B sweeps {2^k, -2^k, 2^k-1, -2^k-1} for k = 0..15; overlapping quads
+   produce intentional duplicate B values.  GENERATED CODE — regenerate
+   rather than edit by hand.  */
+static void f704(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,1);*r1p=r1;*r0p=r0;}
+static void f705(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-1);*r1p=r1;*r0p=r0;}
+static void f706(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,0);*r1p=r1;*r0p=r0;}
+static void f707(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-2);*r1p=r1;*r0p=r0;}
+static void f708(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,2);*r1p=r1;*r0p=r0;}
+static void f709(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-2);*r1p=r1;*r0p=r0;}
+static void f710(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,1);*r1p=r1;*r0p=r0;}
+static void f711(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-3);*r1p=r1;*r0p=r0;}
+static void f712(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,4);*r1p=r1;*r0p=r0;}
+static void f713(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-4);*r1p=r1;*r0p=r0;}
+static void f714(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,3);*r1p=r1;*r0p=r0;}
+static void f715(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-5);*r1p=r1;*r0p=r0;}
+static void f716(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,8);*r1p=r1;*r0p=r0;}
+static void f717(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-8);*r1p=r1;*r0p=r0;}
+static void f718(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,7);*r1p=r1;*r0p=r0;}
+static void f719(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-9);*r1p=r1;*r0p=r0;}
+static void f720(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,16);*r1p=r1;*r0p=r0;}
+static void f721(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-16);*r1p=r1;*r0p=r0;}
+static void f722(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,15);*r1p=r1;*r0p=r0;}
+static void f723(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-17);*r1p=r1;*r0p=r0;}
+static void f724(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,32);*r1p=r1;*r0p=r0;}
+static void f725(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-32);*r1p=r1;*r0p=r0;}
+static void f726(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,31);*r1p=r1;*r0p=r0;}
+static void f727(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-33);*r1p=r1;*r0p=r0;}
+static void f728(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,64);*r1p=r1;*r0p=r0;}
+static void f729(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-64);*r1p=r1;*r0p=r0;}
+static void f730(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,63);*r1p=r1;*r0p=r0;}
+static void f731(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-65);*r1p=r1;*r0p=r0;}
+static void f732(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,128);*r1p=r1;*r0p=r0;}
+static void f733(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-128);*r1p=r1;*r0p=r0;}
+static void f734(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,127);*r1p=r1;*r0p=r0;}
+static void f735(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-129);*r1p=r1;*r0p=r0;}
+static void f736(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,256);*r1p=r1;*r0p=r0;}
+static void f737(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-256);*r1p=r1;*r0p=r0;}
+static void f738(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,255);*r1p=r1;*r0p=r0;}
+static void f739(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-257);*r1p=r1;*r0p=r0;}
+static void f740(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,512);*r1p=r1;*r0p=r0;}
+static void f741(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-512);*r1p=r1;*r0p=r0;}
+static void f742(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,511);*r1p=r1;*r0p=r0;}
+static void f743(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-513);*r1p=r1;*r0p=r0;}
+static void f744(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,1024);*r1p=r1;*r0p=r0;}
+static void f745(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-1024);*r1p=r1;*r0p=r0;}
+static void f746(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,1023);*r1p=r1;*r0p=r0;}
+static void f747(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-1025);*r1p=r1;*r0p=r0;}
+static void f748(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,2048);*r1p=r1;*r0p=r0;}
+static void f749(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-2048);*r1p=r1;*r0p=r0;}
+static void f750(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,2047);*r1p=r1;*r0p=r0;}
+static void f751(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-2049);*r1p=r1;*r0p=r0;}
+static void f752(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,4096);*r1p=r1;*r0p=r0;}
+static void f753(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-4096);*r1p=r1;*r0p=r0;}
+static void f754(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,4095);*r1p=r1;*r0p=r0;}
+static void f755(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-4097);*r1p=r1;*r0p=r0;}
+static void f756(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,8192);*r1p=r1;*r0p=r0;}
+static void f757(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-8192);*r1p=r1;*r0p=r0;}
+static void f758(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,8191);*r1p=r1;*r0p=r0;}
+static void f759(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-8193);*r1p=r1;*r0p=r0;}
+static void f760(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,16384);*r1p=r1;*r0p=r0;}
+static void f761(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-16384);*r1p=r1;*r0p=r0;}
+static void f762(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,16383);*r1p=r1;*r0p=r0;}
+static void f763(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-16385);*r1p=r1;*r0p=r0;}
+static void f764(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,32768);*r1p=r1;*r0p=r0;}
+static void f765(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-32768);*r1p=r1;*r0p=r0;}
+static void f766(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,32767);*r1p=r1;*r0p=r0;}
+static void f767(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-5,0,-32769);*r1p=r1;*r0p=r0;}
+/* f768..f798: head of a generated group of 64 stubs computing the
+   double-limb sum (0,8) + (0,B) with add_ssaaaa, result written through
+   *r1p/*r0p (the group continues below with B up to -32769).  B follows
+   the same {2^k, -2^k, 2^k-1, -2^k-1} sweep as the preceding groups.
+   GENERATED CODE — regenerate rather than edit by hand.  */
+static void f768(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,1);*r1p=r1;*r0p=r0;}
+static void f769(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-1);*r1p=r1;*r0p=r0;}
+static void f770(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,0);*r1p=r1;*r0p=r0;}
+static void f771(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-2);*r1p=r1;*r0p=r0;}
+static void f772(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,2);*r1p=r1;*r0p=r0;}
+static void f773(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-2);*r1p=r1;*r0p=r0;}
+static void f774(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,1);*r1p=r1;*r0p=r0;}
+static void f775(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-3);*r1p=r1;*r0p=r0;}
+static void f776(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,4);*r1p=r1;*r0p=r0;}
+static void f777(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-4);*r1p=r1;*r0p=r0;}
+static void f778(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,3);*r1p=r1;*r0p=r0;}
+static void f779(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-5);*r1p=r1;*r0p=r0;}
+static void f780(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,8);*r1p=r1;*r0p=r0;}
+static void f781(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-8);*r1p=r1;*r0p=r0;}
+static void f782(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,7);*r1p=r1;*r0p=r0;}
+static void f783(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-9);*r1p=r1;*r0p=r0;}
+static void f784(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,16);*r1p=r1;*r0p=r0;}
+static void f785(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-16);*r1p=r1;*r0p=r0;}
+static void f786(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,15);*r1p=r1;*r0p=r0;}
+static void f787(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-17);*r1p=r1;*r0p=r0;}
+static void f788(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,32);*r1p=r1;*r0p=r0;}
+static void f789(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-32);*r1p=r1;*r0p=r0;}
+static void f790(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,31);*r1p=r1;*r0p=r0;}
+static void f791(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-33);*r1p=r1;*r0p=r0;}
+static void f792(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,64);*r1p=r1;*r0p=r0;}
+static void f793(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-64);*r1p=r1;*r0p=r0;}
+static void f794(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,63);*r1p=r1;*r0p=r0;}
+static void f795(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-65);*r1p=r1;*r0p=r0;}
+static void f796(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,128);*r1p=r1;*r0p=r0;}
+static void f797(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-128);*r1p=r1;*r0p=r0;}
+static void f798(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,127);*r1p=r1;*r0p=r0;}
+static void f799(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-129);*r1p=r1;*r0p=r0;}
+static void f800(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,256);*r1p=r1;*r0p=r0;}
+static void f801(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-256);*r1p=r1;*r0p=r0;}
+static void f802(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,255);*r1p=r1;*r0p=r0;}
+static void f803(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-257);*r1p=r1;*r0p=r0;}
+static void f804(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,512);*r1p=r1;*r0p=r0;}
+static void f805(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-512);*r1p=r1;*r0p=r0;}
+static void f806(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,511);*r1p=r1;*r0p=r0;}
+static void f807(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-513);*r1p=r1;*r0p=r0;}
+static void f808(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,1024);*r1p=r1;*r0p=r0;}
+static void f809(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-1024);*r1p=r1;*r0p=r0;}
+static void f810(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,1023);*r1p=r1;*r0p=r0;}
+static void f811(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-1025);*r1p=r1;*r0p=r0;}
+static void f812(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,2048);*r1p=r1;*r0p=r0;}
+static void f813(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-2048);*r1p=r1;*r0p=r0;}
+static void f814(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,2047);*r1p=r1;*r0p=r0;}
+static void f815(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-2049);*r1p=r1;*r0p=r0;}
+static void f816(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,4096);*r1p=r1;*r0p=r0;}
+static void f817(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-4096);*r1p=r1;*r0p=r0;}
+static void f818(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,4095);*r1p=r1;*r0p=r0;}
+static void f819(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-4097);*r1p=r1;*r0p=r0;}
+static void f820(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,8192);*r1p=r1;*r0p=r0;}
+static void f821(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-8192);*r1p=r1;*r0p=r0;}
+static void f822(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,8191);*r1p=r1;*r0p=r0;}
+static void f823(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-8193);*r1p=r1;*r0p=r0;}
+static void f824(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,16384);*r1p=r1;*r0p=r0;}
+static void f825(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-16384);*r1p=r1;*r0p=r0;}
+static void f826(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,16383);*r1p=r1;*r0p=r0;}
+static void f827(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-16385);*r1p=r1;*r0p=r0;}
+static void f828(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,32768);*r1p=r1;*r0p=r0;}
+static void f829(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-32768);*r1p=r1;*r0p=r0;}
+static void f830(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,32767);*r1p=r1;*r0p=r0;}
+static void f831(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8,0,-32769);*r1p=r1;*r0p=r0;}
+static void f832(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,1);*r1p=r1;*r0p=r0;}
+static void f833(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-1);*r1p=r1;*r0p=r0;}
+static void f834(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,0);*r1p=r1;*r0p=r0;}
+static void f835(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-2);*r1p=r1;*r0p=r0;}
+static void f836(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,2);*r1p=r1;*r0p=r0;}
+static void f837(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-2);*r1p=r1;*r0p=r0;}
+static void f838(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,1);*r1p=r1;*r0p=r0;}
+static void f839(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-3);*r1p=r1;*r0p=r0;}
+static void f840(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,4);*r1p=r1;*r0p=r0;}
+static void f841(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-4);*r1p=r1;*r0p=r0;}
+static void f842(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,3);*r1p=r1;*r0p=r0;}
+static void f843(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-5);*r1p=r1;*r0p=r0;}
+static void f844(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,8);*r1p=r1;*r0p=r0;}
+static void f845(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-8);*r1p=r1;*r0p=r0;}
+static void f846(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,7);*r1p=r1;*r0p=r0;}
+static void f847(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-9);*r1p=r1;*r0p=r0;}
+static void f848(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,16);*r1p=r1;*r0p=r0;}
+static void f849(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-16);*r1p=r1;*r0p=r0;}
+static void f850(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,15);*r1p=r1;*r0p=r0;}
+static void f851(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-17);*r1p=r1;*r0p=r0;}
+static void f852(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,32);*r1p=r1;*r0p=r0;}
+static void f853(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-32);*r1p=r1;*r0p=r0;}
+static void f854(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,31);*r1p=r1;*r0p=r0;}
+static void f855(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-33);*r1p=r1;*r0p=r0;}
+static void f856(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,64);*r1p=r1;*r0p=r0;}
+static void f857(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-64);*r1p=r1;*r0p=r0;}
+static void f858(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,63);*r1p=r1;*r0p=r0;}
+static void f859(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-65);*r1p=r1;*r0p=r0;}
+static void f860(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,128);*r1p=r1;*r0p=r0;}
+static void f861(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-128);*r1p=r1;*r0p=r0;}
+static void f862(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,127);*r1p=r1;*r0p=r0;}
+static void f863(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-129);*r1p=r1;*r0p=r0;}
+static void f864(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,256);*r1p=r1;*r0p=r0;}
+static void f865(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-256);*r1p=r1;*r0p=r0;}
+static void f866(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,255);*r1p=r1;*r0p=r0;}
+static void f867(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-257);*r1p=r1;*r0p=r0;}
+static void f868(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,512);*r1p=r1;*r0p=r0;}
+static void f869(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-512);*r1p=r1;*r0p=r0;}
+static void f870(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,511);*r1p=r1;*r0p=r0;}
+static void f871(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-513);*r1p=r1;*r0p=r0;}
+static void f872(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,1024);*r1p=r1;*r0p=r0;}
+static void f873(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-1024);*r1p=r1;*r0p=r0;}
+static void f874(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,1023);*r1p=r1;*r0p=r0;}
+static void f875(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-1025);*r1p=r1;*r0p=r0;}
+static void f876(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,2048);*r1p=r1;*r0p=r0;}
+static void f877(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-2048);*r1p=r1;*r0p=r0;}
+static void f878(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,2047);*r1p=r1;*r0p=r0;}
+static void f879(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-2049);*r1p=r1;*r0p=r0;}
+static void f880(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,4096);*r1p=r1;*r0p=r0;}
+static void f881(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-4096);*r1p=r1;*r0p=r0;}
+static void f882(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,4095);*r1p=r1;*r0p=r0;}
+static void f883(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-4097);*r1p=r1;*r0p=r0;}
+static void f884(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,8192);*r1p=r1;*r0p=r0;}
+static void f885(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-8192);*r1p=r1;*r0p=r0;}
+static void f886(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,8191);*r1p=r1;*r0p=r0;}
+static void f887(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-8193);*r1p=r1;*r0p=r0;}
+static void f888(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,16384);*r1p=r1;*r0p=r0;}
+static void f889(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-16384);*r1p=r1;*r0p=r0;}
+static void f890(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,16383);*r1p=r1;*r0p=r0;}
+static void f891(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-16385);*r1p=r1;*r0p=r0;}
+static void f892(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,32768);*r1p=r1;*r0p=r0;}
+static void f893(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-32768);*r1p=r1;*r0p=r0;}
+static void f894(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,32767);*r1p=r1;*r0p=r0;}
+static void f895(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8,0,-32769);*r1p=r1;*r0p=r0;}
+static void f896(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,1);*r1p=r1;*r0p=r0;}
+static void f897(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-1);*r1p=r1;*r0p=r0;}
+static void f898(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,0);*r1p=r1;*r0p=r0;}
+static void f899(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-2);*r1p=r1;*r0p=r0;}
+static void f900(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,2);*r1p=r1;*r0p=r0;}
+static void f901(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-2);*r1p=r1;*r0p=r0;}
+static void f902(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,1);*r1p=r1;*r0p=r0;}
+static void f903(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-3);*r1p=r1;*r0p=r0;}
+static void f904(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,4);*r1p=r1;*r0p=r0;}
+static void f905(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-4);*r1p=r1;*r0p=r0;}
+static void f906(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,3);*r1p=r1;*r0p=r0;}
+static void f907(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-5);*r1p=r1;*r0p=r0;}
+static void f908(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,8);*r1p=r1;*r0p=r0;}
+static void f909(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-8);*r1p=r1;*r0p=r0;}
+static void f910(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,7);*r1p=r1;*r0p=r0;}
+static void f911(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-9);*r1p=r1;*r0p=r0;}
+static void f912(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,16);*r1p=r1;*r0p=r0;}
+static void f913(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-16);*r1p=r1;*r0p=r0;}
+static void f914(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,15);*r1p=r1;*r0p=r0;}
+static void f915(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-17);*r1p=r1;*r0p=r0;}
+static void f916(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,32);*r1p=r1;*r0p=r0;}
+static void f917(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-32);*r1p=r1;*r0p=r0;}
+static void f918(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,31);*r1p=r1;*r0p=r0;}
+static void f919(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-33);*r1p=r1;*r0p=r0;}
+static void f920(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,64);*r1p=r1;*r0p=r0;}
+static void f921(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-64);*r1p=r1;*r0p=r0;}
+static void f922(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,63);*r1p=r1;*r0p=r0;}
+static void f923(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-65);*r1p=r1;*r0p=r0;}
+static void f924(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,128);*r1p=r1;*r0p=r0;}
+static void f925(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-128);*r1p=r1;*r0p=r0;}
+static void f926(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,127);*r1p=r1;*r0p=r0;}
+static void f927(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-129);*r1p=r1;*r0p=r0;}
+static void f928(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,256);*r1p=r1;*r0p=r0;}
+static void f929(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-256);*r1p=r1;*r0p=r0;}
+static void f930(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,255);*r1p=r1;*r0p=r0;}
+static void f931(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-257);*r1p=r1;*r0p=r0;}
+static void f932(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,512);*r1p=r1;*r0p=r0;}
+static void f933(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-512);*r1p=r1;*r0p=r0;}
+static void f934(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,511);*r1p=r1;*r0p=r0;}
+static void f935(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-513);*r1p=r1;*r0p=r0;}
+static void f936(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,1024);*r1p=r1;*r0p=r0;}
+static void f937(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-1024);*r1p=r1;*r0p=r0;}
+static void f938(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,1023);*r1p=r1;*r0p=r0;}
+static void f939(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-1025);*r1p=r1;*r0p=r0;}
+static void f940(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,2048);*r1p=r1;*r0p=r0;}
+static void f941(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-2048);*r1p=r1;*r0p=r0;}
+static void f942(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,2047);*r1p=r1;*r0p=r0;}
+static void f943(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-2049);*r1p=r1;*r0p=r0;}
+static void f944(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,4096);*r1p=r1;*r0p=r0;}
+static void f945(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-4096);*r1p=r1;*r0p=r0;}
+static void f946(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,4095);*r1p=r1;*r0p=r0;}
+static void f947(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-4097);*r1p=r1;*r0p=r0;}
+static void f948(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,8192);*r1p=r1;*r0p=r0;}
+static void f949(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-8192);*r1p=r1;*r0p=r0;}
+static void f950(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,8191);*r1p=r1;*r0p=r0;}
+static void f951(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-8193);*r1p=r1;*r0p=r0;}
+static void f952(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,16384);*r1p=r1;*r0p=r0;}
+static void f953(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-16384);*r1p=r1;*r0p=r0;}
+static void f954(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,16383);*r1p=r1;*r0p=r0;}
+static void f955(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-16385);*r1p=r1;*r0p=r0;}
+static void f956(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,32768);*r1p=r1;*r0p=r0;}
+static void f957(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-32768);*r1p=r1;*r0p=r0;}
+static void f958(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,32767);*r1p=r1;*r0p=r0;}
+static void f959(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,7,0,-32769);*r1p=r1;*r0p=r0;}
+static void f960(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,1);*r1p=r1;*r0p=r0;}
+static void f961(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-1);*r1p=r1;*r0p=r0;}
+static void f962(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,0);*r1p=r1;*r0p=r0;}
+static void f963(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-2);*r1p=r1;*r0p=r0;}
+static void f964(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,2);*r1p=r1;*r0p=r0;}
+static void f965(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-2);*r1p=r1;*r0p=r0;}
+static void f966(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,1);*r1p=r1;*r0p=r0;}
+static void f967(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-3);*r1p=r1;*r0p=r0;}
+static void f968(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,4);*r1p=r1;*r0p=r0;}
+static void f969(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-4);*r1p=r1;*r0p=r0;}
+static void f970(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,3);*r1p=r1;*r0p=r0;}
+static void f971(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-5);*r1p=r1;*r0p=r0;}
+static void f972(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,8);*r1p=r1;*r0p=r0;}
+static void f973(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-8);*r1p=r1;*r0p=r0;}
+static void f974(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,7);*r1p=r1;*r0p=r0;}
+static void f975(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-9);*r1p=r1;*r0p=r0;}
+static void f976(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,16);*r1p=r1;*r0p=r0;}
+static void f977(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-16);*r1p=r1;*r0p=r0;}
+static void f978(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,15);*r1p=r1;*r0p=r0;}
+static void f979(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-17);*r1p=r1;*r0p=r0;}
+static void f980(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,32);*r1p=r1;*r0p=r0;}
+static void f981(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-32);*r1p=r1;*r0p=r0;}
+static void f982(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,31);*r1p=r1;*r0p=r0;}
+static void f983(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-33);*r1p=r1;*r0p=r0;}
+static void f984(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,64);*r1p=r1;*r0p=r0;}
+static void f985(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-64);*r1p=r1;*r0p=r0;}
+static void f986(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,63);*r1p=r1;*r0p=r0;}
+static void f987(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-65);*r1p=r1;*r0p=r0;}
+static void f988(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,128);*r1p=r1;*r0p=r0;}
+static void f989(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-128);*r1p=r1;*r0p=r0;}
+static void f990(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,127);*r1p=r1;*r0p=r0;}
+static void f991(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-129);*r1p=r1;*r0p=r0;}
+static void f992(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,256);*r1p=r1;*r0p=r0;}
+static void f993(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-256);*r1p=r1;*r0p=r0;}
+static void f994(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,255);*r1p=r1;*r0p=r0;}
+static void f995(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-257);*r1p=r1;*r0p=r0;}
+static void f996(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,512);*r1p=r1;*r0p=r0;}
+static void f997(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-512);*r1p=r1;*r0p=r0;}
+static void f998(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,511);*r1p=r1;*r0p=r0;}
+static void f999(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-513);*r1p=r1;*r0p=r0;}
+static void f1000(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,1024);*r1p=r1;*r0p=r0;}
+static void f1001(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1002(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,1023);*r1p=r1;*r0p=r0;}
+static void f1003(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1004(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,2048);*r1p=r1;*r0p=r0;}
+static void f1005(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1006(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,2047);*r1p=r1;*r0p=r0;}
+static void f1007(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1008(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,4096);*r1p=r1;*r0p=r0;}
+static void f1009(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1010(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,4095);*r1p=r1;*r0p=r0;}
+static void f1011(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1012(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,8192);*r1p=r1;*r0p=r0;}
+static void f1013(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1014(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,8191);*r1p=r1;*r0p=r0;}
+static void f1015(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1016(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,16384);*r1p=r1;*r0p=r0;}
+static void f1017(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1018(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,16383);*r1p=r1;*r0p=r0;}
+static void f1019(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1020(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,32768);*r1p=r1;*r0p=r0;}
+static void f1021(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1022(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,32767);*r1p=r1;*r0p=r0;}
+static void f1023(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-9,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1024(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,1);*r1p=r1;*r0p=r0;}
+static void f1025(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-1);*r1p=r1;*r0p=r0;}
+static void f1026(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,0);*r1p=r1;*r0p=r0;}
+static void f1027(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-2);*r1p=r1;*r0p=r0;}
+static void f1028(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,2);*r1p=r1;*r0p=r0;}
+static void f1029(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-2);*r1p=r1;*r0p=r0;}
+static void f1030(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,1);*r1p=r1;*r0p=r0;}
+static void f1031(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-3);*r1p=r1;*r0p=r0;}
+static void f1032(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,4);*r1p=r1;*r0p=r0;}
+static void f1033(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-4);*r1p=r1;*r0p=r0;}
+static void f1034(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,3);*r1p=r1;*r0p=r0;}
+static void f1035(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-5);*r1p=r1;*r0p=r0;}
+static void f1036(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,8);*r1p=r1;*r0p=r0;}
+static void f1037(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-8);*r1p=r1;*r0p=r0;}
+static void f1038(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,7);*r1p=r1;*r0p=r0;}
+static void f1039(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-9);*r1p=r1;*r0p=r0;}
+static void f1040(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,16);*r1p=r1;*r0p=r0;}
+static void f1041(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-16);*r1p=r1;*r0p=r0;}
+static void f1042(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,15);*r1p=r1;*r0p=r0;}
+static void f1043(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-17);*r1p=r1;*r0p=r0;}
+static void f1044(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,32);*r1p=r1;*r0p=r0;}
+static void f1045(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-32);*r1p=r1;*r0p=r0;}
+static void f1046(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,31);*r1p=r1;*r0p=r0;}
+static void f1047(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-33);*r1p=r1;*r0p=r0;}
+static void f1048(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,64);*r1p=r1;*r0p=r0;}
+static void f1049(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-64);*r1p=r1;*r0p=r0;}
+static void f1050(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,63);*r1p=r1;*r0p=r0;}
+static void f1051(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-65);*r1p=r1;*r0p=r0;}
+static void f1052(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,128);*r1p=r1;*r0p=r0;}
+static void f1053(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-128);*r1p=r1;*r0p=r0;}
+static void f1054(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,127);*r1p=r1;*r0p=r0;}
+static void f1055(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-129);*r1p=r1;*r0p=r0;}
+static void f1056(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,256);*r1p=r1;*r0p=r0;}
+static void f1057(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-256);*r1p=r1;*r0p=r0;}
+static void f1058(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,255);*r1p=r1;*r0p=r0;}
+static void f1059(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-257);*r1p=r1;*r0p=r0;}
+static void f1060(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,512);*r1p=r1;*r0p=r0;}
+static void f1061(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-512);*r1p=r1;*r0p=r0;}
+static void f1062(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,511);*r1p=r1;*r0p=r0;}
+static void f1063(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-513);*r1p=r1;*r0p=r0;}
+static void f1064(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,1024);*r1p=r1;*r0p=r0;}
+static void f1065(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1066(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,1023);*r1p=r1;*r0p=r0;}
+static void f1067(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1068(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,2048);*r1p=r1;*r0p=r0;}
+static void f1069(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1070(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,2047);*r1p=r1;*r0p=r0;}
+static void f1071(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1072(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,4096);*r1p=r1;*r0p=r0;}
+static void f1073(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1074(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,4095);*r1p=r1;*r0p=r0;}
+static void f1075(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1076(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,8192);*r1p=r1;*r0p=r0;}
+static void f1077(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1078(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,8191);*r1p=r1;*r0p=r0;}
+static void f1079(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1080(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,16384);*r1p=r1;*r0p=r0;}
+static void f1081(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1082(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,16383);*r1p=r1;*r0p=r0;}
+static void f1083(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1084(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,32768);*r1p=r1;*r0p=r0;}
+static void f1085(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1086(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,32767);*r1p=r1;*r0p=r0;}
+static void f1087(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1088(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,1);*r1p=r1;*r0p=r0;}
+static void f1089(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-1);*r1p=r1;*r0p=r0;}
+static void f1090(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,0);*r1p=r1;*r0p=r0;}
+static void f1091(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-2);*r1p=r1;*r0p=r0;}
+static void f1092(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,2);*r1p=r1;*r0p=r0;}
+static void f1093(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-2);*r1p=r1;*r0p=r0;}
+static void f1094(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,1);*r1p=r1;*r0p=r0;}
+static void f1095(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-3);*r1p=r1;*r0p=r0;}
+static void f1096(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,4);*r1p=r1;*r0p=r0;}
+static void f1097(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-4);*r1p=r1;*r0p=r0;}
+static void f1098(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,3);*r1p=r1;*r0p=r0;}
+static void f1099(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-5);*r1p=r1;*r0p=r0;}
+static void f1100(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,8);*r1p=r1;*r0p=r0;}
+static void f1101(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-8);*r1p=r1;*r0p=r0;}
+static void f1102(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,7);*r1p=r1;*r0p=r0;}
+static void f1103(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-9);*r1p=r1;*r0p=r0;}
+static void f1104(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,16);*r1p=r1;*r0p=r0;}
+static void f1105(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-16);*r1p=r1;*r0p=r0;}
+static void f1106(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,15);*r1p=r1;*r0p=r0;}
+static void f1107(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-17);*r1p=r1;*r0p=r0;}
+static void f1108(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,32);*r1p=r1;*r0p=r0;}
+static void f1109(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-32);*r1p=r1;*r0p=r0;}
+static void f1110(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,31);*r1p=r1;*r0p=r0;}
+static void f1111(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-33);*r1p=r1;*r0p=r0;}
+static void f1112(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,64);*r1p=r1;*r0p=r0;}
+static void f1113(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-64);*r1p=r1;*r0p=r0;}
+static void f1114(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,63);*r1p=r1;*r0p=r0;}
+static void f1115(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-65);*r1p=r1;*r0p=r0;}
+static void f1116(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,128);*r1p=r1;*r0p=r0;}
+static void f1117(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-128);*r1p=r1;*r0p=r0;}
+static void f1118(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,127);*r1p=r1;*r0p=r0;}
+static void f1119(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-129);*r1p=r1;*r0p=r0;}
+static void f1120(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,256);*r1p=r1;*r0p=r0;}
+static void f1121(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-256);*r1p=r1;*r0p=r0;}
+static void f1122(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,255);*r1p=r1;*r0p=r0;}
+static void f1123(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-257);*r1p=r1;*r0p=r0;}
+static void f1124(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,512);*r1p=r1;*r0p=r0;}
+static void f1125(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-512);*r1p=r1;*r0p=r0;}
+static void f1126(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,511);*r1p=r1;*r0p=r0;}
+static void f1127(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-513);*r1p=r1;*r0p=r0;}
+static void f1128(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,1024);*r1p=r1;*r0p=r0;}
+static void f1129(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1130(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,1023);*r1p=r1;*r0p=r0;}
+static void f1131(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1132(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,2048);*r1p=r1;*r0p=r0;}
+static void f1133(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1134(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,2047);*r1p=r1;*r0p=r0;}
+static void f1135(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1136(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,4096);*r1p=r1;*r0p=r0;}
+static void f1137(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1138(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,4095);*r1p=r1;*r0p=r0;}
+static void f1139(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1140(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,8192);*r1p=r1;*r0p=r0;}
+static void f1141(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1142(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,8191);*r1p=r1;*r0p=r0;}
+static void f1143(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1144(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,16384);*r1p=r1;*r0p=r0;}
+static void f1145(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1146(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,16383);*r1p=r1;*r0p=r0;}
+static void f1147(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1148(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,32768);*r1p=r1;*r0p=r0;}
+static void f1149(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1150(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,32767);*r1p=r1;*r0p=r0;}
+static void f1151(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1152(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,1);*r1p=r1;*r0p=r0;}
+static void f1153(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-1);*r1p=r1;*r0p=r0;}
+static void f1154(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,0);*r1p=r1;*r0p=r0;}
+static void f1155(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-2);*r1p=r1;*r0p=r0;}
+static void f1156(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,2);*r1p=r1;*r0p=r0;}
+static void f1157(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-2);*r1p=r1;*r0p=r0;}
+static void f1158(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,1);*r1p=r1;*r0p=r0;}
+static void f1159(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-3);*r1p=r1;*r0p=r0;}
+static void f1160(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,4);*r1p=r1;*r0p=r0;}
+static void f1161(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-4);*r1p=r1;*r0p=r0;}
+static void f1162(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,3);*r1p=r1;*r0p=r0;}
+static void f1163(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-5);*r1p=r1;*r0p=r0;}
+static void f1164(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,8);*r1p=r1;*r0p=r0;}
+static void f1165(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-8);*r1p=r1;*r0p=r0;}
+static void f1166(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,7);*r1p=r1;*r0p=r0;}
+static void f1167(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-9);*r1p=r1;*r0p=r0;}
+static void f1168(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,16);*r1p=r1;*r0p=r0;}
+static void f1169(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-16);*r1p=r1;*r0p=r0;}
+static void f1170(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,15);*r1p=r1;*r0p=r0;}
+static void f1171(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-17);*r1p=r1;*r0p=r0;}
+static void f1172(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,32);*r1p=r1;*r0p=r0;}
+static void f1173(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-32);*r1p=r1;*r0p=r0;}
+static void f1174(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,31);*r1p=r1;*r0p=r0;}
+static void f1175(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-33);*r1p=r1;*r0p=r0;}
+static void f1176(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,64);*r1p=r1;*r0p=r0;}
+static void f1177(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-64);*r1p=r1;*r0p=r0;}
+static void f1178(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,63);*r1p=r1;*r0p=r0;}
+static void f1179(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-65);*r1p=r1;*r0p=r0;}
+static void f1180(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,128);*r1p=r1;*r0p=r0;}
+static void f1181(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-128);*r1p=r1;*r0p=r0;}
+static void f1182(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,127);*r1p=r1;*r0p=r0;}
+static void f1183(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-129);*r1p=r1;*r0p=r0;}
+static void f1184(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,256);*r1p=r1;*r0p=r0;}
+static void f1185(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-256);*r1p=r1;*r0p=r0;}
+static void f1186(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,255);*r1p=r1;*r0p=r0;}
+static void f1187(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-257);*r1p=r1;*r0p=r0;}
+static void f1188(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,512);*r1p=r1;*r0p=r0;}
+static void f1189(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-512);*r1p=r1;*r0p=r0;}
+static void f1190(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,511);*r1p=r1;*r0p=r0;}
+static void f1191(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-513);*r1p=r1;*r0p=r0;}
+static void f1192(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,1024);*r1p=r1;*r0p=r0;}
+static void f1193(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1194(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,1023);*r1p=r1;*r0p=r0;}
+static void f1195(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1196(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,2048);*r1p=r1;*r0p=r0;}
+static void f1197(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1198(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,2047);*r1p=r1;*r0p=r0;}
+static void f1199(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1200(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,4096);*r1p=r1;*r0p=r0;}
+static void f1201(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1202(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,4095);*r1p=r1;*r0p=r0;}
+static void f1203(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1204(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,8192);*r1p=r1;*r0p=r0;}
+static void f1205(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1206(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,8191);*r1p=r1;*r0p=r0;}
+static void f1207(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1208(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,16384);*r1p=r1;*r0p=r0;}
+static void f1209(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1210(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,16383);*r1p=r1;*r0p=r0;}
+static void f1211(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1212(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,32768);*r1p=r1;*r0p=r0;}
+static void f1213(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1214(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,32767);*r1p=r1;*r0p=r0;}
+static void f1215(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,15,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1216(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,1);*r1p=r1;*r0p=r0;}
+static void f1217(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-1);*r1p=r1;*r0p=r0;}
+static void f1218(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,0);*r1p=r1;*r0p=r0;}
+static void f1219(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-2);*r1p=r1;*r0p=r0;}
+static void f1220(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,2);*r1p=r1;*r0p=r0;}
+static void f1221(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-2);*r1p=r1;*r0p=r0;}
+static void f1222(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,1);*r1p=r1;*r0p=r0;}
+static void f1223(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-3);*r1p=r1;*r0p=r0;}
+static void f1224(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,4);*r1p=r1;*r0p=r0;}
+static void f1225(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-4);*r1p=r1;*r0p=r0;}
+static void f1226(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,3);*r1p=r1;*r0p=r0;}
+static void f1227(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-5);*r1p=r1;*r0p=r0;}
+static void f1228(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,8);*r1p=r1;*r0p=r0;}
+static void f1229(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-8);*r1p=r1;*r0p=r0;}
+static void f1230(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,7);*r1p=r1;*r0p=r0;}
+static void f1231(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-9);*r1p=r1;*r0p=r0;}
+static void f1232(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,16);*r1p=r1;*r0p=r0;}
+static void f1233(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-16);*r1p=r1;*r0p=r0;}
+static void f1234(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,15);*r1p=r1;*r0p=r0;}
+static void f1235(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-17);*r1p=r1;*r0p=r0;}
+static void f1236(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,32);*r1p=r1;*r0p=r0;}
+static void f1237(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-32);*r1p=r1;*r0p=r0;}
+static void f1238(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,31);*r1p=r1;*r0p=r0;}
+static void f1239(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-33);*r1p=r1;*r0p=r0;}
+static void f1240(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,64);*r1p=r1;*r0p=r0;}
+static void f1241(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-64);*r1p=r1;*r0p=r0;}
+static void f1242(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,63);*r1p=r1;*r0p=r0;}
+static void f1243(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-65);*r1p=r1;*r0p=r0;}
+static void f1244(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,128);*r1p=r1;*r0p=r0;}
+static void f1245(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-128);*r1p=r1;*r0p=r0;}
+static void f1246(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,127);*r1p=r1;*r0p=r0;}
+static void f1247(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-129);*r1p=r1;*r0p=r0;}
+static void f1248(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,256);*r1p=r1;*r0p=r0;}
+static void f1249(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-256);*r1p=r1;*r0p=r0;}
+static void f1250(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,255);*r1p=r1;*r0p=r0;}
+static void f1251(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-257);*r1p=r1;*r0p=r0;}
+static void f1252(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,512);*r1p=r1;*r0p=r0;}
+static void f1253(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-512);*r1p=r1;*r0p=r0;}
+static void f1254(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,511);*r1p=r1;*r0p=r0;}
+static void f1255(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-513);*r1p=r1;*r0p=r0;}
+static void f1256(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,1024);*r1p=r1;*r0p=r0;}
+static void f1257(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1258(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,1023);*r1p=r1;*r0p=r0;}
+static void f1259(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1260(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,2048);*r1p=r1;*r0p=r0;}
+static void f1261(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1262(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,2047);*r1p=r1;*r0p=r0;}
+static void f1263(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1264(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,4096);*r1p=r1;*r0p=r0;}
+static void f1265(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1266(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,4095);*r1p=r1;*r0p=r0;}
+static void f1267(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1268(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,8192);*r1p=r1;*r0p=r0;}
+static void f1269(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1270(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,8191);*r1p=r1;*r0p=r0;}
+static void f1271(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1272(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,16384);*r1p=r1;*r0p=r0;}
+static void f1273(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1274(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,16383);*r1p=r1;*r0p=r0;}
+static void f1275(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1276(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,32768);*r1p=r1;*r0p=r0;}
+static void f1277(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1278(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,32767);*r1p=r1;*r0p=r0;}
+static void f1279(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-17,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1280(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,1);*r1p=r1;*r0p=r0;}
+static void f1281(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-1);*r1p=r1;*r0p=r0;}
+static void f1282(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,0);*r1p=r1;*r0p=r0;}
+static void f1283(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-2);*r1p=r1;*r0p=r0;}
+static void f1284(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,2);*r1p=r1;*r0p=r0;}
+static void f1285(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-2);*r1p=r1;*r0p=r0;}
+static void f1286(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,1);*r1p=r1;*r0p=r0;}
+static void f1287(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-3);*r1p=r1;*r0p=r0;}
+static void f1288(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,4);*r1p=r1;*r0p=r0;}
+static void f1289(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-4);*r1p=r1;*r0p=r0;}
+static void f1290(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,3);*r1p=r1;*r0p=r0;}
+static void f1291(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-5);*r1p=r1;*r0p=r0;}
+static void f1292(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,8);*r1p=r1;*r0p=r0;}
+static void f1293(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-8);*r1p=r1;*r0p=r0;}
+static void f1294(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,7);*r1p=r1;*r0p=r0;}
+static void f1295(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-9);*r1p=r1;*r0p=r0;}
+static void f1296(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,16);*r1p=r1;*r0p=r0;}
+static void f1297(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-16);*r1p=r1;*r0p=r0;}
+static void f1298(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,15);*r1p=r1;*r0p=r0;}
+static void f1299(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-17);*r1p=r1;*r0p=r0;}
+static void f1300(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,32);*r1p=r1;*r0p=r0;}
+static void f1301(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-32);*r1p=r1;*r0p=r0;}
+static void f1302(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,31);*r1p=r1;*r0p=r0;}
+static void f1303(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-33);*r1p=r1;*r0p=r0;}
+static void f1304(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,64);*r1p=r1;*r0p=r0;}
+static void f1305(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-64);*r1p=r1;*r0p=r0;}
+static void f1306(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,63);*r1p=r1;*r0p=r0;}
+static void f1307(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-65);*r1p=r1;*r0p=r0;}
+static void f1308(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,128);*r1p=r1;*r0p=r0;}
+static void f1309(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-128);*r1p=r1;*r0p=r0;}
+static void f1310(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,127);*r1p=r1;*r0p=r0;}
+static void f1311(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-129);*r1p=r1;*r0p=r0;}
+static void f1312(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,256);*r1p=r1;*r0p=r0;}
+static void f1313(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-256);*r1p=r1;*r0p=r0;}
+static void f1314(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,255);*r1p=r1;*r0p=r0;}
+static void f1315(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-257);*r1p=r1;*r0p=r0;}
+static void f1316(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,512);*r1p=r1;*r0p=r0;}
+static void f1317(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-512);*r1p=r1;*r0p=r0;}
+static void f1318(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,511);*r1p=r1;*r0p=r0;}
+static void f1319(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-513);*r1p=r1;*r0p=r0;}
+static void f1320(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,1024);*r1p=r1;*r0p=r0;}
+static void f1321(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1322(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,1023);*r1p=r1;*r0p=r0;}
+static void f1323(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1324(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,2048);*r1p=r1;*r0p=r0;}
+static void f1325(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1326(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,2047);*r1p=r1;*r0p=r0;}
+static void f1327(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1328(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,4096);*r1p=r1;*r0p=r0;}
+static void f1329(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1330(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,4095);*r1p=r1;*r0p=r0;}
+static void f1331(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1332(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,8192);*r1p=r1;*r0p=r0;}
+static void f1333(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1334(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,8191);*r1p=r1;*r0p=r0;}
+static void f1335(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1336(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,16384);*r1p=r1;*r0p=r0;}
+static void f1337(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1338(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,16383);*r1p=r1;*r0p=r0;}
+static void f1339(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1340(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,32768);*r1p=r1;*r0p=r0;}
+static void f1341(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1342(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,32767);*r1p=r1;*r0p=r0;}
+static void f1343(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1344(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,1);*r1p=r1;*r0p=r0;} /* A=-32 group, f1344-f1407: two-limb sum (0,-32)+(0,B) via add_ssaaaa, limbs stored through r1p/r0p. B sweeps +-2^k, 2^k-1, -(2^k)-1 for k=0..15; the generator's sequence repeats a few values (e.g. f1349 duplicates f1347). */
+static void f1345(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-1);*r1p=r1;*r0p=r0;}
+static void f1346(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,0);*r1p=r1;*r0p=r0;}
+static void f1347(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-2);*r1p=r1;*r0p=r0;}
+static void f1348(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,2);*r1p=r1;*r0p=r0;}
+static void f1349(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-2);*r1p=r1;*r0p=r0;}
+static void f1350(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,1);*r1p=r1;*r0p=r0;}
+static void f1351(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-3);*r1p=r1;*r0p=r0;}
+static void f1352(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,4);*r1p=r1;*r0p=r0;}
+static void f1353(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-4);*r1p=r1;*r0p=r0;}
+static void f1354(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,3);*r1p=r1;*r0p=r0;}
+static void f1355(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-5);*r1p=r1;*r0p=r0;}
+static void f1356(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,8);*r1p=r1;*r0p=r0;}
+static void f1357(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-8);*r1p=r1;*r0p=r0;}
+static void f1358(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,7);*r1p=r1;*r0p=r0;}
+static void f1359(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-9);*r1p=r1;*r0p=r0;}
+static void f1360(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,16);*r1p=r1;*r0p=r0;}
+static void f1361(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-16);*r1p=r1;*r0p=r0;}
+static void f1362(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,15);*r1p=r1;*r0p=r0;}
+static void f1363(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-17);*r1p=r1;*r0p=r0;}
+static void f1364(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,32);*r1p=r1;*r0p=r0;}
+static void f1365(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-32);*r1p=r1;*r0p=r0;}
+static void f1366(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,31);*r1p=r1;*r0p=r0;}
+static void f1367(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-33);*r1p=r1;*r0p=r0;}
+static void f1368(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,64);*r1p=r1;*r0p=r0;}
+static void f1369(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-64);*r1p=r1;*r0p=r0;}
+static void f1370(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,63);*r1p=r1;*r0p=r0;}
+static void f1371(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-65);*r1p=r1;*r0p=r0;}
+static void f1372(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,128);*r1p=r1;*r0p=r0;}
+static void f1373(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-128);*r1p=r1;*r0p=r0;}
+static void f1374(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,127);*r1p=r1;*r0p=r0;}
+static void f1375(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-129);*r1p=r1;*r0p=r0;}
+static void f1376(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,256);*r1p=r1;*r0p=r0;}
+static void f1377(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-256);*r1p=r1;*r0p=r0;}
+static void f1378(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,255);*r1p=r1;*r0p=r0;}
+static void f1379(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-257);*r1p=r1;*r0p=r0;}
+static void f1380(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,512);*r1p=r1;*r0p=r0;}
+static void f1381(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-512);*r1p=r1;*r0p=r0;}
+static void f1382(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,511);*r1p=r1;*r0p=r0;}
+static void f1383(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-513);*r1p=r1;*r0p=r0;}
+static void f1384(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,1024);*r1p=r1;*r0p=r0;}
+static void f1385(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1386(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,1023);*r1p=r1;*r0p=r0;}
+static void f1387(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1388(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,2048);*r1p=r1;*r0p=r0;}
+static void f1389(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1390(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,2047);*r1p=r1;*r0p=r0;}
+static void f1391(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1392(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,4096);*r1p=r1;*r0p=r0;}
+static void f1393(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1394(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,4095);*r1p=r1;*r0p=r0;}
+static void f1395(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1396(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,8192);*r1p=r1;*r0p=r0;}
+static void f1397(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1398(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,8191);*r1p=r1;*r0p=r0;}
+static void f1399(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1400(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,16384);*r1p=r1;*r0p=r0;}
+static void f1401(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1402(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,16383);*r1p=r1;*r0p=r0;}
+static void f1403(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1404(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,32768);*r1p=r1;*r0p=r0;}
+static void f1405(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1406(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,32767);*r1p=r1;*r0p=r0;}
+static void f1407(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1408(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,1);*r1p=r1;*r0p=r0;} /* A=31 group, f1408-f1471: two-limb sum (0,31)+(0,B) via add_ssaaaa, limbs stored through r1p/r0p. B sweeps +-2^k, 2^k-1, -(2^k)-1 for k=0..15. */
+static void f1409(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-1);*r1p=r1;*r0p=r0;}
+static void f1410(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,0);*r1p=r1;*r0p=r0;}
+static void f1411(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-2);*r1p=r1;*r0p=r0;}
+static void f1412(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,2);*r1p=r1;*r0p=r0;}
+static void f1413(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-2);*r1p=r1;*r0p=r0;}
+static void f1414(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,1);*r1p=r1;*r0p=r0;}
+static void f1415(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-3);*r1p=r1;*r0p=r0;}
+static void f1416(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,4);*r1p=r1;*r0p=r0;}
+static void f1417(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-4);*r1p=r1;*r0p=r0;}
+static void f1418(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,3);*r1p=r1;*r0p=r0;}
+static void f1419(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-5);*r1p=r1;*r0p=r0;}
+static void f1420(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,8);*r1p=r1;*r0p=r0;}
+static void f1421(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-8);*r1p=r1;*r0p=r0;}
+static void f1422(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,7);*r1p=r1;*r0p=r0;}
+static void f1423(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-9);*r1p=r1;*r0p=r0;}
+static void f1424(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,16);*r1p=r1;*r0p=r0;}
+static void f1425(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-16);*r1p=r1;*r0p=r0;}
+static void f1426(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,15);*r1p=r1;*r0p=r0;}
+static void f1427(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-17);*r1p=r1;*r0p=r0;}
+static void f1428(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,32);*r1p=r1;*r0p=r0;}
+static void f1429(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-32);*r1p=r1;*r0p=r0;}
+static void f1430(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,31);*r1p=r1;*r0p=r0;}
+static void f1431(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-33);*r1p=r1;*r0p=r0;}
+static void f1432(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,64);*r1p=r1;*r0p=r0;}
+static void f1433(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-64);*r1p=r1;*r0p=r0;}
+static void f1434(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,63);*r1p=r1;*r0p=r0;}
+static void f1435(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-65);*r1p=r1;*r0p=r0;}
+static void f1436(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,128);*r1p=r1;*r0p=r0;}
+static void f1437(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-128);*r1p=r1;*r0p=r0;}
+static void f1438(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,127);*r1p=r1;*r0p=r0;}
+static void f1439(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-129);*r1p=r1;*r0p=r0;}
+static void f1440(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,256);*r1p=r1;*r0p=r0;}
+static void f1441(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-256);*r1p=r1;*r0p=r0;}
+static void f1442(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,255);*r1p=r1;*r0p=r0;}
+static void f1443(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-257);*r1p=r1;*r0p=r0;}
+static void f1444(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,512);*r1p=r1;*r0p=r0;}
+static void f1445(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-512);*r1p=r1;*r0p=r0;}
+static void f1446(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,511);*r1p=r1;*r0p=r0;}
+static void f1447(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-513);*r1p=r1;*r0p=r0;}
+static void f1448(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,1024);*r1p=r1;*r0p=r0;}
+static void f1449(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1450(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,1023);*r1p=r1;*r0p=r0;}
+static void f1451(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1452(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,2048);*r1p=r1;*r0p=r0;}
+static void f1453(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1454(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,2047);*r1p=r1;*r0p=r0;}
+static void f1455(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1456(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,4096);*r1p=r1;*r0p=r0;}
+static void f1457(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1458(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,4095);*r1p=r1;*r0p=r0;}
+static void f1459(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1460(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,8192);*r1p=r1;*r0p=r0;}
+static void f1461(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1462(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,8191);*r1p=r1;*r0p=r0;}
+static void f1463(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1464(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,16384);*r1p=r1;*r0p=r0;}
+static void f1465(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1466(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,16383);*r1p=r1;*r0p=r0;}
+static void f1467(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1468(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,32768);*r1p=r1;*r0p=r0;}
+static void f1469(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1470(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,32767);*r1p=r1;*r0p=r0;}
+static void f1471(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,31,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1472(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,1);*r1p=r1;*r0p=r0;} /* A=-33 group, f1472-f1535: two-limb sum (0,-33)+(0,B) via add_ssaaaa, limbs stored through r1p/r0p. B sweeps +-2^k, 2^k-1, -(2^k)-1 for k=0..15. */
+static void f1473(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-1);*r1p=r1;*r0p=r0;}
+static void f1474(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,0);*r1p=r1;*r0p=r0;}
+static void f1475(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-2);*r1p=r1;*r0p=r0;}
+static void f1476(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,2);*r1p=r1;*r0p=r0;}
+static void f1477(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-2);*r1p=r1;*r0p=r0;}
+static void f1478(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,1);*r1p=r1;*r0p=r0;}
+static void f1479(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-3);*r1p=r1;*r0p=r0;}
+static void f1480(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,4);*r1p=r1;*r0p=r0;}
+static void f1481(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-4);*r1p=r1;*r0p=r0;}
+static void f1482(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,3);*r1p=r1;*r0p=r0;}
+static void f1483(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-5);*r1p=r1;*r0p=r0;}
+static void f1484(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,8);*r1p=r1;*r0p=r0;}
+static void f1485(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-8);*r1p=r1;*r0p=r0;}
+static void f1486(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,7);*r1p=r1;*r0p=r0;}
+static void f1487(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-9);*r1p=r1;*r0p=r0;}
+static void f1488(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,16);*r1p=r1;*r0p=r0;}
+static void f1489(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-16);*r1p=r1;*r0p=r0;}
+static void f1490(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,15);*r1p=r1;*r0p=r0;}
+static void f1491(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-17);*r1p=r1;*r0p=r0;}
+static void f1492(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,32);*r1p=r1;*r0p=r0;}
+static void f1493(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-32);*r1p=r1;*r0p=r0;}
+static void f1494(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,31);*r1p=r1;*r0p=r0;}
+static void f1495(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-33);*r1p=r1;*r0p=r0;}
+static void f1496(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,64);*r1p=r1;*r0p=r0;}
+static void f1497(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-64);*r1p=r1;*r0p=r0;}
+static void f1498(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,63);*r1p=r1;*r0p=r0;}
+static void f1499(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-65);*r1p=r1;*r0p=r0;}
+static void f1500(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,128);*r1p=r1;*r0p=r0;}
+static void f1501(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-128);*r1p=r1;*r0p=r0;}
+static void f1502(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,127);*r1p=r1;*r0p=r0;}
+static void f1503(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-129);*r1p=r1;*r0p=r0;}
+static void f1504(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,256);*r1p=r1;*r0p=r0;}
+static void f1505(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-256);*r1p=r1;*r0p=r0;}
+static void f1506(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,255);*r1p=r1;*r0p=r0;}
+static void f1507(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-257);*r1p=r1;*r0p=r0;}
+static void f1508(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,512);*r1p=r1;*r0p=r0;}
+static void f1509(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-512);*r1p=r1;*r0p=r0;}
+static void f1510(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,511);*r1p=r1;*r0p=r0;}
+static void f1511(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-513);*r1p=r1;*r0p=r0;}
+static void f1512(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,1024);*r1p=r1;*r0p=r0;}
+static void f1513(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1514(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,1023);*r1p=r1;*r0p=r0;}
+static void f1515(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1516(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,2048);*r1p=r1;*r0p=r0;}
+static void f1517(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1518(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,2047);*r1p=r1;*r0p=r0;}
+static void f1519(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1520(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,4096);*r1p=r1;*r0p=r0;}
+static void f1521(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1522(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,4095);*r1p=r1;*r0p=r0;}
+static void f1523(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1524(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,8192);*r1p=r1;*r0p=r0;}
+static void f1525(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1526(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,8191);*r1p=r1;*r0p=r0;}
+static void f1527(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1528(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,16384);*r1p=r1;*r0p=r0;}
+static void f1529(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1530(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,16383);*r1p=r1;*r0p=r0;}
+static void f1531(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1532(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,32768);*r1p=r1;*r0p=r0;}
+static void f1533(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1534(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,32767);*r1p=r1;*r0p=r0;}
+static void f1535(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-33,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1536(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,1);*r1p=r1;*r0p=r0;} /* A=64 group, f1536-f1599: two-limb sum (0,64)+(0,B) via add_ssaaaa, limbs stored through r1p/r0p. B sweeps +-2^k, 2^k-1, -(2^k)-1 for k=0..15. */
+static void f1537(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-1);*r1p=r1;*r0p=r0;}
+static void f1538(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,0);*r1p=r1;*r0p=r0;}
+static void f1539(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-2);*r1p=r1;*r0p=r0;}
+static void f1540(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,2);*r1p=r1;*r0p=r0;}
+static void f1541(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-2);*r1p=r1;*r0p=r0;}
+static void f1542(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,1);*r1p=r1;*r0p=r0;}
+static void f1543(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-3);*r1p=r1;*r0p=r0;}
+static void f1544(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,4);*r1p=r1;*r0p=r0;}
+static void f1545(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-4);*r1p=r1;*r0p=r0;}
+static void f1546(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,3);*r1p=r1;*r0p=r0;}
+static void f1547(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-5);*r1p=r1;*r0p=r0;}
+static void f1548(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,8);*r1p=r1;*r0p=r0;}
+static void f1549(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-8);*r1p=r1;*r0p=r0;}
+static void f1550(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,7);*r1p=r1;*r0p=r0;}
+static void f1551(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-9);*r1p=r1;*r0p=r0;}
+static void f1552(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,16);*r1p=r1;*r0p=r0;}
+static void f1553(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-16);*r1p=r1;*r0p=r0;}
+static void f1554(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,15);*r1p=r1;*r0p=r0;}
+static void f1555(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-17);*r1p=r1;*r0p=r0;}
+static void f1556(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,32);*r1p=r1;*r0p=r0;}
+static void f1557(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-32);*r1p=r1;*r0p=r0;}
+static void f1558(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,31);*r1p=r1;*r0p=r0;}
+static void f1559(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-33);*r1p=r1;*r0p=r0;}
+static void f1560(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,64);*r1p=r1;*r0p=r0;}
+static void f1561(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-64);*r1p=r1;*r0p=r0;}
+static void f1562(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,63);*r1p=r1;*r0p=r0;}
+static void f1563(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-65);*r1p=r1;*r0p=r0;}
+static void f1564(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,128);*r1p=r1;*r0p=r0;}
+static void f1565(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-128);*r1p=r1;*r0p=r0;}
+static void f1566(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,127);*r1p=r1;*r0p=r0;}
+static void f1567(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-129);*r1p=r1;*r0p=r0;}
+static void f1568(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,256);*r1p=r1;*r0p=r0;}
+static void f1569(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-256);*r1p=r1;*r0p=r0;}
+static void f1570(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,255);*r1p=r1;*r0p=r0;}
+static void f1571(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-257);*r1p=r1;*r0p=r0;}
+static void f1572(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,512);*r1p=r1;*r0p=r0;}
+static void f1573(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-512);*r1p=r1;*r0p=r0;}
+static void f1574(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,511);*r1p=r1;*r0p=r0;}
+static void f1575(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-513);*r1p=r1;*r0p=r0;}
+static void f1576(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,1024);*r1p=r1;*r0p=r0;}
+static void f1577(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1578(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,1023);*r1p=r1;*r0p=r0;}
+static void f1579(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1580(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,2048);*r1p=r1;*r0p=r0;}
+static void f1581(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1582(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,2047);*r1p=r1;*r0p=r0;}
+static void f1583(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1584(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,4096);*r1p=r1;*r0p=r0;}
+static void f1585(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1586(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,4095);*r1p=r1;*r0p=r0;}
+static void f1587(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1588(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,8192);*r1p=r1;*r0p=r0;}
+static void f1589(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1590(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,8191);*r1p=r1;*r0p=r0;}
+static void f1591(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1592(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,16384);*r1p=r1;*r0p=r0;}
+static void f1593(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1594(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,16383);*r1p=r1;*r0p=r0;}
+static void f1595(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1596(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,32768);*r1p=r1;*r0p=r0;}
+static void f1597(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1598(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,32767);*r1p=r1;*r0p=r0;}
+static void f1599(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,64,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1600(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,1);*r1p=r1;*r0p=r0;} /* A=-64 group, f1600 onward: two-limb sum (0,-64)+(0,B) via add_ssaaaa, limbs stored through r1p/r0p. B sweeps +-2^k, 2^k-1, -(2^k)-1 for k=0..15; the group continues past f1662. */
+static void f1601(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-1);*r1p=r1;*r0p=r0;}
+static void f1602(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,0);*r1p=r1;*r0p=r0;}
+static void f1603(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-2);*r1p=r1;*r0p=r0;}
+static void f1604(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,2);*r1p=r1;*r0p=r0;}
+static void f1605(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-2);*r1p=r1;*r0p=r0;}
+static void f1606(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,1);*r1p=r1;*r0p=r0;}
+static void f1607(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-3);*r1p=r1;*r0p=r0;}
+static void f1608(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,4);*r1p=r1;*r0p=r0;}
+static void f1609(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-4);*r1p=r1;*r0p=r0;}
+static void f1610(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,3);*r1p=r1;*r0p=r0;}
+static void f1611(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-5);*r1p=r1;*r0p=r0;}
+static void f1612(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,8);*r1p=r1;*r0p=r0;}
+static void f1613(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-8);*r1p=r1;*r0p=r0;}
+static void f1614(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,7);*r1p=r1;*r0p=r0;}
+static void f1615(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-9);*r1p=r1;*r0p=r0;}
+static void f1616(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,16);*r1p=r1;*r0p=r0;}
+static void f1617(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-16);*r1p=r1;*r0p=r0;}
+static void f1618(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,15);*r1p=r1;*r0p=r0;}
+static void f1619(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-17);*r1p=r1;*r0p=r0;}
+static void f1620(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,32);*r1p=r1;*r0p=r0;}
+static void f1621(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-32);*r1p=r1;*r0p=r0;}
+static void f1622(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,31);*r1p=r1;*r0p=r0;}
+static void f1623(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-33);*r1p=r1;*r0p=r0;}
+static void f1624(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,64);*r1p=r1;*r0p=r0;}
+static void f1625(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-64);*r1p=r1;*r0p=r0;}
+static void f1626(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,63);*r1p=r1;*r0p=r0;}
+static void f1627(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-65);*r1p=r1;*r0p=r0;}
+static void f1628(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,128);*r1p=r1;*r0p=r0;}
+static void f1629(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-128);*r1p=r1;*r0p=r0;}
+static void f1630(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,127);*r1p=r1;*r0p=r0;}
+static void f1631(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-129);*r1p=r1;*r0p=r0;}
+static void f1632(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,256);*r1p=r1;*r0p=r0;}
+static void f1633(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-256);*r1p=r1;*r0p=r0;}
+static void f1634(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,255);*r1p=r1;*r0p=r0;}
+static void f1635(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-257);*r1p=r1;*r0p=r0;}
+static void f1636(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,512);*r1p=r1;*r0p=r0;}
+static void f1637(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-512);*r1p=r1;*r0p=r0;}
+static void f1638(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,511);*r1p=r1;*r0p=r0;}
+static void f1639(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-513);*r1p=r1;*r0p=r0;}
+static void f1640(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,1024);*r1p=r1;*r0p=r0;}
+static void f1641(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1642(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,1023);*r1p=r1;*r0p=r0;}
+static void f1643(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1644(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,2048);*r1p=r1;*r0p=r0;}
+static void f1645(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1646(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,2047);*r1p=r1;*r0p=r0;}
+static void f1647(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1648(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,4096);*r1p=r1;*r0p=r0;}
+static void f1649(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1650(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,4095);*r1p=r1;*r0p=r0;}
+static void f1651(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1652(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,8192);*r1p=r1;*r0p=r0;}
+static void f1653(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1654(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,8191);*r1p=r1;*r0p=r0;}
+static void f1655(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1656(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,16384);*r1p=r1;*r0p=r0;}
+static void f1657(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1658(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,16383);*r1p=r1;*r0p=r0;}
+static void f1659(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1660(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,32768);*r1p=r1;*r0p=r0;}
+static void f1661(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1662(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,32767);*r1p=r1;*r0p=r0;}
+static void f1663(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-64,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1664(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,1);*r1p=r1;*r0p=r0;}
+static void f1665(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-1);*r1p=r1;*r0p=r0;}
+static void f1666(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,0);*r1p=r1;*r0p=r0;}
+static void f1667(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-2);*r1p=r1;*r0p=r0;}
+static void f1668(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,2);*r1p=r1;*r0p=r0;}
+static void f1669(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-2);*r1p=r1;*r0p=r0;}
+static void f1670(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,1);*r1p=r1;*r0p=r0;}
+static void f1671(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-3);*r1p=r1;*r0p=r0;}
+static void f1672(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,4);*r1p=r1;*r0p=r0;}
+static void f1673(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-4);*r1p=r1;*r0p=r0;}
+static void f1674(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,3);*r1p=r1;*r0p=r0;}
+static void f1675(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-5);*r1p=r1;*r0p=r0;}
+static void f1676(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,8);*r1p=r1;*r0p=r0;}
+static void f1677(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-8);*r1p=r1;*r0p=r0;}
+static void f1678(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,7);*r1p=r1;*r0p=r0;}
+static void f1679(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-9);*r1p=r1;*r0p=r0;}
+static void f1680(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,16);*r1p=r1;*r0p=r0;}
+static void f1681(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-16);*r1p=r1;*r0p=r0;}
+static void f1682(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,15);*r1p=r1;*r0p=r0;}
+static void f1683(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-17);*r1p=r1;*r0p=r0;}
+static void f1684(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,32);*r1p=r1;*r0p=r0;}
+static void f1685(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-32);*r1p=r1;*r0p=r0;}
+static void f1686(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,31);*r1p=r1;*r0p=r0;}
+static void f1687(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-33);*r1p=r1;*r0p=r0;}
+static void f1688(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,64);*r1p=r1;*r0p=r0;}
+static void f1689(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-64);*r1p=r1;*r0p=r0;}
+static void f1690(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,63);*r1p=r1;*r0p=r0;}
+static void f1691(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-65);*r1p=r1;*r0p=r0;}
+static void f1692(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,128);*r1p=r1;*r0p=r0;}
+static void f1693(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-128);*r1p=r1;*r0p=r0;}
+static void f1694(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,127);*r1p=r1;*r0p=r0;}
+static void f1695(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-129);*r1p=r1;*r0p=r0;}
+static void f1696(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,256);*r1p=r1;*r0p=r0;}
+static void f1697(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-256);*r1p=r1;*r0p=r0;}
+static void f1698(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,255);*r1p=r1;*r0p=r0;}
+static void f1699(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-257);*r1p=r1;*r0p=r0;}
+static void f1700(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,512);*r1p=r1;*r0p=r0;}
+static void f1701(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-512);*r1p=r1;*r0p=r0;}
+static void f1702(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,511);*r1p=r1;*r0p=r0;}
+static void f1703(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-513);*r1p=r1;*r0p=r0;}
+static void f1704(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,1024);*r1p=r1;*r0p=r0;}
+static void f1705(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1706(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,1023);*r1p=r1;*r0p=r0;}
+static void f1707(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1708(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,2048);*r1p=r1;*r0p=r0;}
+static void f1709(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1710(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,2047);*r1p=r1;*r0p=r0;}
+static void f1711(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1712(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,4096);*r1p=r1;*r0p=r0;}
+static void f1713(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1714(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,4095);*r1p=r1;*r0p=r0;}
+static void f1715(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1716(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,8192);*r1p=r1;*r0p=r0;}
+static void f1717(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1718(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,8191);*r1p=r1;*r0p=r0;}
+static void f1719(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1720(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,16384);*r1p=r1;*r0p=r0;}
+static void f1721(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1722(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,16383);*r1p=r1;*r0p=r0;}
+static void f1723(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1724(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,32768);*r1p=r1;*r0p=r0;}
+static void f1725(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1726(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,32767);*r1p=r1;*r0p=r0;}
+static void f1727(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,63,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1728(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,1);*r1p=r1;*r0p=r0;}
+static void f1729(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-1);*r1p=r1;*r0p=r0;}
+static void f1730(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,0);*r1p=r1;*r0p=r0;}
+static void f1731(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-2);*r1p=r1;*r0p=r0;}
+static void f1732(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,2);*r1p=r1;*r0p=r0;}
+static void f1733(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-2);*r1p=r1;*r0p=r0;}
+static void f1734(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,1);*r1p=r1;*r0p=r0;}
+static void f1735(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-3);*r1p=r1;*r0p=r0;}
+static void f1736(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,4);*r1p=r1;*r0p=r0;}
+static void f1737(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-4);*r1p=r1;*r0p=r0;}
+static void f1738(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,3);*r1p=r1;*r0p=r0;}
+static void f1739(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-5);*r1p=r1;*r0p=r0;}
+static void f1740(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,8);*r1p=r1;*r0p=r0;}
+static void f1741(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-8);*r1p=r1;*r0p=r0;}
+static void f1742(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,7);*r1p=r1;*r0p=r0;}
+static void f1743(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-9);*r1p=r1;*r0p=r0;}
+static void f1744(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,16);*r1p=r1;*r0p=r0;}
+static void f1745(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-16);*r1p=r1;*r0p=r0;}
+static void f1746(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,15);*r1p=r1;*r0p=r0;}
+static void f1747(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-17);*r1p=r1;*r0p=r0;}
+static void f1748(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,32);*r1p=r1;*r0p=r0;}
+static void f1749(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-32);*r1p=r1;*r0p=r0;}
+static void f1750(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,31);*r1p=r1;*r0p=r0;}
+static void f1751(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-33);*r1p=r1;*r0p=r0;}
+static void f1752(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,64);*r1p=r1;*r0p=r0;}
+static void f1753(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-64);*r1p=r1;*r0p=r0;}
+static void f1754(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,63);*r1p=r1;*r0p=r0;}
+static void f1755(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-65);*r1p=r1;*r0p=r0;}
+static void f1756(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,128);*r1p=r1;*r0p=r0;}
+static void f1757(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-128);*r1p=r1;*r0p=r0;}
+static void f1758(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,127);*r1p=r1;*r0p=r0;}
+static void f1759(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-129);*r1p=r1;*r0p=r0;}
+static void f1760(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,256);*r1p=r1;*r0p=r0;}
+static void f1761(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-256);*r1p=r1;*r0p=r0;}
+static void f1762(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,255);*r1p=r1;*r0p=r0;}
+static void f1763(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-257);*r1p=r1;*r0p=r0;}
+static void f1764(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,512);*r1p=r1;*r0p=r0;}
+static void f1765(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-512);*r1p=r1;*r0p=r0;}
+static void f1766(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,511);*r1p=r1;*r0p=r0;}
+static void f1767(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-513);*r1p=r1;*r0p=r0;}
+static void f1768(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,1024);*r1p=r1;*r0p=r0;}
+static void f1769(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1770(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,1023);*r1p=r1;*r0p=r0;}
+static void f1771(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1772(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,2048);*r1p=r1;*r0p=r0;}
+static void f1773(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1774(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,2047);*r1p=r1;*r0p=r0;}
+static void f1775(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1776(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,4096);*r1p=r1;*r0p=r0;}
+static void f1777(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1778(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,4095);*r1p=r1;*r0p=r0;}
+static void f1779(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1780(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,8192);*r1p=r1;*r0p=r0;}
+static void f1781(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1782(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,8191);*r1p=r1;*r0p=r0;}
+static void f1783(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1784(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,16384);*r1p=r1;*r0p=r0;}
+static void f1785(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1786(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,16383);*r1p=r1;*r0p=r0;}
+static void f1787(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1788(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,32768);*r1p=r1;*r0p=r0;}
+static void f1789(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1790(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,32767);*r1p=r1;*r0p=r0;}
+static void f1791(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-65,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1792(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,1);*r1p=r1;*r0p=r0;}
+static void f1793(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-1);*r1p=r1;*r0p=r0;}
+static void f1794(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,0);*r1p=r1;*r0p=r0;}
+static void f1795(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-2);*r1p=r1;*r0p=r0;}
+static void f1796(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,2);*r1p=r1;*r0p=r0;}
+static void f1797(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-2);*r1p=r1;*r0p=r0;}
+static void f1798(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,1);*r1p=r1;*r0p=r0;}
+static void f1799(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-3);*r1p=r1;*r0p=r0;}
+static void f1800(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,4);*r1p=r1;*r0p=r0;}
+static void f1801(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-4);*r1p=r1;*r0p=r0;}
+static void f1802(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,3);*r1p=r1;*r0p=r0;}
+static void f1803(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-5);*r1p=r1;*r0p=r0;}
+static void f1804(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,8);*r1p=r1;*r0p=r0;}
+static void f1805(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-8);*r1p=r1;*r0p=r0;}
+static void f1806(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,7);*r1p=r1;*r0p=r0;}
+static void f1807(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-9);*r1p=r1;*r0p=r0;}
+static void f1808(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,16);*r1p=r1;*r0p=r0;}
+static void f1809(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-16);*r1p=r1;*r0p=r0;}
+static void f1810(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,15);*r1p=r1;*r0p=r0;}
+static void f1811(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-17);*r1p=r1;*r0p=r0;}
+static void f1812(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,32);*r1p=r1;*r0p=r0;}
+static void f1813(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-32);*r1p=r1;*r0p=r0;}
+static void f1814(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,31);*r1p=r1;*r0p=r0;}
+static void f1815(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-33);*r1p=r1;*r0p=r0;}
+static void f1816(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,64);*r1p=r1;*r0p=r0;}
+static void f1817(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-64);*r1p=r1;*r0p=r0;}
+static void f1818(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,63);*r1p=r1;*r0p=r0;}
+static void f1819(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-65);*r1p=r1;*r0p=r0;}
+static void f1820(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,128);*r1p=r1;*r0p=r0;}
+static void f1821(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-128);*r1p=r1;*r0p=r0;}
+static void f1822(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,127);*r1p=r1;*r0p=r0;}
+static void f1823(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-129);*r1p=r1;*r0p=r0;}
+static void f1824(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,256);*r1p=r1;*r0p=r0;}
+static void f1825(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-256);*r1p=r1;*r0p=r0;}
+static void f1826(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,255);*r1p=r1;*r0p=r0;}
+static void f1827(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-257);*r1p=r1;*r0p=r0;}
+static void f1828(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,512);*r1p=r1;*r0p=r0;}
+static void f1829(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-512);*r1p=r1;*r0p=r0;}
+static void f1830(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,511);*r1p=r1;*r0p=r0;}
+static void f1831(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-513);*r1p=r1;*r0p=r0;}
+static void f1832(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,1024);*r1p=r1;*r0p=r0;}
+static void f1833(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1834(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,1023);*r1p=r1;*r0p=r0;}
+static void f1835(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1836(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,2048);*r1p=r1;*r0p=r0;}
+static void f1837(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1838(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,2047);*r1p=r1;*r0p=r0;}
+static void f1839(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1840(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,4096);*r1p=r1;*r0p=r0;}
+static void f1841(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1842(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,4095);*r1p=r1;*r0p=r0;}
+static void f1843(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1844(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,8192);*r1p=r1;*r0p=r0;}
+static void f1845(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1846(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,8191);*r1p=r1;*r0p=r0;}
+static void f1847(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1848(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,16384);*r1p=r1;*r0p=r0;}
+static void f1849(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1850(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,16383);*r1p=r1;*r0p=r0;}
+static void f1851(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1852(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,32768);*r1p=r1;*r0p=r0;}
+static void f1853(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1854(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,32767);*r1p=r1;*r0p=r0;}
+static void f1855(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,128,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1856(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,1);*r1p=r1;*r0p=r0;}
+static void f1857(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-1);*r1p=r1;*r0p=r0;}
+static void f1858(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,0);*r1p=r1;*r0p=r0;}
+static void f1859(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-2);*r1p=r1;*r0p=r0;}
+static void f1860(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,2);*r1p=r1;*r0p=r0;}
+static void f1861(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-2);*r1p=r1;*r0p=r0;}
+static void f1862(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,1);*r1p=r1;*r0p=r0;}
+static void f1863(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-3);*r1p=r1;*r0p=r0;}
+static void f1864(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,4);*r1p=r1;*r0p=r0;}
+static void f1865(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-4);*r1p=r1;*r0p=r0;}
+static void f1866(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,3);*r1p=r1;*r0p=r0;}
+static void f1867(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-5);*r1p=r1;*r0p=r0;}
+static void f1868(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,8);*r1p=r1;*r0p=r0;}
+static void f1869(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-8);*r1p=r1;*r0p=r0;}
+static void f1870(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,7);*r1p=r1;*r0p=r0;}
+static void f1871(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-9);*r1p=r1;*r0p=r0;}
+static void f1872(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,16);*r1p=r1;*r0p=r0;}
+static void f1873(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-16);*r1p=r1;*r0p=r0;}
+static void f1874(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,15);*r1p=r1;*r0p=r0;}
+static void f1875(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-17);*r1p=r1;*r0p=r0;}
+static void f1876(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,32);*r1p=r1;*r0p=r0;}
+static void f1877(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-32);*r1p=r1;*r0p=r0;}
+static void f1878(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,31);*r1p=r1;*r0p=r0;}
+static void f1879(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-33);*r1p=r1;*r0p=r0;}
+static void f1880(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,64);*r1p=r1;*r0p=r0;}
+static void f1881(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-64);*r1p=r1;*r0p=r0;}
+static void f1882(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,63);*r1p=r1;*r0p=r0;}
+static void f1883(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-65);*r1p=r1;*r0p=r0;}
+static void f1884(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,128);*r1p=r1;*r0p=r0;}
+static void f1885(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-128);*r1p=r1;*r0p=r0;}
+static void f1886(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,127);*r1p=r1;*r0p=r0;}
+static void f1887(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-129);*r1p=r1;*r0p=r0;}
+static void f1888(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,256);*r1p=r1;*r0p=r0;}
+static void f1889(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-256);*r1p=r1;*r0p=r0;}
+static void f1890(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,255);*r1p=r1;*r0p=r0;}
+static void f1891(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-257);*r1p=r1;*r0p=r0;}
+static void f1892(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,512);*r1p=r1;*r0p=r0;}
+static void f1893(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-512);*r1p=r1;*r0p=r0;}
+static void f1894(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,511);*r1p=r1;*r0p=r0;}
+static void f1895(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-513);*r1p=r1;*r0p=r0;}
+static void f1896(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,1024);*r1p=r1;*r0p=r0;}
+static void f1897(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1898(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,1023);*r1p=r1;*r0p=r0;}
+static void f1899(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1900(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,2048);*r1p=r1;*r0p=r0;}
+static void f1901(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1902(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,2047);*r1p=r1;*r0p=r0;}
+static void f1903(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1904(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,4096);*r1p=r1;*r0p=r0;}
+static void f1905(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1906(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,4095);*r1p=r1;*r0p=r0;}
+static void f1907(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1908(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,8192);*r1p=r1;*r0p=r0;}
+static void f1909(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1910(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,8191);*r1p=r1;*r0p=r0;}
+static void f1911(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1912(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,16384);*r1p=r1;*r0p=r0;}
+static void f1913(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1914(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,16383);*r1p=r1;*r0p=r0;}
+static void f1915(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1916(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,32768);*r1p=r1;*r0p=r0;}
+static void f1917(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1918(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,32767);*r1p=r1;*r0p=r0;}
+static void f1919(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-128,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1920(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,1);*r1p=r1;*r0p=r0;}
+static void f1921(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-1);*r1p=r1;*r0p=r0;}
+static void f1922(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,0);*r1p=r1;*r0p=r0;}
+static void f1923(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-2);*r1p=r1;*r0p=r0;}
+static void f1924(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,2);*r1p=r1;*r0p=r0;}
+static void f1925(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-2);*r1p=r1;*r0p=r0;}
+static void f1926(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,1);*r1p=r1;*r0p=r0;}
+static void f1927(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-3);*r1p=r1;*r0p=r0;}
+static void f1928(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,4);*r1p=r1;*r0p=r0;}
+static void f1929(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-4);*r1p=r1;*r0p=r0;}
+static void f1930(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,3);*r1p=r1;*r0p=r0;}
+static void f1931(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-5);*r1p=r1;*r0p=r0;}
+static void f1932(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,8);*r1p=r1;*r0p=r0;}
+static void f1933(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-8);*r1p=r1;*r0p=r0;}
+static void f1934(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,7);*r1p=r1;*r0p=r0;}
+static void f1935(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-9);*r1p=r1;*r0p=r0;}
+static void f1936(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,16);*r1p=r1;*r0p=r0;}
+static void f1937(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-16);*r1p=r1;*r0p=r0;}
+static void f1938(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,15);*r1p=r1;*r0p=r0;}
+static void f1939(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-17);*r1p=r1;*r0p=r0;}
+static void f1940(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,32);*r1p=r1;*r0p=r0;}
+static void f1941(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-32);*r1p=r1;*r0p=r0;}
+static void f1942(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,31);*r1p=r1;*r0p=r0;}
+static void f1943(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-33);*r1p=r1;*r0p=r0;}
+static void f1944(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,64);*r1p=r1;*r0p=r0;}
+static void f1945(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-64);*r1p=r1;*r0p=r0;}
+static void f1946(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,63);*r1p=r1;*r0p=r0;}
+static void f1947(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-65);*r1p=r1;*r0p=r0;}
+static void f1948(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,128);*r1p=r1;*r0p=r0;}
+static void f1949(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-128);*r1p=r1;*r0p=r0;}
+static void f1950(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,127);*r1p=r1;*r0p=r0;}
+static void f1951(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-129);*r1p=r1;*r0p=r0;}
+static void f1952(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,256);*r1p=r1;*r0p=r0;}
+static void f1953(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-256);*r1p=r1;*r0p=r0;}
+static void f1954(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,255);*r1p=r1;*r0p=r0;}
+static void f1955(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-257);*r1p=r1;*r0p=r0;}
+static void f1956(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,512);*r1p=r1;*r0p=r0;}
+static void f1957(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-512);*r1p=r1;*r0p=r0;}
+static void f1958(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,511);*r1p=r1;*r0p=r0;}
+static void f1959(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-513);*r1p=r1;*r0p=r0;}
+static void f1960(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,1024);*r1p=r1;*r0p=r0;}
+static void f1961(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1962(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,1023);*r1p=r1;*r0p=r0;}
+static void f1963(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1964(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,2048);*r1p=r1;*r0p=r0;}
+static void f1965(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1966(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,2047);*r1p=r1;*r0p=r0;}
+static void f1967(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1968(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,4096);*r1p=r1;*r0p=r0;}
+static void f1969(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1970(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,4095);*r1p=r1;*r0p=r0;}
+static void f1971(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1972(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,8192);*r1p=r1;*r0p=r0;}
+static void f1973(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1974(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,8191);*r1p=r1;*r0p=r0;}
+static void f1975(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1976(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,16384);*r1p=r1;*r0p=r0;}
+static void f1977(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1978(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,16383);*r1p=r1;*r0p=r0;}
+static void f1979(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1980(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,32768);*r1p=r1;*r0p=r0;}
+static void f1981(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1982(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,32767);*r1p=r1;*r0p=r0;}
+static void f1983(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,127,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1984(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,1);*r1p=r1;*r0p=r0;}
+static void f1985(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-1);*r1p=r1;*r0p=r0;}
+static void f1986(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,0);*r1p=r1;*r0p=r0;}
+static void f1987(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-2);*r1p=r1;*r0p=r0;}
+static void f1988(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,2);*r1p=r1;*r0p=r0;}
+static void f1989(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-2);*r1p=r1;*r0p=r0;}
+static void f1990(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,1);*r1p=r1;*r0p=r0;}
+static void f1991(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-3);*r1p=r1;*r0p=r0;}
+static void f1992(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,4);*r1p=r1;*r0p=r0;}
+static void f1993(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-4);*r1p=r1;*r0p=r0;}
+static void f1994(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,3);*r1p=r1;*r0p=r0;}
+static void f1995(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-5);*r1p=r1;*r0p=r0;}
+static void f1996(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,8);*r1p=r1;*r0p=r0;}
+static void f1997(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-8);*r1p=r1;*r0p=r0;}
+static void f1998(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,7);*r1p=r1;*r0p=r0;}
+static void f1999(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-9);*r1p=r1;*r0p=r0;}
+static void f2000(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,16);*r1p=r1;*r0p=r0;}
+static void f2001(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-16);*r1p=r1;*r0p=r0;}
+static void f2002(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,15);*r1p=r1;*r0p=r0;}
+static void f2003(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-17);*r1p=r1;*r0p=r0;}
+static void f2004(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,32);*r1p=r1;*r0p=r0;}
+static void f2005(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-32);*r1p=r1;*r0p=r0;}
+static void f2006(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,31);*r1p=r1;*r0p=r0;}
+static void f2007(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-33);*r1p=r1;*r0p=r0;}
+static void f2008(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,64);*r1p=r1;*r0p=r0;}
+static void f2009(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-64);*r1p=r1;*r0p=r0;}
+static void f2010(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,63);*r1p=r1;*r0p=r0;}
+static void f2011(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-65);*r1p=r1;*r0p=r0;}
+static void f2012(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,128);*r1p=r1;*r0p=r0;}
+static void f2013(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-128);*r1p=r1;*r0p=r0;}
+static void f2014(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,127);*r1p=r1;*r0p=r0;}
+static void f2015(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-129);*r1p=r1;*r0p=r0;}
+static void f2016(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,256);*r1p=r1;*r0p=r0;}
+static void f2017(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-256);*r1p=r1;*r0p=r0;}
+static void f2018(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,255);*r1p=r1;*r0p=r0;}
+static void f2019(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-257);*r1p=r1;*r0p=r0;}
+static void f2020(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,512);*r1p=r1;*r0p=r0;}
+static void f2021(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-512);*r1p=r1;*r0p=r0;}
+static void f2022(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,511);*r1p=r1;*r0p=r0;}
+static void f2023(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-513);*r1p=r1;*r0p=r0;}
+static void f2024(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,1024);*r1p=r1;*r0p=r0;}
+static void f2025(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2026(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,1023);*r1p=r1;*r0p=r0;}
+static void f2027(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2028(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,2048);*r1p=r1;*r0p=r0;}
+static void f2029(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2030(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,2047);*r1p=r1;*r0p=r0;}
+static void f2031(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2032(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,4096);*r1p=r1;*r0p=r0;}
+static void f2033(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2034(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,4095);*r1p=r1;*r0p=r0;}
+static void f2035(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2036(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,8192);*r1p=r1;*r0p=r0;}
+static void f2037(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2038(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,8191);*r1p=r1;*r0p=r0;}
+static void f2039(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2040(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,16384);*r1p=r1;*r0p=r0;}
+static void f2041(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2042(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,16383);*r1p=r1;*r0p=r0;}
+static void f2043(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2044(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,32768);*r1p=r1;*r0p=r0;}
+static void f2045(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2046(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,32767);*r1p=r1;*r0p=r0;}
+static void f2047(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-129,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2048(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,1);*r1p=r1;*r0p=r0;}
+static void f2049(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-1);*r1p=r1;*r0p=r0;}
+static void f2050(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,0);*r1p=r1;*r0p=r0;}
+static void f2051(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-2);*r1p=r1;*r0p=r0;}
+static void f2052(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,2);*r1p=r1;*r0p=r0;}
+static void f2053(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-2);*r1p=r1;*r0p=r0;}
+static void f2054(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,1);*r1p=r1;*r0p=r0;}
+static void f2055(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-3);*r1p=r1;*r0p=r0;}
+static void f2056(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,4);*r1p=r1;*r0p=r0;}
+static void f2057(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-4);*r1p=r1;*r0p=r0;}
+static void f2058(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,3);*r1p=r1;*r0p=r0;}
+static void f2059(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-5);*r1p=r1;*r0p=r0;}
+static void f2060(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,8);*r1p=r1;*r0p=r0;}
+static void f2061(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-8);*r1p=r1;*r0p=r0;}
+static void f2062(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,7);*r1p=r1;*r0p=r0;}
+static void f2063(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-9);*r1p=r1;*r0p=r0;}
+static void f2064(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,16);*r1p=r1;*r0p=r0;}
+static void f2065(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-16);*r1p=r1;*r0p=r0;}
+static void f2066(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,15);*r1p=r1;*r0p=r0;}
+static void f2067(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-17);*r1p=r1;*r0p=r0;}
+static void f2068(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,32);*r1p=r1;*r0p=r0;}
+static void f2069(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-32);*r1p=r1;*r0p=r0;}
+static void f2070(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,31);*r1p=r1;*r0p=r0;}
+static void f2071(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-33);*r1p=r1;*r0p=r0;}
+static void f2072(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,64);*r1p=r1;*r0p=r0;}
+static void f2073(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-64);*r1p=r1;*r0p=r0;}
+static void f2074(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,63);*r1p=r1;*r0p=r0;}
+static void f2075(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-65);*r1p=r1;*r0p=r0;}
+static void f2076(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,128);*r1p=r1;*r0p=r0;}
+static void f2077(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-128);*r1p=r1;*r0p=r0;}
+static void f2078(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,127);*r1p=r1;*r0p=r0;}
+static void f2079(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-129);*r1p=r1;*r0p=r0;}
+static void f2080(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,256);*r1p=r1;*r0p=r0;}
+static void f2081(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-256);*r1p=r1;*r0p=r0;}
+static void f2082(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,255);*r1p=r1;*r0p=r0;}
+static void f2083(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-257);*r1p=r1;*r0p=r0;}
+static void f2084(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,512);*r1p=r1;*r0p=r0;}
+static void f2085(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-512);*r1p=r1;*r0p=r0;}
+static void f2086(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,511);*r1p=r1;*r0p=r0;}
+static void f2087(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-513);*r1p=r1;*r0p=r0;}
+static void f2088(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,1024);*r1p=r1;*r0p=r0;}
+static void f2089(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2090(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,1023);*r1p=r1;*r0p=r0;}
+static void f2091(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2092(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,2048);*r1p=r1;*r0p=r0;}
+static void f2093(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2094(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,2047);*r1p=r1;*r0p=r0;}
+static void f2095(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2096(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,4096);*r1p=r1;*r0p=r0;}
+static void f2097(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2098(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,4095);*r1p=r1;*r0p=r0;}
+static void f2099(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2100(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,8192);*r1p=r1;*r0p=r0;}
+static void f2101(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2102(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,8191);*r1p=r1;*r0p=r0;}
+static void f2103(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2104(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,16384);*r1p=r1;*r0p=r0;}
+static void f2105(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2106(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,16383);*r1p=r1;*r0p=r0;}
+static void f2107(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2108(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,32768);*r1p=r1;*r0p=r0;}
+static void f2109(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2110(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,32767);*r1p=r1;*r0p=r0;}
+static void f2111(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,256,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2112(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,1);*r1p=r1;*r0p=r0;}
+static void f2113(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-1);*r1p=r1;*r0p=r0;}
+static void f2114(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,0);*r1p=r1;*r0p=r0;}
+static void f2115(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-2);*r1p=r1;*r0p=r0;}
+static void f2116(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,2);*r1p=r1;*r0p=r0;}
+static void f2117(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-2);*r1p=r1;*r0p=r0;}
+static void f2118(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,1);*r1p=r1;*r0p=r0;}
+static void f2119(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-3);*r1p=r1;*r0p=r0;}
+static void f2120(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,4);*r1p=r1;*r0p=r0;}
+static void f2121(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-4);*r1p=r1;*r0p=r0;}
+static void f2122(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,3);*r1p=r1;*r0p=r0;}
+static void f2123(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-5);*r1p=r1;*r0p=r0;}
+static void f2124(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,8);*r1p=r1;*r0p=r0;}
+static void f2125(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-8);*r1p=r1;*r0p=r0;}
+static void f2126(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,7);*r1p=r1;*r0p=r0;}
+static void f2127(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-9);*r1p=r1;*r0p=r0;}
+static void f2128(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,16);*r1p=r1;*r0p=r0;}
+static void f2129(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-16);*r1p=r1;*r0p=r0;}
+static void f2130(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,15);*r1p=r1;*r0p=r0;}
+static void f2131(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-17);*r1p=r1;*r0p=r0;}
+static void f2132(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,32);*r1p=r1;*r0p=r0;}
+static void f2133(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-32);*r1p=r1;*r0p=r0;}
+static void f2134(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,31);*r1p=r1;*r0p=r0;}
+static void f2135(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-33);*r1p=r1;*r0p=r0;}
+static void f2136(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,64);*r1p=r1;*r0p=r0;}
+static void f2137(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-64);*r1p=r1;*r0p=r0;}
+static void f2138(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,63);*r1p=r1;*r0p=r0;}
+static void f2139(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-65);*r1p=r1;*r0p=r0;}
+static void f2140(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,128);*r1p=r1;*r0p=r0;}
+static void f2141(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-128);*r1p=r1;*r0p=r0;}
+static void f2142(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,127);*r1p=r1;*r0p=r0;}
+static void f2143(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-129);*r1p=r1;*r0p=r0;}
+static void f2144(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,256);*r1p=r1;*r0p=r0;}
+static void f2145(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-256);*r1p=r1;*r0p=r0;}
+static void f2146(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,255);*r1p=r1;*r0p=r0;}
+static void f2147(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-257);*r1p=r1;*r0p=r0;}
+static void f2148(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,512);*r1p=r1;*r0p=r0;}
+static void f2149(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-512);*r1p=r1;*r0p=r0;}
+static void f2150(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,511);*r1p=r1;*r0p=r0;}
+static void f2151(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-513);*r1p=r1;*r0p=r0;}
+static void f2152(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,1024);*r1p=r1;*r0p=r0;}
+static void f2153(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2154(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,1023);*r1p=r1;*r0p=r0;}
+static void f2155(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2156(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,2048);*r1p=r1;*r0p=r0;}
+static void f2157(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2158(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,2047);*r1p=r1;*r0p=r0;}
+static void f2159(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2160(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,4096);*r1p=r1;*r0p=r0;}
+static void f2161(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2162(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,4095);*r1p=r1;*r0p=r0;}
+static void f2163(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2164(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,8192);*r1p=r1;*r0p=r0;}
+static void f2165(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2166(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,8191);*r1p=r1;*r0p=r0;}
+static void f2167(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2168(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,16384);*r1p=r1;*r0p=r0;}
+static void f2169(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2170(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,16383);*r1p=r1;*r0p=r0;}
+static void f2171(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2172(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,32768);*r1p=r1;*r0p=r0;}
+static void f2173(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2174(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,32767);*r1p=r1;*r0p=r0;}
+static void f2175(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-256,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2176(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,1);*r1p=r1;*r0p=r0;}
+static void f2177(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-1);*r1p=r1;*r0p=r0;}
+static void f2178(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,0);*r1p=r1;*r0p=r0;}
+static void f2179(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-2);*r1p=r1;*r0p=r0;}
+static void f2180(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,2);*r1p=r1;*r0p=r0;}
+static void f2181(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-2);*r1p=r1;*r0p=r0;}
+static void f2182(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,1);*r1p=r1;*r0p=r0;}
+static void f2183(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-3);*r1p=r1;*r0p=r0;}
+static void f2184(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,4);*r1p=r1;*r0p=r0;}
+static void f2185(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-4);*r1p=r1;*r0p=r0;}
+static void f2186(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,3);*r1p=r1;*r0p=r0;}
+static void f2187(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-5);*r1p=r1;*r0p=r0;}
+static void f2188(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,8);*r1p=r1;*r0p=r0;}
+static void f2189(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-8);*r1p=r1;*r0p=r0;}
+static void f2190(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,7);*r1p=r1;*r0p=r0;}
+static void f2191(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-9);*r1p=r1;*r0p=r0;}
+static void f2192(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,16);*r1p=r1;*r0p=r0;}
+static void f2193(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-16);*r1p=r1;*r0p=r0;}
+static void f2194(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,15);*r1p=r1;*r0p=r0;}
+static void f2195(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-17);*r1p=r1;*r0p=r0;}
+static void f2196(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,32);*r1p=r1;*r0p=r0;}
+static void f2197(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-32);*r1p=r1;*r0p=r0;}
+static void f2198(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,31);*r1p=r1;*r0p=r0;}
+static void f2199(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-33);*r1p=r1;*r0p=r0;}
+static void f2200(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,64);*r1p=r1;*r0p=r0;}
+static void f2201(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-64);*r1p=r1;*r0p=r0;}
+static void f2202(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,63);*r1p=r1;*r0p=r0;}
+static void f2203(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-65);*r1p=r1;*r0p=r0;}
+static void f2204(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,128);*r1p=r1;*r0p=r0;}
+static void f2205(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-128);*r1p=r1;*r0p=r0;}
+static void f2206(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,127);*r1p=r1;*r0p=r0;}
+static void f2207(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-129);*r1p=r1;*r0p=r0;}
+static void f2208(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,256);*r1p=r1;*r0p=r0;}
+static void f2209(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-256);*r1p=r1;*r0p=r0;}
+static void f2210(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,255);*r1p=r1;*r0p=r0;}
+static void f2211(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-257);*r1p=r1;*r0p=r0;}
+static void f2212(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,512);*r1p=r1;*r0p=r0;}
+static void f2213(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-512);*r1p=r1;*r0p=r0;}
+static void f2214(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,511);*r1p=r1;*r0p=r0;}
+static void f2215(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-513);*r1p=r1;*r0p=r0;}
+static void f2216(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,1024);*r1p=r1;*r0p=r0;}
+static void f2217(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2218(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,1023);*r1p=r1;*r0p=r0;}
+static void f2219(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2220(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,2048);*r1p=r1;*r0p=r0;}
+static void f2221(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2222(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,2047);*r1p=r1;*r0p=r0;}
+static void f2223(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2224(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,4096);*r1p=r1;*r0p=r0;}
+static void f2225(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2226(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,4095);*r1p=r1;*r0p=r0;}
+static void f2227(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2228(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,8192);*r1p=r1;*r0p=r0;}
+static void f2229(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2230(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,8191);*r1p=r1;*r0p=r0;}
+static void f2231(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2232(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,16384);*r1p=r1;*r0p=r0;}
+static void f2233(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2234(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,16383);*r1p=r1;*r0p=r0;}
+static void f2235(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2236(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,32768);*r1p=r1;*r0p=r0;}
+static void f2237(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2238(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,32767);*r1p=r1;*r0p=r0;}
+static void f2239(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,255,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2240(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,1);*r1p=r1;*r0p=r0;}
+static void f2241(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-1);*r1p=r1;*r0p=r0;}
+static void f2242(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,0);*r1p=r1;*r0p=r0;}
+static void f2243(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-2);*r1p=r1;*r0p=r0;}
+static void f2244(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,2);*r1p=r1;*r0p=r0;}
+static void f2245(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-2);*r1p=r1;*r0p=r0;}
+static void f2246(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,1);*r1p=r1;*r0p=r0;}
+static void f2247(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-3);*r1p=r1;*r0p=r0;}
+static void f2248(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,4);*r1p=r1;*r0p=r0;}
+static void f2249(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-4);*r1p=r1;*r0p=r0;}
+static void f2250(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,3);*r1p=r1;*r0p=r0;}
+static void f2251(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-5);*r1p=r1;*r0p=r0;}
+static void f2252(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,8);*r1p=r1;*r0p=r0;}
+static void f2253(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-8);*r1p=r1;*r0p=r0;}
+static void f2254(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,7);*r1p=r1;*r0p=r0;}
+static void f2255(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-9);*r1p=r1;*r0p=r0;}
+static void f2256(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,16);*r1p=r1;*r0p=r0;}
+static void f2257(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-16);*r1p=r1;*r0p=r0;}
+static void f2258(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,15);*r1p=r1;*r0p=r0;}
+static void f2259(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-17);*r1p=r1;*r0p=r0;}
+static void f2260(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,32);*r1p=r1;*r0p=r0;}
+static void f2261(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-32);*r1p=r1;*r0p=r0;}
+static void f2262(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,31);*r1p=r1;*r0p=r0;}
+static void f2263(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-33);*r1p=r1;*r0p=r0;}
+static void f2264(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,64);*r1p=r1;*r0p=r0;}
+static void f2265(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-64);*r1p=r1;*r0p=r0;}
+static void f2266(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,63);*r1p=r1;*r0p=r0;}
+static void f2267(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-65);*r1p=r1;*r0p=r0;}
+static void f2268(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,128);*r1p=r1;*r0p=r0;}
+static void f2269(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-128);*r1p=r1;*r0p=r0;}
+static void f2270(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,127);*r1p=r1;*r0p=r0;}
+static void f2271(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-129);*r1p=r1;*r0p=r0;}
+static void f2272(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,256);*r1p=r1;*r0p=r0;}
+static void f2273(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-256);*r1p=r1;*r0p=r0;}
+static void f2274(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,255);*r1p=r1;*r0p=r0;}
+static void f2275(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-257);*r1p=r1;*r0p=r0;}
+static void f2276(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,512);*r1p=r1;*r0p=r0;}
+static void f2277(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-512);*r1p=r1;*r0p=r0;}
+static void f2278(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,511);*r1p=r1;*r0p=r0;}
+static void f2279(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-513);*r1p=r1;*r0p=r0;}
+static void f2280(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,1024);*r1p=r1;*r0p=r0;}
+static void f2281(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2282(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,1023);*r1p=r1;*r0p=r0;}
+static void f2283(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2284(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,2048);*r1p=r1;*r0p=r0;}
+static void f2285(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2286(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,2047);*r1p=r1;*r0p=r0;}
+static void f2287(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2288(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,4096);*r1p=r1;*r0p=r0;}
+static void f2289(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2290(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,4095);*r1p=r1;*r0p=r0;}
+static void f2291(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2292(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,8192);*r1p=r1;*r0p=r0;}
+static void f2293(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2294(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,8191);*r1p=r1;*r0p=r0;}
+static void f2295(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2296(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,16384);*r1p=r1;*r0p=r0;}
+static void f2297(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2298(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,16383);*r1p=r1;*r0p=r0;}
+static void f2299(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2300(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,32768);*r1p=r1;*r0p=r0;}
+static void f2301(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2302(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,32767);*r1p=r1;*r0p=r0;}
+static void f2303(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-257,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2304(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,1);*r1p=r1;*r0p=r0;}
+static void f2305(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-1);*r1p=r1;*r0p=r0;}
+static void f2306(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,0);*r1p=r1;*r0p=r0;}
+static void f2307(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-2);*r1p=r1;*r0p=r0;}
+static void f2308(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,2);*r1p=r1;*r0p=r0;}
+static void f2309(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-2);*r1p=r1;*r0p=r0;}
+static void f2310(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,1);*r1p=r1;*r0p=r0;}
+static void f2311(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-3);*r1p=r1;*r0p=r0;}
+static void f2312(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,4);*r1p=r1;*r0p=r0;}
+static void f2313(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-4);*r1p=r1;*r0p=r0;}
+static void f2314(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,3);*r1p=r1;*r0p=r0;}
+static void f2315(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-5);*r1p=r1;*r0p=r0;}
+static void f2316(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,8);*r1p=r1;*r0p=r0;}
+static void f2317(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-8);*r1p=r1;*r0p=r0;}
+static void f2318(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,7);*r1p=r1;*r0p=r0;}
+static void f2319(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-9);*r1p=r1;*r0p=r0;}
+static void f2320(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,16);*r1p=r1;*r0p=r0;}
+static void f2321(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-16);*r1p=r1;*r0p=r0;}
+static void f2322(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,15);*r1p=r1;*r0p=r0;}
+static void f2323(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-17);*r1p=r1;*r0p=r0;}
+static void f2324(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,32);*r1p=r1;*r0p=r0;}
+static void f2325(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-32);*r1p=r1;*r0p=r0;}
+static void f2326(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,31);*r1p=r1;*r0p=r0;}
+static void f2327(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-33);*r1p=r1;*r0p=r0;}
+static void f2328(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,64);*r1p=r1;*r0p=r0;}
+static void f2329(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-64);*r1p=r1;*r0p=r0;}
+static void f2330(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,63);*r1p=r1;*r0p=r0;}
+static void f2331(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-65);*r1p=r1;*r0p=r0;}
+static void f2332(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,128);*r1p=r1;*r0p=r0;}
+static void f2333(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-128);*r1p=r1;*r0p=r0;}
+static void f2334(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,127);*r1p=r1;*r0p=r0;}
+static void f2335(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-129);*r1p=r1;*r0p=r0;}
+static void f2336(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,256);*r1p=r1;*r0p=r0;}
+static void f2337(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-256);*r1p=r1;*r0p=r0;}
+static void f2338(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,255);*r1p=r1;*r0p=r0;}
+static void f2339(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-257);*r1p=r1;*r0p=r0;}
+static void f2340(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,512);*r1p=r1;*r0p=r0;}
+static void f2341(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-512);*r1p=r1;*r0p=r0;}
+static void f2342(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,511);*r1p=r1;*r0p=r0;}
+static void f2343(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-513);*r1p=r1;*r0p=r0;}
+static void f2344(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,1024);*r1p=r1;*r0p=r0;}
+static void f2345(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2346(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,1023);*r1p=r1;*r0p=r0;}
+static void f2347(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2348(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,2048);*r1p=r1;*r0p=r0;}
+static void f2349(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2350(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,2047);*r1p=r1;*r0p=r0;}
+static void f2351(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2352(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,4096);*r1p=r1;*r0p=r0;}
+static void f2353(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2354(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,4095);*r1p=r1;*r0p=r0;}
+static void f2355(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2356(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,8192);*r1p=r1;*r0p=r0;}
+static void f2357(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2358(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,8191);*r1p=r1;*r0p=r0;}
+static void f2359(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2360(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,16384);*r1p=r1;*r0p=r0;}
+static void f2361(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2362(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,16383);*r1p=r1;*r0p=r0;}
+static void f2363(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2364(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,32768);*r1p=r1;*r0p=r0;}
+static void f2365(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2366(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,32767);*r1p=r1;*r0p=r0;}
+static void f2367(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,512,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2368(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,1);*r1p=r1;*r0p=r0;}
+static void f2369(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-1);*r1p=r1;*r0p=r0;}
+static void f2370(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,0);*r1p=r1;*r0p=r0;}
+static void f2371(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-2);*r1p=r1;*r0p=r0;}
+static void f2372(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,2);*r1p=r1;*r0p=r0;}
+static void f2373(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-2);*r1p=r1;*r0p=r0;}
+static void f2374(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,1);*r1p=r1;*r0p=r0;}
+static void f2375(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-3);*r1p=r1;*r0p=r0;}
+static void f2376(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,4);*r1p=r1;*r0p=r0;}
+static void f2377(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-4);*r1p=r1;*r0p=r0;}
+static void f2378(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,3);*r1p=r1;*r0p=r0;}
+static void f2379(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-5);*r1p=r1;*r0p=r0;}
+static void f2380(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,8);*r1p=r1;*r0p=r0;}
+static void f2381(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-8);*r1p=r1;*r0p=r0;}
+static void f2382(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,7);*r1p=r1;*r0p=r0;}
+static void f2383(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-9);*r1p=r1;*r0p=r0;}
+static void f2384(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,16);*r1p=r1;*r0p=r0;}
+static void f2385(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-16);*r1p=r1;*r0p=r0;}
+static void f2386(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,15);*r1p=r1;*r0p=r0;}
+static void f2387(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-17);*r1p=r1;*r0p=r0;}
+static void f2388(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,32);*r1p=r1;*r0p=r0;}
+static void f2389(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-32);*r1p=r1;*r0p=r0;}
+static void f2390(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,31);*r1p=r1;*r0p=r0;}
+static void f2391(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-33);*r1p=r1;*r0p=r0;}
+static void f2392(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,64);*r1p=r1;*r0p=r0;}
+static void f2393(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-64);*r1p=r1;*r0p=r0;}
+static void f2394(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,63);*r1p=r1;*r0p=r0;}
+static void f2395(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-65);*r1p=r1;*r0p=r0;}
+static void f2396(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,128);*r1p=r1;*r0p=r0;}
+static void f2397(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-128);*r1p=r1;*r0p=r0;}
+static void f2398(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,127);*r1p=r1;*r0p=r0;}
+static void f2399(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-129);*r1p=r1;*r0p=r0;}
+static void f2400(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,256);*r1p=r1;*r0p=r0;}
+static void f2401(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-256);*r1p=r1;*r0p=r0;}
+static void f2402(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,255);*r1p=r1;*r0p=r0;}
+static void f2403(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-257);*r1p=r1;*r0p=r0;}
+static void f2404(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,512);*r1p=r1;*r0p=r0;}
+static void f2405(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-512);*r1p=r1;*r0p=r0;}
+static void f2406(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,511);*r1p=r1;*r0p=r0;}
+static void f2407(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-513);*r1p=r1;*r0p=r0;}
+static void f2408(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,1024);*r1p=r1;*r0p=r0;}
+static void f2409(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2410(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,1023);*r1p=r1;*r0p=r0;}
+static void f2411(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2412(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,2048);*r1p=r1;*r0p=r0;}
+static void f2413(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2414(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,2047);*r1p=r1;*r0p=r0;}
+static void f2415(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2416(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,4096);*r1p=r1;*r0p=r0;}
+static void f2417(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2418(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,4095);*r1p=r1;*r0p=r0;}
+static void f2419(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2420(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,8192);*r1p=r1;*r0p=r0;}
+static void f2421(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2422(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,8191);*r1p=r1;*r0p=r0;}
+static void f2423(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2424(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,16384);*r1p=r1;*r0p=r0;}
+static void f2425(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2426(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,16383);*r1p=r1;*r0p=r0;}
+static void f2427(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2428(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,32768);*r1p=r1;*r0p=r0;}
+static void f2429(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2430(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,32767);*r1p=r1;*r0p=r0;}
+static void f2431(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-512,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2432(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,1);*r1p=r1;*r0p=r0;}
+static void f2433(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-1);*r1p=r1;*r0p=r0;}
+static void f2434(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,0);*r1p=r1;*r0p=r0;}
+static void f2435(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-2);*r1p=r1;*r0p=r0;}
+static void f2436(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,2);*r1p=r1;*r0p=r0;}
+static void f2437(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-2);*r1p=r1;*r0p=r0;}
+static void f2438(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,1);*r1p=r1;*r0p=r0;}
+static void f2439(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-3);*r1p=r1;*r0p=r0;}
+static void f2440(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,4);*r1p=r1;*r0p=r0;}
+static void f2441(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-4);*r1p=r1;*r0p=r0;}
+static void f2442(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,3);*r1p=r1;*r0p=r0;}
+static void f2443(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-5);*r1p=r1;*r0p=r0;}
+static void f2444(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,8);*r1p=r1;*r0p=r0;}
+static void f2445(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-8);*r1p=r1;*r0p=r0;}
+static void f2446(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,7);*r1p=r1;*r0p=r0;}
+static void f2447(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-9);*r1p=r1;*r0p=r0;}
+static void f2448(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,16);*r1p=r1;*r0p=r0;}
+static void f2449(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-16);*r1p=r1;*r0p=r0;}
+static void f2450(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,15);*r1p=r1;*r0p=r0;}
+static void f2451(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-17);*r1p=r1;*r0p=r0;}
+static void f2452(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,32);*r1p=r1;*r0p=r0;}
+static void f2453(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-32);*r1p=r1;*r0p=r0;}
+static void f2454(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,31);*r1p=r1;*r0p=r0;}
+static void f2455(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-33);*r1p=r1;*r0p=r0;}
+static void f2456(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,64);*r1p=r1;*r0p=r0;}
+static void f2457(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-64);*r1p=r1;*r0p=r0;}
+static void f2458(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,63);*r1p=r1;*r0p=r0;}
+static void f2459(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-65);*r1p=r1;*r0p=r0;}
+static void f2460(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,128);*r1p=r1;*r0p=r0;}
+static void f2461(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-128);*r1p=r1;*r0p=r0;}
+static void f2462(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,127);*r1p=r1;*r0p=r0;}
+static void f2463(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-129);*r1p=r1;*r0p=r0;}
+static void f2464(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,256);*r1p=r1;*r0p=r0;}
+static void f2465(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-256);*r1p=r1;*r0p=r0;}
+static void f2466(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,255);*r1p=r1;*r0p=r0;}
+static void f2467(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-257);*r1p=r1;*r0p=r0;}
+static void f2468(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,512);*r1p=r1;*r0p=r0;}
+static void f2469(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-512);*r1p=r1;*r0p=r0;}
+static void f2470(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,511);*r1p=r1;*r0p=r0;}
+static void f2471(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-513);*r1p=r1;*r0p=r0;}
+static void f2472(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,1024);*r1p=r1;*r0p=r0;}
+static void f2473(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2474(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,1023);*r1p=r1;*r0p=r0;}
+static void f2475(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2476(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,2048);*r1p=r1;*r0p=r0;}
+static void f2477(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2478(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,2047);*r1p=r1;*r0p=r0;}
+static void f2479(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2480(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,4096);*r1p=r1;*r0p=r0;}
+static void f2481(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2482(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,4095);*r1p=r1;*r0p=r0;}
+static void f2483(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2484(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,8192);*r1p=r1;*r0p=r0;}
+static void f2485(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2486(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,8191);*r1p=r1;*r0p=r0;}
+static void f2487(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2488(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,16384);*r1p=r1;*r0p=r0;}
+static void f2489(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2490(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,16383);*r1p=r1;*r0p=r0;}
+static void f2491(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2492(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,32768);*r1p=r1;*r0p=r0;}
+static void f2493(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2494(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,32767);*r1p=r1;*r0p=r0;}
+static void f2495(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,511,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2496(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,1);*r1p=r1;*r0p=r0;}
+static void f2497(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-1);*r1p=r1;*r0p=r0;}
+static void f2498(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,0);*r1p=r1;*r0p=r0;}
+static void f2499(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-2);*r1p=r1;*r0p=r0;}
+static void f2500(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,2);*r1p=r1;*r0p=r0;}
+static void f2501(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-2);*r1p=r1;*r0p=r0;}
+static void f2502(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,1);*r1p=r1;*r0p=r0;}
+static void f2503(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-3);*r1p=r1;*r0p=r0;}
+static void f2504(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,4);*r1p=r1;*r0p=r0;}
+static void f2505(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-4);*r1p=r1;*r0p=r0;}
+static void f2506(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,3);*r1p=r1;*r0p=r0;}
+static void f2507(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-5);*r1p=r1;*r0p=r0;}
+static void f2508(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,8);*r1p=r1;*r0p=r0;}
+static void f2509(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-8);*r1p=r1;*r0p=r0;}
+static void f2510(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,7);*r1p=r1;*r0p=r0;}
+static void f2511(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-9);*r1p=r1;*r0p=r0;}
+static void f2512(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,16);*r1p=r1;*r0p=r0;}
+static void f2513(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-16);*r1p=r1;*r0p=r0;}
+static void f2514(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,15);*r1p=r1;*r0p=r0;}
+static void f2515(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-17);*r1p=r1;*r0p=r0;}
+static void f2516(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,32);*r1p=r1;*r0p=r0;}
+static void f2517(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-32);*r1p=r1;*r0p=r0;}
+static void f2518(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,31);*r1p=r1;*r0p=r0;}
+static void f2519(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-33);*r1p=r1;*r0p=r0;}
+static void f2520(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,64);*r1p=r1;*r0p=r0;}
+static void f2521(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-64);*r1p=r1;*r0p=r0;}
+static void f2522(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,63);*r1p=r1;*r0p=r0;}
+static void f2523(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-65);*r1p=r1;*r0p=r0;}
+static void f2524(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,128);*r1p=r1;*r0p=r0;}
+static void f2525(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-128);*r1p=r1;*r0p=r0;}
+static void f2526(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,127);*r1p=r1;*r0p=r0;}
+static void f2527(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-129);*r1p=r1;*r0p=r0;}
+static void f2528(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,256);*r1p=r1;*r0p=r0;}
+static void f2529(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-256);*r1p=r1;*r0p=r0;}
+static void f2530(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,255);*r1p=r1;*r0p=r0;}
+static void f2531(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-257);*r1p=r1;*r0p=r0;}
+static void f2532(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,512);*r1p=r1;*r0p=r0;}
+static void f2533(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-512);*r1p=r1;*r0p=r0;}
+static void f2534(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,511);*r1p=r1;*r0p=r0;}
+static void f2535(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-513);*r1p=r1;*r0p=r0;}
+static void f2536(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,1024);*r1p=r1;*r0p=r0;}
+static void f2537(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2538(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,1023);*r1p=r1;*r0p=r0;}
+static void f2539(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2540(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,2048);*r1p=r1;*r0p=r0;}
+static void f2541(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2542(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,2047);*r1p=r1;*r0p=r0;}
+static void f2543(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2544(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,4096);*r1p=r1;*r0p=r0;}
+static void f2545(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2546(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,4095);*r1p=r1;*r0p=r0;}
+static void f2547(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2548(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,8192);*r1p=r1;*r0p=r0;}
+static void f2549(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2550(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,8191);*r1p=r1;*r0p=r0;}
+static void f2551(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2552(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,16384);*r1p=r1;*r0p=r0;}
+static void f2553(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2554(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,16383);*r1p=r1;*r0p=r0;}
+static void f2555(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2556(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,32768);*r1p=r1;*r0p=r0;}
+static void f2557(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2558(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,32767);*r1p=r1;*r0p=r0;}
+static void f2559(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-513,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2560(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,1);*r1p=r1;*r0p=r0;}
+static void f2561(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-1);*r1p=r1;*r0p=r0;}
+static void f2562(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,0);*r1p=r1;*r0p=r0;}
+static void f2563(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-2);*r1p=r1;*r0p=r0;}
+static void f2564(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,2);*r1p=r1;*r0p=r0;}
+static void f2565(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-2);*r1p=r1;*r0p=r0;}
+static void f2566(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,1);*r1p=r1;*r0p=r0;}
+static void f2567(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-3);*r1p=r1;*r0p=r0;}
+static void f2568(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,4);*r1p=r1;*r0p=r0;}
+static void f2569(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-4);*r1p=r1;*r0p=r0;}
+static void f2570(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,3);*r1p=r1;*r0p=r0;}
+static void f2571(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-5);*r1p=r1;*r0p=r0;}
+static void f2572(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,8);*r1p=r1;*r0p=r0;}
+static void f2573(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-8);*r1p=r1;*r0p=r0;}
+static void f2574(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,7);*r1p=r1;*r0p=r0;}
+static void f2575(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-9);*r1p=r1;*r0p=r0;}
+static void f2576(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,16);*r1p=r1;*r0p=r0;}
+static void f2577(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-16);*r1p=r1;*r0p=r0;}
+static void f2578(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,15);*r1p=r1;*r0p=r0;}
+static void f2579(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-17);*r1p=r1;*r0p=r0;}
+static void f2580(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,32);*r1p=r1;*r0p=r0;}
+static void f2581(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-32);*r1p=r1;*r0p=r0;}
+static void f2582(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,31);*r1p=r1;*r0p=r0;}
+static void f2583(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-33);*r1p=r1;*r0p=r0;}
+static void f2584(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,64);*r1p=r1;*r0p=r0;}
+static void f2585(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-64);*r1p=r1;*r0p=r0;}
+static void f2586(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,63);*r1p=r1;*r0p=r0;}
+static void f2587(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-65);*r1p=r1;*r0p=r0;}
+static void f2588(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,128);*r1p=r1;*r0p=r0;}
+static void f2589(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-128);*r1p=r1;*r0p=r0;}
+static void f2590(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,127);*r1p=r1;*r0p=r0;}
+static void f2591(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-129);*r1p=r1;*r0p=r0;}
+static void f2592(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,256);*r1p=r1;*r0p=r0;}
+static void f2593(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-256);*r1p=r1;*r0p=r0;}
+static void f2594(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,255);*r1p=r1;*r0p=r0;}
+static void f2595(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-257);*r1p=r1;*r0p=r0;}
+static void f2596(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,512);*r1p=r1;*r0p=r0;}
+static void f2597(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-512);*r1p=r1;*r0p=r0;}
+static void f2598(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,511);*r1p=r1;*r0p=r0;}
+static void f2599(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-513);*r1p=r1;*r0p=r0;}
+static void f2600(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,1024);*r1p=r1;*r0p=r0;}
+static void f2601(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2602(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,1023);*r1p=r1;*r0p=r0;}
+static void f2603(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2604(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,2048);*r1p=r1;*r0p=r0;}
+static void f2605(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2606(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,2047);*r1p=r1;*r0p=r0;}
+static void f2607(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2608(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,4096);*r1p=r1;*r0p=r0;}
+static void f2609(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2610(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,4095);*r1p=r1;*r0p=r0;}
+static void f2611(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2612(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,8192);*r1p=r1;*r0p=r0;}
+static void f2613(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2614(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,8191);*r1p=r1;*r0p=r0;}
+static void f2615(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2616(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,16384);*r1p=r1;*r0p=r0;}
+static void f2617(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2618(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,16383);*r1p=r1;*r0p=r0;}
+static void f2619(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2620(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,32768);*r1p=r1;*r0p=r0;}
+static void f2621(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2622(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,32767);*r1p=r1;*r0p=r0;}
+static void f2623(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1024,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2624(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,1);*r1p=r1;*r0p=r0;}
+static void f2625(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-1);*r1p=r1;*r0p=r0;}
+static void f2626(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,0);*r1p=r1;*r0p=r0;}
+static void f2627(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-2);*r1p=r1;*r0p=r0;}
+static void f2628(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,2);*r1p=r1;*r0p=r0;}
+static void f2629(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-2);*r1p=r1;*r0p=r0;}
+static void f2630(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,1);*r1p=r1;*r0p=r0;}
+static void f2631(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-3);*r1p=r1;*r0p=r0;}
+static void f2632(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,4);*r1p=r1;*r0p=r0;}
+static void f2633(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-4);*r1p=r1;*r0p=r0;}
+static void f2634(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,3);*r1p=r1;*r0p=r0;}
+static void f2635(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-5);*r1p=r1;*r0p=r0;}
+static void f2636(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,8);*r1p=r1;*r0p=r0;}
+static void f2637(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-8);*r1p=r1;*r0p=r0;}
+static void f2638(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,7);*r1p=r1;*r0p=r0;}
+static void f2639(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-9);*r1p=r1;*r0p=r0;}
+static void f2640(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,16);*r1p=r1;*r0p=r0;}
+static void f2641(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-16);*r1p=r1;*r0p=r0;}
+static void f2642(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,15);*r1p=r1;*r0p=r0;}
+static void f2643(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-17);*r1p=r1;*r0p=r0;}
+static void f2644(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,32);*r1p=r1;*r0p=r0;}
+static void f2645(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-32);*r1p=r1;*r0p=r0;}
+static void f2646(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,31);*r1p=r1;*r0p=r0;}
+static void f2647(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-33);*r1p=r1;*r0p=r0;}
+static void f2648(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,64);*r1p=r1;*r0p=r0;}
+static void f2649(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-64);*r1p=r1;*r0p=r0;}
+static void f2650(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,63);*r1p=r1;*r0p=r0;}
+static void f2651(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-65);*r1p=r1;*r0p=r0;}
+static void f2652(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,128);*r1p=r1;*r0p=r0;}
+static void f2653(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-128);*r1p=r1;*r0p=r0;}
+static void f2654(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,127);*r1p=r1;*r0p=r0;}
+static void f2655(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-129);*r1p=r1;*r0p=r0;}
+static void f2656(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,256);*r1p=r1;*r0p=r0;}
+static void f2657(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-256);*r1p=r1;*r0p=r0;}
+static void f2658(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,255);*r1p=r1;*r0p=r0;}
+static void f2659(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-257);*r1p=r1;*r0p=r0;}
+static void f2660(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,512);*r1p=r1;*r0p=r0;}
+static void f2661(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-512);*r1p=r1;*r0p=r0;}
+static void f2662(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,511);*r1p=r1;*r0p=r0;}
+static void f2663(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-513);*r1p=r1;*r0p=r0;}
+static void f2664(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,1024);*r1p=r1;*r0p=r0;}
+static void f2665(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2666(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,1023);*r1p=r1;*r0p=r0;}
+static void f2667(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2668(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,2048);*r1p=r1;*r0p=r0;}
+static void f2669(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2670(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,2047);*r1p=r1;*r0p=r0;}
+static void f2671(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2672(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,4096);*r1p=r1;*r0p=r0;}
+static void f2673(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2674(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,4095);*r1p=r1;*r0p=r0;}
+static void f2675(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2676(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,8192);*r1p=r1;*r0p=r0;}
+static void f2677(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2678(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,8191);*r1p=r1;*r0p=r0;}
+static void f2679(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2680(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,16384);*r1p=r1;*r0p=r0;}
+static void f2681(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2682(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,16383);*r1p=r1;*r0p=r0;}
+static void f2683(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2684(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,32768);*r1p=r1;*r0p=r0;}
+static void f2685(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2686(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,32767);*r1p=r1;*r0p=r0;}
+static void f2687(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1024,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2688(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,1);*r1p=r1;*r0p=r0;}
+static void f2689(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-1);*r1p=r1;*r0p=r0;}
+static void f2690(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,0);*r1p=r1;*r0p=r0;}
+static void f2691(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-2);*r1p=r1;*r0p=r0;}
+static void f2692(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,2);*r1p=r1;*r0p=r0;}
+static void f2693(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-2);*r1p=r1;*r0p=r0;}
+static void f2694(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,1);*r1p=r1;*r0p=r0;}
+static void f2695(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-3);*r1p=r1;*r0p=r0;}
+static void f2696(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,4);*r1p=r1;*r0p=r0;}
+static void f2697(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-4);*r1p=r1;*r0p=r0;}
+static void f2698(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,3);*r1p=r1;*r0p=r0;}
+static void f2699(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-5);*r1p=r1;*r0p=r0;}
+static void f2700(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,8);*r1p=r1;*r0p=r0;}
+static void f2701(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-8);*r1p=r1;*r0p=r0;}
+static void f2702(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,7);*r1p=r1;*r0p=r0;}
+static void f2703(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-9);*r1p=r1;*r0p=r0;}
+static void f2704(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,16);*r1p=r1;*r0p=r0;}
+static void f2705(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-16);*r1p=r1;*r0p=r0;}
+static void f2706(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,15);*r1p=r1;*r0p=r0;}
+static void f2707(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-17);*r1p=r1;*r0p=r0;}
+static void f2708(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,32);*r1p=r1;*r0p=r0;}
+static void f2709(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-32);*r1p=r1;*r0p=r0;}
+static void f2710(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,31);*r1p=r1;*r0p=r0;}
+static void f2711(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-33);*r1p=r1;*r0p=r0;}
+static void f2712(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,64);*r1p=r1;*r0p=r0;}
+static void f2713(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-64);*r1p=r1;*r0p=r0;}
+static void f2714(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,63);*r1p=r1;*r0p=r0;}
+static void f2715(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-65);*r1p=r1;*r0p=r0;}
+static void f2716(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,128);*r1p=r1;*r0p=r0;}
+static void f2717(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-128);*r1p=r1;*r0p=r0;}
+static void f2718(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,127);*r1p=r1;*r0p=r0;}
+static void f2719(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-129);*r1p=r1;*r0p=r0;}
+static void f2720(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,256);*r1p=r1;*r0p=r0;}
+static void f2721(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-256);*r1p=r1;*r0p=r0;}
+static void f2722(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,255);*r1p=r1;*r0p=r0;}
+static void f2723(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-257);*r1p=r1;*r0p=r0;}
+static void f2724(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,512);*r1p=r1;*r0p=r0;}
+static void f2725(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-512);*r1p=r1;*r0p=r0;}
+static void f2726(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,511);*r1p=r1;*r0p=r0;}
+static void f2727(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-513);*r1p=r1;*r0p=r0;}
+static void f2728(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,1024);*r1p=r1;*r0p=r0;}
+static void f2729(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2730(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,1023);*r1p=r1;*r0p=r0;}
+static void f2731(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2732(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,2048);*r1p=r1;*r0p=r0;}
+static void f2733(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2734(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,2047);*r1p=r1;*r0p=r0;}
+static void f2735(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2736(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,4096);*r1p=r1;*r0p=r0;}
+static void f2737(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2738(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,4095);*r1p=r1;*r0p=r0;}
+static void f2739(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2740(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,8192);*r1p=r1;*r0p=r0;}
+static void f2741(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2742(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,8191);*r1p=r1;*r0p=r0;}
+static void f2743(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2744(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,16384);*r1p=r1;*r0p=r0;}
+static void f2745(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2746(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,16383);*r1p=r1;*r0p=r0;}
+static void f2747(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2748(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,32768);*r1p=r1;*r0p=r0;}
+static void f2749(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2750(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,32767);*r1p=r1;*r0p=r0;}
+static void f2751(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,1023,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2752(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,1);*r1p=r1;*r0p=r0;}
+static void f2753(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-1);*r1p=r1;*r0p=r0;}
+static void f2754(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,0);*r1p=r1;*r0p=r0;}
+static void f2755(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-2);*r1p=r1;*r0p=r0;}
+static void f2756(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,2);*r1p=r1;*r0p=r0;}
+static void f2757(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-2);*r1p=r1;*r0p=r0;}
+static void f2758(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,1);*r1p=r1;*r0p=r0;}
+static void f2759(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-3);*r1p=r1;*r0p=r0;}
+static void f2760(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,4);*r1p=r1;*r0p=r0;}
+static void f2761(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-4);*r1p=r1;*r0p=r0;}
+static void f2762(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,3);*r1p=r1;*r0p=r0;}
+static void f2763(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-5);*r1p=r1;*r0p=r0;}
+static void f2764(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,8);*r1p=r1;*r0p=r0;}
+static void f2765(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-8);*r1p=r1;*r0p=r0;}
+static void f2766(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,7);*r1p=r1;*r0p=r0;}
+static void f2767(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-9);*r1p=r1;*r0p=r0;}
+static void f2768(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,16);*r1p=r1;*r0p=r0;}
+static void f2769(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-16);*r1p=r1;*r0p=r0;}
+static void f2770(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,15);*r1p=r1;*r0p=r0;}
+static void f2771(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-17);*r1p=r1;*r0p=r0;}
+static void f2772(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,32);*r1p=r1;*r0p=r0;}
+static void f2773(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-32);*r1p=r1;*r0p=r0;}
+static void f2774(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,31);*r1p=r1;*r0p=r0;}
+static void f2775(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-33);*r1p=r1;*r0p=r0;}
+static void f2776(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,64);*r1p=r1;*r0p=r0;}
+static void f2777(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-64);*r1p=r1;*r0p=r0;}
+static void f2778(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,63);*r1p=r1;*r0p=r0;}
+static void f2779(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-65);*r1p=r1;*r0p=r0;}
+static void f2780(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,128);*r1p=r1;*r0p=r0;}
+static void f2781(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-128);*r1p=r1;*r0p=r0;}
+static void f2782(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,127);*r1p=r1;*r0p=r0;}
+static void f2783(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-129);*r1p=r1;*r0p=r0;}
+static void f2784(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,256);*r1p=r1;*r0p=r0;}
+static void f2785(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-256);*r1p=r1;*r0p=r0;}
+static void f2786(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,255);*r1p=r1;*r0p=r0;}
+static void f2787(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-257);*r1p=r1;*r0p=r0;}
+static void f2788(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,512);*r1p=r1;*r0p=r0;}
+static void f2789(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-512);*r1p=r1;*r0p=r0;}
+static void f2790(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,511);*r1p=r1;*r0p=r0;}
+static void f2791(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-513);*r1p=r1;*r0p=r0;}
+static void f2792(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,1024);*r1p=r1;*r0p=r0;}
+static void f2793(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2794(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,1023);*r1p=r1;*r0p=r0;}
+static void f2795(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2796(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,2048);*r1p=r1;*r0p=r0;}
+static void f2797(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2798(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,2047);*r1p=r1;*r0p=r0;}
+static void f2799(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2800(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,4096);*r1p=r1;*r0p=r0;}
+static void f2801(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2802(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,4095);*r1p=r1;*r0p=r0;}
+static void f2803(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2804(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,8192);*r1p=r1;*r0p=r0;}
+static void f2805(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2806(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,8191);*r1p=r1;*r0p=r0;}
+static void f2807(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2808(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,16384);*r1p=r1;*r0p=r0;}
+static void f2809(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2810(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,16383);*r1p=r1;*r0p=r0;}
+static void f2811(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2812(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,32768);*r1p=r1;*r0p=r0;}
+static void f2813(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2814(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,32767);*r1p=r1;*r0p=r0;}
+static void f2815(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-1025,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2816(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,1);*r1p=r1;*r0p=r0;}
+static void f2817(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-1);*r1p=r1;*r0p=r0;}
+static void f2818(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,0);*r1p=r1;*r0p=r0;}
+static void f2819(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-2);*r1p=r1;*r0p=r0;}
+static void f2820(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,2);*r1p=r1;*r0p=r0;}
+static void f2821(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-2);*r1p=r1;*r0p=r0;}
+static void f2822(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,1);*r1p=r1;*r0p=r0;}
+static void f2823(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-3);*r1p=r1;*r0p=r0;}
+static void f2824(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,4);*r1p=r1;*r0p=r0;}
+static void f2825(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-4);*r1p=r1;*r0p=r0;}
+static void f2826(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,3);*r1p=r1;*r0p=r0;}
+static void f2827(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-5);*r1p=r1;*r0p=r0;}
+static void f2828(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,8);*r1p=r1;*r0p=r0;}
+static void f2829(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-8);*r1p=r1;*r0p=r0;}
+static void f2830(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,7);*r1p=r1;*r0p=r0;}
+static void f2831(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-9);*r1p=r1;*r0p=r0;}
+static void f2832(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,16);*r1p=r1;*r0p=r0;}
+static void f2833(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-16);*r1p=r1;*r0p=r0;}
+static void f2834(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,15);*r1p=r1;*r0p=r0;}
+static void f2835(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-17);*r1p=r1;*r0p=r0;}
+static void f2836(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,32);*r1p=r1;*r0p=r0;}
+static void f2837(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-32);*r1p=r1;*r0p=r0;}
+static void f2838(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,31);*r1p=r1;*r0p=r0;}
+static void f2839(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-33);*r1p=r1;*r0p=r0;}
+static void f2840(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,64);*r1p=r1;*r0p=r0;}
+static void f2841(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-64);*r1p=r1;*r0p=r0;}
+static void f2842(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,63);*r1p=r1;*r0p=r0;}
+static void f2843(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-65);*r1p=r1;*r0p=r0;}
+static void f2844(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,128);*r1p=r1;*r0p=r0;}
+static void f2845(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-128);*r1p=r1;*r0p=r0;}
+static void f2846(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,127);*r1p=r1;*r0p=r0;}
+static void f2847(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-129);*r1p=r1;*r0p=r0;}
+static void f2848(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,256);*r1p=r1;*r0p=r0;}
+static void f2849(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-256);*r1p=r1;*r0p=r0;}
+static void f2850(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,255);*r1p=r1;*r0p=r0;}
+static void f2851(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-257);*r1p=r1;*r0p=r0;}
+static void f2852(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,512);*r1p=r1;*r0p=r0;}
+static void f2853(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-512);*r1p=r1;*r0p=r0;}
+static void f2854(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,511);*r1p=r1;*r0p=r0;}
+static void f2855(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-513);*r1p=r1;*r0p=r0;}
+static void f2856(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,1024);*r1p=r1;*r0p=r0;}
+static void f2857(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2858(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,1023);*r1p=r1;*r0p=r0;}
+static void f2859(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2860(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,2048);*r1p=r1;*r0p=r0;}
+static void f2861(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2862(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,2047);*r1p=r1;*r0p=r0;}
+static void f2863(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2864(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,4096);*r1p=r1;*r0p=r0;}
+static void f2865(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2866(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,4095);*r1p=r1;*r0p=r0;}
+static void f2867(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2868(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,8192);*r1p=r1;*r0p=r0;}
+static void f2869(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2870(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,8191);*r1p=r1;*r0p=r0;}
+static void f2871(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2872(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,16384);*r1p=r1;*r0p=r0;}
+static void f2873(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2874(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,16383);*r1p=r1;*r0p=r0;}
+static void f2875(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2876(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,32768);*r1p=r1;*r0p=r0;}
+static void f2877(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2878(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,32767);*r1p=r1;*r0p=r0;}
+static void f2879(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2048,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2880(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,1);*r1p=r1;*r0p=r0;}
+static void f2881(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-1);*r1p=r1;*r0p=r0;}
+static void f2882(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,0);*r1p=r1;*r0p=r0;}
+static void f2883(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-2);*r1p=r1;*r0p=r0;}
+static void f2884(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,2);*r1p=r1;*r0p=r0;}
+static void f2885(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-2);*r1p=r1;*r0p=r0;}
+static void f2886(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,1);*r1p=r1;*r0p=r0;}
+static void f2887(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-3);*r1p=r1;*r0p=r0;}
+static void f2888(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,4);*r1p=r1;*r0p=r0;}
+static void f2889(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-4);*r1p=r1;*r0p=r0;}
+static void f2890(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,3);*r1p=r1;*r0p=r0;}
+static void f2891(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-5);*r1p=r1;*r0p=r0;}
+static void f2892(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,8);*r1p=r1;*r0p=r0;}
+static void f2893(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-8);*r1p=r1;*r0p=r0;}
+static void f2894(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,7);*r1p=r1;*r0p=r0;}
+static void f2895(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-9);*r1p=r1;*r0p=r0;}
+static void f2896(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,16);*r1p=r1;*r0p=r0;}
+static void f2897(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-16);*r1p=r1;*r0p=r0;}
+static void f2898(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,15);*r1p=r1;*r0p=r0;}
+static void f2899(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-17);*r1p=r1;*r0p=r0;}
+static void f2900(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,32);*r1p=r1;*r0p=r0;}
+static void f2901(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-32);*r1p=r1;*r0p=r0;}
+static void f2902(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,31);*r1p=r1;*r0p=r0;}
+static void f2903(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-33);*r1p=r1;*r0p=r0;}
+static void f2904(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,64);*r1p=r1;*r0p=r0;}
+static void f2905(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-64);*r1p=r1;*r0p=r0;}
+static void f2906(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,63);*r1p=r1;*r0p=r0;}
+static void f2907(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-65);*r1p=r1;*r0p=r0;}
+static void f2908(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,128);*r1p=r1;*r0p=r0;}
+static void f2909(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-128);*r1p=r1;*r0p=r0;}
+static void f2910(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,127);*r1p=r1;*r0p=r0;}
+static void f2911(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-129);*r1p=r1;*r0p=r0;}
+static void f2912(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,256);*r1p=r1;*r0p=r0;}
+static void f2913(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-256);*r1p=r1;*r0p=r0;}
+static void f2914(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,255);*r1p=r1;*r0p=r0;}
+static void f2915(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-257);*r1p=r1;*r0p=r0;}
+static void f2916(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,512);*r1p=r1;*r0p=r0;}
+static void f2917(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-512);*r1p=r1;*r0p=r0;}
+static void f2918(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,511);*r1p=r1;*r0p=r0;}
+static void f2919(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-513);*r1p=r1;*r0p=r0;}
+static void f2920(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,1024);*r1p=r1;*r0p=r0;}
+static void f2921(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2922(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,1023);*r1p=r1;*r0p=r0;}
+static void f2923(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2924(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,2048);*r1p=r1;*r0p=r0;}
+static void f2925(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2926(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,2047);*r1p=r1;*r0p=r0;}
+static void f2927(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2928(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,4096);*r1p=r1;*r0p=r0;}
+static void f2929(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2930(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,4095);*r1p=r1;*r0p=r0;}
+static void f2931(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2932(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,8192);*r1p=r1;*r0p=r0;}
+static void f2933(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2934(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,8191);*r1p=r1;*r0p=r0;}
+static void f2935(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2936(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,16384);*r1p=r1;*r0p=r0;}
+static void f2937(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2938(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,16383);*r1p=r1;*r0p=r0;}
+static void f2939(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2940(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,32768);*r1p=r1;*r0p=r0;}
+static void f2941(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2942(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,32767);*r1p=r1;*r0p=r0;}
+static void f2943(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2048,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2944(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,1);*r1p=r1;*r0p=r0;}
+static void f2945(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-1);*r1p=r1;*r0p=r0;}
+static void f2946(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,0);*r1p=r1;*r0p=r0;}
+static void f2947(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-2);*r1p=r1;*r0p=r0;}
+static void f2948(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,2);*r1p=r1;*r0p=r0;}
+static void f2949(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-2);*r1p=r1;*r0p=r0;}
+static void f2950(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,1);*r1p=r1;*r0p=r0;}
+static void f2951(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-3);*r1p=r1;*r0p=r0;}
+static void f2952(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,4);*r1p=r1;*r0p=r0;}
+static void f2953(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-4);*r1p=r1;*r0p=r0;}
+static void f2954(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,3);*r1p=r1;*r0p=r0;}
+static void f2955(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-5);*r1p=r1;*r0p=r0;}
+static void f2956(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,8);*r1p=r1;*r0p=r0;}
+static void f2957(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-8);*r1p=r1;*r0p=r0;}
+static void f2958(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,7);*r1p=r1;*r0p=r0;}
+static void f2959(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-9);*r1p=r1;*r0p=r0;}
+static void f2960(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,16);*r1p=r1;*r0p=r0;}
+static void f2961(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-16);*r1p=r1;*r0p=r0;}
+static void f2962(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,15);*r1p=r1;*r0p=r0;}
+static void f2963(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-17);*r1p=r1;*r0p=r0;}
+static void f2964(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,32);*r1p=r1;*r0p=r0;}
+static void f2965(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-32);*r1p=r1;*r0p=r0;}
+static void f2966(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,31);*r1p=r1;*r0p=r0;}
+static void f2967(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-33);*r1p=r1;*r0p=r0;}
+static void f2968(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,64);*r1p=r1;*r0p=r0;}
+static void f2969(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-64);*r1p=r1;*r0p=r0;}
+static void f2970(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,63);*r1p=r1;*r0p=r0;}
+static void f2971(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-65);*r1p=r1;*r0p=r0;}
+static void f2972(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,128);*r1p=r1;*r0p=r0;}
+static void f2973(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-128);*r1p=r1;*r0p=r0;}
+static void f2974(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,127);*r1p=r1;*r0p=r0;}
+static void f2975(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-129);*r1p=r1;*r0p=r0;}
+static void f2976(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,256);*r1p=r1;*r0p=r0;}
+static void f2977(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-256);*r1p=r1;*r0p=r0;}
+static void f2978(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,255);*r1p=r1;*r0p=r0;}
+static void f2979(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-257);*r1p=r1;*r0p=r0;}
+static void f2980(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,512);*r1p=r1;*r0p=r0;}
+static void f2981(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-512);*r1p=r1;*r0p=r0;}
+static void f2982(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,511);*r1p=r1;*r0p=r0;}
+static void f2983(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-513);*r1p=r1;*r0p=r0;}
+static void f2984(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,1024);*r1p=r1;*r0p=r0;}
+static void f2985(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2986(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,1023);*r1p=r1;*r0p=r0;}
+static void f2987(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2988(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,2048);*r1p=r1;*r0p=r0;}
+static void f2989(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2990(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,2047);*r1p=r1;*r0p=r0;}
+static void f2991(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2992(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,4096);*r1p=r1;*r0p=r0;}
+static void f2993(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2994(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,4095);*r1p=r1;*r0p=r0;}
+static void f2995(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2996(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,8192);*r1p=r1;*r0p=r0;}
+static void f2997(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2998(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,8191);*r1p=r1;*r0p=r0;}
+static void f2999(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3000(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,16384);*r1p=r1;*r0p=r0;}
+static void f3001(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3002(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,16383);*r1p=r1;*r0p=r0;}
+static void f3003(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3004(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,32768);*r1p=r1;*r0p=r0;}
+static void f3005(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3006(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,32767);*r1p=r1;*r0p=r0;}
+static void f3007(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,2047,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3008(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,1);*r1p=r1;*r0p=r0;}
+static void f3009(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-1);*r1p=r1;*r0p=r0;}
+static void f3010(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,0);*r1p=r1;*r0p=r0;}
+static void f3011(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-2);*r1p=r1;*r0p=r0;}
+static void f3012(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,2);*r1p=r1;*r0p=r0;}
+static void f3013(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-2);*r1p=r1;*r0p=r0;}
+static void f3014(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,1);*r1p=r1;*r0p=r0;}
+static void f3015(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-3);*r1p=r1;*r0p=r0;}
+static void f3016(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,4);*r1p=r1;*r0p=r0;}
+static void f3017(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-4);*r1p=r1;*r0p=r0;}
+static void f3018(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,3);*r1p=r1;*r0p=r0;}
+static void f3019(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-5);*r1p=r1;*r0p=r0;}
+static void f3020(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,8);*r1p=r1;*r0p=r0;}
+static void f3021(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-8);*r1p=r1;*r0p=r0;}
+static void f3022(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,7);*r1p=r1;*r0p=r0;}
+static void f3023(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-9);*r1p=r1;*r0p=r0;}
+static void f3024(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,16);*r1p=r1;*r0p=r0;}
+static void f3025(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-16);*r1p=r1;*r0p=r0;}
+static void f3026(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,15);*r1p=r1;*r0p=r0;}
+static void f3027(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-17);*r1p=r1;*r0p=r0;}
+static void f3028(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,32);*r1p=r1;*r0p=r0;}
+static void f3029(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-32);*r1p=r1;*r0p=r0;}
+static void f3030(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,31);*r1p=r1;*r0p=r0;}
+static void f3031(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-33);*r1p=r1;*r0p=r0;}
+static void f3032(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,64);*r1p=r1;*r0p=r0;}
+static void f3033(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-64);*r1p=r1;*r0p=r0;}
+static void f3034(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,63);*r1p=r1;*r0p=r0;}
+static void f3035(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-65);*r1p=r1;*r0p=r0;}
+static void f3036(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,128);*r1p=r1;*r0p=r0;}
+static void f3037(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-128);*r1p=r1;*r0p=r0;}
+static void f3038(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,127);*r1p=r1;*r0p=r0;}
+static void f3039(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-129);*r1p=r1;*r0p=r0;}
+static void f3040(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,256);*r1p=r1;*r0p=r0;}
+static void f3041(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-256);*r1p=r1;*r0p=r0;}
+static void f3042(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,255);*r1p=r1;*r0p=r0;}
+static void f3043(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-257);*r1p=r1;*r0p=r0;}
+static void f3044(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,512);*r1p=r1;*r0p=r0;}
+static void f3045(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-512);*r1p=r1;*r0p=r0;}
+static void f3046(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,511);*r1p=r1;*r0p=r0;}
+static void f3047(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-513);*r1p=r1;*r0p=r0;}
+static void f3048(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,1024);*r1p=r1;*r0p=r0;}
+static void f3049(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3050(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,1023);*r1p=r1;*r0p=r0;}
+static void f3051(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3052(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,2048);*r1p=r1;*r0p=r0;}
+static void f3053(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3054(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,2047);*r1p=r1;*r0p=r0;}
+static void f3055(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3056(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,4096);*r1p=r1;*r0p=r0;}
+static void f3057(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3058(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,4095);*r1p=r1;*r0p=r0;}
+static void f3059(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3060(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,8192);*r1p=r1;*r0p=r0;}
+static void f3061(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3062(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,8191);*r1p=r1;*r0p=r0;}
+static void f3063(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3064(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,16384);*r1p=r1;*r0p=r0;}
+static void f3065(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3066(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,16383);*r1p=r1;*r0p=r0;}
+static void f3067(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3068(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,32768);*r1p=r1;*r0p=r0;}
+static void f3069(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3070(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,32767);*r1p=r1;*r0p=r0;}
+static void f3071(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-2049,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3072(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,1);*r1p=r1;*r0p=r0;}
+static void f3073(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-1);*r1p=r1;*r0p=r0;}
+static void f3074(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,0);*r1p=r1;*r0p=r0;}
+static void f3075(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-2);*r1p=r1;*r0p=r0;}
+static void f3076(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,2);*r1p=r1;*r0p=r0;}
+static void f3077(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-2);*r1p=r1;*r0p=r0;}
+static void f3078(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,1);*r1p=r1;*r0p=r0;}
+static void f3079(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-3);*r1p=r1;*r0p=r0;}
+static void f3080(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,4);*r1p=r1;*r0p=r0;}
+static void f3081(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-4);*r1p=r1;*r0p=r0;}
+static void f3082(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,3);*r1p=r1;*r0p=r0;}
+static void f3083(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-5);*r1p=r1;*r0p=r0;}
+static void f3084(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,8);*r1p=r1;*r0p=r0;}
+static void f3085(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-8);*r1p=r1;*r0p=r0;}
+static void f3086(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,7);*r1p=r1;*r0p=r0;}
+static void f3087(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-9);*r1p=r1;*r0p=r0;}
+static void f3088(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,16);*r1p=r1;*r0p=r0;}
+static void f3089(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-16);*r1p=r1;*r0p=r0;}
+static void f3090(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,15);*r1p=r1;*r0p=r0;}
+static void f3091(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-17);*r1p=r1;*r0p=r0;}
+static void f3092(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,32);*r1p=r1;*r0p=r0;}
+static void f3093(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-32);*r1p=r1;*r0p=r0;}
+static void f3094(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,31);*r1p=r1;*r0p=r0;}
+static void f3095(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-33);*r1p=r1;*r0p=r0;}
+static void f3096(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,64);*r1p=r1;*r0p=r0;}
+static void f3097(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-64);*r1p=r1;*r0p=r0;}
+static void f3098(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,63);*r1p=r1;*r0p=r0;}
+static void f3099(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-65);*r1p=r1;*r0p=r0;}
+static void f3100(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,128);*r1p=r1;*r0p=r0;}
+static void f3101(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-128);*r1p=r1;*r0p=r0;}
+static void f3102(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,127);*r1p=r1;*r0p=r0;}
+static void f3103(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-129);*r1p=r1;*r0p=r0;}
+static void f3104(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,256);*r1p=r1;*r0p=r0;}
+static void f3105(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-256);*r1p=r1;*r0p=r0;}
+static void f3106(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,255);*r1p=r1;*r0p=r0;}
+static void f3107(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-257);*r1p=r1;*r0p=r0;}
+static void f3108(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,512);*r1p=r1;*r0p=r0;}
+static void f3109(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-512);*r1p=r1;*r0p=r0;}
+static void f3110(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,511);*r1p=r1;*r0p=r0;}
+static void f3111(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-513);*r1p=r1;*r0p=r0;}
+static void f3112(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,1024);*r1p=r1;*r0p=r0;}
+static void f3113(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3114(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,1023);*r1p=r1;*r0p=r0;}
+static void f3115(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3116(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,2048);*r1p=r1;*r0p=r0;}
+static void f3117(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3118(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,2047);*r1p=r1;*r0p=r0;}
+static void f3119(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3120(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,4096);*r1p=r1;*r0p=r0;}
+static void f3121(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3122(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,4095);*r1p=r1;*r0p=r0;}
+static void f3123(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3124(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,8192);*r1p=r1;*r0p=r0;}
+static void f3125(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3126(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,8191);*r1p=r1;*r0p=r0;}
+static void f3127(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3128(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,16384);*r1p=r1;*r0p=r0;}
+static void f3129(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3130(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,16383);*r1p=r1;*r0p=r0;}
+static void f3131(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3132(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,32768);*r1p=r1;*r0p=r0;}
+static void f3133(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3134(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,32767);*r1p=r1;*r0p=r0;}
+static void f3135(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4096,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3136(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,1);*r1p=r1;*r0p=r0;}
+static void f3137(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-1);*r1p=r1;*r0p=r0;}
+static void f3138(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,0);*r1p=r1;*r0p=r0;}
+static void f3139(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-2);*r1p=r1;*r0p=r0;}
+static void f3140(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,2);*r1p=r1;*r0p=r0;}
+static void f3141(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-2);*r1p=r1;*r0p=r0;}
+static void f3142(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,1);*r1p=r1;*r0p=r0;}
+static void f3143(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-3);*r1p=r1;*r0p=r0;}
+static void f3144(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,4);*r1p=r1;*r0p=r0;}
+static void f3145(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-4);*r1p=r1;*r0p=r0;}
+static void f3146(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,3);*r1p=r1;*r0p=r0;}
+static void f3147(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-5);*r1p=r1;*r0p=r0;}
+static void f3148(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,8);*r1p=r1;*r0p=r0;}
+static void f3149(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-8);*r1p=r1;*r0p=r0;}
+static void f3150(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,7);*r1p=r1;*r0p=r0;}
+static void f3151(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-9);*r1p=r1;*r0p=r0;}
+static void f3152(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,16);*r1p=r1;*r0p=r0;}
+static void f3153(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-16);*r1p=r1;*r0p=r0;}
+static void f3154(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,15);*r1p=r1;*r0p=r0;}
+static void f3155(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-17);*r1p=r1;*r0p=r0;}
+static void f3156(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,32);*r1p=r1;*r0p=r0;}
+static void f3157(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-32);*r1p=r1;*r0p=r0;}
+static void f3158(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,31);*r1p=r1;*r0p=r0;}
+static void f3159(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-33);*r1p=r1;*r0p=r0;}
+static void f3160(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,64);*r1p=r1;*r0p=r0;}
+static void f3161(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-64);*r1p=r1;*r0p=r0;}
+static void f3162(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,63);*r1p=r1;*r0p=r0;}
+static void f3163(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-65);*r1p=r1;*r0p=r0;}
+static void f3164(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,128);*r1p=r1;*r0p=r0;}
+static void f3165(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-128);*r1p=r1;*r0p=r0;}
+static void f3166(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,127);*r1p=r1;*r0p=r0;}
+static void f3167(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-129);*r1p=r1;*r0p=r0;}
+static void f3168(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,256);*r1p=r1;*r0p=r0;}
+static void f3169(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-256);*r1p=r1;*r0p=r0;}
+static void f3170(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,255);*r1p=r1;*r0p=r0;}
+static void f3171(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-257);*r1p=r1;*r0p=r0;}
+static void f3172(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,512);*r1p=r1;*r0p=r0;}
+static void f3173(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-512);*r1p=r1;*r0p=r0;}
+static void f3174(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,511);*r1p=r1;*r0p=r0;}
+static void f3175(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-513);*r1p=r1;*r0p=r0;}
+static void f3176(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,1024);*r1p=r1;*r0p=r0;}
+static void f3177(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3178(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,1023);*r1p=r1;*r0p=r0;}
+static void f3179(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3180(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,2048);*r1p=r1;*r0p=r0;}
+static void f3181(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3182(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,2047);*r1p=r1;*r0p=r0;}
+static void f3183(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3184(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,4096);*r1p=r1;*r0p=r0;}
+static void f3185(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3186(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,4095);*r1p=r1;*r0p=r0;}
+static void f3187(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3188(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,8192);*r1p=r1;*r0p=r0;}
+static void f3189(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3190(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,8191);*r1p=r1;*r0p=r0;}
+static void f3191(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3192(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,16384);*r1p=r1;*r0p=r0;}
+static void f3193(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3194(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,16383);*r1p=r1;*r0p=r0;}
+static void f3195(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3196(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,32768);*r1p=r1;*r0p=r0;}
+static void f3197(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3198(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,32767);*r1p=r1;*r0p=r0;}
+static void f3199(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4096,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3200(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,1);*r1p=r1;*r0p=r0;}
+static void f3201(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-1);*r1p=r1;*r0p=r0;}
+static void f3202(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,0);*r1p=r1;*r0p=r0;}
+static void f3203(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-2);*r1p=r1;*r0p=r0;}
+static void f3204(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,2);*r1p=r1;*r0p=r0;}
+static void f3205(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-2);*r1p=r1;*r0p=r0;}
+static void f3206(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,1);*r1p=r1;*r0p=r0;}
+static void f3207(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-3);*r1p=r1;*r0p=r0;}
+static void f3208(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,4);*r1p=r1;*r0p=r0;}
+static void f3209(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-4);*r1p=r1;*r0p=r0;}
+static void f3210(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,3);*r1p=r1;*r0p=r0;}
+static void f3211(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-5);*r1p=r1;*r0p=r0;}
+static void f3212(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,8);*r1p=r1;*r0p=r0;}
+static void f3213(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-8);*r1p=r1;*r0p=r0;}
+static void f3214(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,7);*r1p=r1;*r0p=r0;}
+static void f3215(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-9);*r1p=r1;*r0p=r0;}
+static void f3216(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,16);*r1p=r1;*r0p=r0;}
+static void f3217(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-16);*r1p=r1;*r0p=r0;}
+static void f3218(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,15);*r1p=r1;*r0p=r0;}
+static void f3219(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-17);*r1p=r1;*r0p=r0;}
+static void f3220(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,32);*r1p=r1;*r0p=r0;}
+static void f3221(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-32);*r1p=r1;*r0p=r0;}
+static void f3222(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,31);*r1p=r1;*r0p=r0;}
+static void f3223(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-33);*r1p=r1;*r0p=r0;}
+static void f3224(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,64);*r1p=r1;*r0p=r0;}
+static void f3225(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-64);*r1p=r1;*r0p=r0;}
+static void f3226(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,63);*r1p=r1;*r0p=r0;}
+static void f3227(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-65);*r1p=r1;*r0p=r0;}
+static void f3228(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,128);*r1p=r1;*r0p=r0;}
+static void f3229(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-128);*r1p=r1;*r0p=r0;}
+static void f3230(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,127);*r1p=r1;*r0p=r0;}
+static void f3231(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-129);*r1p=r1;*r0p=r0;}
+static void f3232(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,256);*r1p=r1;*r0p=r0;}
+static void f3233(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-256);*r1p=r1;*r0p=r0;}
+static void f3234(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,255);*r1p=r1;*r0p=r0;}
+static void f3235(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-257);*r1p=r1;*r0p=r0;}
+static void f3236(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,512);*r1p=r1;*r0p=r0;}
+static void f3237(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-512);*r1p=r1;*r0p=r0;}
+static void f3238(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,511);*r1p=r1;*r0p=r0;}
+static void f3239(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-513);*r1p=r1;*r0p=r0;}
+static void f3240(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,1024);*r1p=r1;*r0p=r0;}
+static void f3241(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3242(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,1023);*r1p=r1;*r0p=r0;}
+static void f3243(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3244(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,2048);*r1p=r1;*r0p=r0;}
+static void f3245(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3246(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,2047);*r1p=r1;*r0p=r0;}
+static void f3247(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3248(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,4096);*r1p=r1;*r0p=r0;}
+static void f3249(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3250(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,4095);*r1p=r1;*r0p=r0;}
+static void f3251(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3252(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,8192);*r1p=r1;*r0p=r0;}
+static void f3253(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3254(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,8191);*r1p=r1;*r0p=r0;}
+static void f3255(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3256(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,16384);*r1p=r1;*r0p=r0;}
+static void f3257(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3258(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,16383);*r1p=r1;*r0p=r0;}
+static void f3259(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3260(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,32768);*r1p=r1;*r0p=r0;}
+static void f3261(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3262(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,32767);*r1p=r1;*r0p=r0;}
+static void f3263(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,4095,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3264(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,1);*r1p=r1;*r0p=r0;}
+static void f3265(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-1);*r1p=r1;*r0p=r0;}
+static void f3266(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,0);*r1p=r1;*r0p=r0;}
+static void f3267(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-2);*r1p=r1;*r0p=r0;}
+static void f3268(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,2);*r1p=r1;*r0p=r0;}
+static void f3269(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-2);*r1p=r1;*r0p=r0;}
+static void f3270(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,1);*r1p=r1;*r0p=r0;}
+static void f3271(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-3);*r1p=r1;*r0p=r0;}
+static void f3272(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,4);*r1p=r1;*r0p=r0;}
+static void f3273(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-4);*r1p=r1;*r0p=r0;}
+static void f3274(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,3);*r1p=r1;*r0p=r0;}
+static void f3275(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-5);*r1p=r1;*r0p=r0;}
+static void f3276(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,8);*r1p=r1;*r0p=r0;}
+static void f3277(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-8);*r1p=r1;*r0p=r0;}
+static void f3278(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,7);*r1p=r1;*r0p=r0;}
+static void f3279(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-9);*r1p=r1;*r0p=r0;}
+static void f3280(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,16);*r1p=r1;*r0p=r0;}
+static void f3281(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-16);*r1p=r1;*r0p=r0;}
+static void f3282(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,15);*r1p=r1;*r0p=r0;}
+static void f3283(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-17);*r1p=r1;*r0p=r0;}
+static void f3284(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,32);*r1p=r1;*r0p=r0;}
+static void f3285(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-32);*r1p=r1;*r0p=r0;}
+static void f3286(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,31);*r1p=r1;*r0p=r0;}
+static void f3287(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-33);*r1p=r1;*r0p=r0;}
+static void f3288(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,64);*r1p=r1;*r0p=r0;}
+static void f3289(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-64);*r1p=r1;*r0p=r0;}
+static void f3290(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,63);*r1p=r1;*r0p=r0;}
+static void f3291(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-65);*r1p=r1;*r0p=r0;}
+static void f3292(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,128);*r1p=r1;*r0p=r0;}
+static void f3293(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-128);*r1p=r1;*r0p=r0;}
+static void f3294(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,127);*r1p=r1;*r0p=r0;}
+static void f3295(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-129);*r1p=r1;*r0p=r0;}
+static void f3296(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,256);*r1p=r1;*r0p=r0;}
+static void f3297(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-256);*r1p=r1;*r0p=r0;}
+static void f3298(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,255);*r1p=r1;*r0p=r0;}
+static void f3299(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-257);*r1p=r1;*r0p=r0;}
+static void f3300(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,512);*r1p=r1;*r0p=r0;}
+static void f3301(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-512);*r1p=r1;*r0p=r0;}
+static void f3302(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,511);*r1p=r1;*r0p=r0;}
+static void f3303(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-513);*r1p=r1;*r0p=r0;}
+static void f3304(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,1024);*r1p=r1;*r0p=r0;}
+static void f3305(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3306(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,1023);*r1p=r1;*r0p=r0;}
+static void f3307(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3308(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,2048);*r1p=r1;*r0p=r0;}
+static void f3309(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3310(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,2047);*r1p=r1;*r0p=r0;}
+static void f3311(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3312(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,4096);*r1p=r1;*r0p=r0;}
+static void f3313(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3314(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,4095);*r1p=r1;*r0p=r0;}
+static void f3315(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3316(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,8192);*r1p=r1;*r0p=r0;}
+static void f3317(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3318(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,8191);*r1p=r1;*r0p=r0;}
+static void f3319(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3320(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,16384);*r1p=r1;*r0p=r0;}
+static void f3321(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3322(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,16383);*r1p=r1;*r0p=r0;}
+static void f3323(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3324(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,32768);*r1p=r1;*r0p=r0;}
+static void f3325(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3326(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,32767);*r1p=r1;*r0p=r0;}
+static void f3327(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-4097,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3328(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,1);*r1p=r1;*r0p=r0;} /* generated group: a0 = 8192; b0 sweeps +/- powers of two and their +/-1 neighbors (carry-boundary probes for add_ssaaaa) */
+static void f3329(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-1);*r1p=r1;*r0p=r0;}
+static void f3330(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,0);*r1p=r1;*r0p=r0;}
+static void f3331(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-2);*r1p=r1;*r0p=r0;}
+static void f3332(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,2);*r1p=r1;*r0p=r0;}
+static void f3333(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-2);*r1p=r1;*r0p=r0;}
+static void f3334(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,1);*r1p=r1;*r0p=r0;}
+static void f3335(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-3);*r1p=r1;*r0p=r0;}
+static void f3336(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,4);*r1p=r1;*r0p=r0;}
+static void f3337(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-4);*r1p=r1;*r0p=r0;}
+static void f3338(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,3);*r1p=r1;*r0p=r0;}
+static void f3339(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-5);*r1p=r1;*r0p=r0;}
+static void f3340(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,8);*r1p=r1;*r0p=r0;}
+static void f3341(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-8);*r1p=r1;*r0p=r0;}
+static void f3342(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,7);*r1p=r1;*r0p=r0;}
+static void f3343(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-9);*r1p=r1;*r0p=r0;}
+static void f3344(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,16);*r1p=r1;*r0p=r0;}
+static void f3345(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-16);*r1p=r1;*r0p=r0;}
+static void f3346(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,15);*r1p=r1;*r0p=r0;}
+static void f3347(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-17);*r1p=r1;*r0p=r0;}
+static void f3348(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,32);*r1p=r1;*r0p=r0;}
+static void f3349(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-32);*r1p=r1;*r0p=r0;}
+static void f3350(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,31);*r1p=r1;*r0p=r0;}
+static void f3351(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-33);*r1p=r1;*r0p=r0;}
+static void f3352(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,64);*r1p=r1;*r0p=r0;}
+static void f3353(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-64);*r1p=r1;*r0p=r0;}
+static void f3354(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,63);*r1p=r1;*r0p=r0;}
+static void f3355(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-65);*r1p=r1;*r0p=r0;}
+static void f3356(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,128);*r1p=r1;*r0p=r0;}
+static void f3357(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-128);*r1p=r1;*r0p=r0;}
+static void f3358(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,127);*r1p=r1;*r0p=r0;}
+static void f3359(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-129);*r1p=r1;*r0p=r0;}
+static void f3360(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,256);*r1p=r1;*r0p=r0;}
+static void f3361(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-256);*r1p=r1;*r0p=r0;}
+static void f3362(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,255);*r1p=r1;*r0p=r0;}
+static void f3363(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-257);*r1p=r1;*r0p=r0;}
+static void f3364(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,512);*r1p=r1;*r0p=r0;}
+static void f3365(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-512);*r1p=r1;*r0p=r0;}
+static void f3366(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,511);*r1p=r1;*r0p=r0;}
+static void f3367(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-513);*r1p=r1;*r0p=r0;}
+static void f3368(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,1024);*r1p=r1;*r0p=r0;}
+static void f3369(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3370(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,1023);*r1p=r1;*r0p=r0;}
+static void f3371(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3372(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,2048);*r1p=r1;*r0p=r0;}
+static void f3373(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3374(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,2047);*r1p=r1;*r0p=r0;}
+static void f3375(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3376(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,4096);*r1p=r1;*r0p=r0;}
+static void f3377(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3378(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,4095);*r1p=r1;*r0p=r0;}
+static void f3379(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3380(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,8192);*r1p=r1;*r0p=r0;}
+static void f3381(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3382(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,8191);*r1p=r1;*r0p=r0;}
+static void f3383(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3384(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,16384);*r1p=r1;*r0p=r0;}
+static void f3385(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3386(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,16383);*r1p=r1;*r0p=r0;}
+static void f3387(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3388(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,32768);*r1p=r1;*r0p=r0;}
+static void f3389(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3390(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,32767);*r1p=r1;*r0p=r0;}
+static void f3391(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8192,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3392(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,1);*r1p=r1;*r0p=r0;} /* generated group: a0 = -8192; b0 sweeps +/- powers of two and their +/-1 neighbors */
+static void f3393(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-1);*r1p=r1;*r0p=r0;}
+static void f3394(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,0);*r1p=r1;*r0p=r0;}
+static void f3395(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-2);*r1p=r1;*r0p=r0;}
+static void f3396(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,2);*r1p=r1;*r0p=r0;}
+static void f3397(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-2);*r1p=r1;*r0p=r0;}
+static void f3398(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,1);*r1p=r1;*r0p=r0;}
+static void f3399(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-3);*r1p=r1;*r0p=r0;}
+static void f3400(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,4);*r1p=r1;*r0p=r0;}
+static void f3401(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-4);*r1p=r1;*r0p=r0;}
+static void f3402(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,3);*r1p=r1;*r0p=r0;}
+static void f3403(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-5);*r1p=r1;*r0p=r0;}
+static void f3404(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,8);*r1p=r1;*r0p=r0;}
+static void f3405(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-8);*r1p=r1;*r0p=r0;}
+static void f3406(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,7);*r1p=r1;*r0p=r0;}
+static void f3407(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-9);*r1p=r1;*r0p=r0;}
+static void f3408(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,16);*r1p=r1;*r0p=r0;}
+static void f3409(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-16);*r1p=r1;*r0p=r0;}
+static void f3410(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,15);*r1p=r1;*r0p=r0;}
+static void f3411(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-17);*r1p=r1;*r0p=r0;}
+static void f3412(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,32);*r1p=r1;*r0p=r0;}
+static void f3413(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-32);*r1p=r1;*r0p=r0;}
+static void f3414(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,31);*r1p=r1;*r0p=r0;}
+static void f3415(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-33);*r1p=r1;*r0p=r0;}
+static void f3416(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,64);*r1p=r1;*r0p=r0;}
+static void f3417(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-64);*r1p=r1;*r0p=r0;}
+static void f3418(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,63);*r1p=r1;*r0p=r0;}
+static void f3419(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-65);*r1p=r1;*r0p=r0;}
+static void f3420(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,128);*r1p=r1;*r0p=r0;}
+static void f3421(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-128);*r1p=r1;*r0p=r0;}
+static void f3422(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,127);*r1p=r1;*r0p=r0;}
+static void f3423(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-129);*r1p=r1;*r0p=r0;}
+static void f3424(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,256);*r1p=r1;*r0p=r0;}
+static void f3425(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-256);*r1p=r1;*r0p=r0;}
+static void f3426(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,255);*r1p=r1;*r0p=r0;}
+static void f3427(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-257);*r1p=r1;*r0p=r0;}
+static void f3428(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,512);*r1p=r1;*r0p=r0;}
+static void f3429(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-512);*r1p=r1;*r0p=r0;}
+static void f3430(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,511);*r1p=r1;*r0p=r0;}
+static void f3431(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-513);*r1p=r1;*r0p=r0;}
+static void f3432(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,1024);*r1p=r1;*r0p=r0;}
+static void f3433(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3434(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,1023);*r1p=r1;*r0p=r0;}
+static void f3435(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3436(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,2048);*r1p=r1;*r0p=r0;}
+static void f3437(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3438(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,2047);*r1p=r1;*r0p=r0;}
+static void f3439(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3440(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,4096);*r1p=r1;*r0p=r0;}
+static void f3441(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3442(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,4095);*r1p=r1;*r0p=r0;}
+static void f3443(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3444(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,8192);*r1p=r1;*r0p=r0;}
+static void f3445(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3446(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,8191);*r1p=r1;*r0p=r0;}
+static void f3447(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3448(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,16384);*r1p=r1;*r0p=r0;}
+static void f3449(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3450(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,16383);*r1p=r1;*r0p=r0;}
+static void f3451(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3452(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,32768);*r1p=r1;*r0p=r0;}
+static void f3453(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3454(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,32767);*r1p=r1;*r0p=r0;}
+static void f3455(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8192,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3456(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,1);*r1p=r1;*r0p=r0;} /* generated group: a0 = 8191; b0 sweeps +/- powers of two and their +/-1 neighbors */
+static void f3457(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-1);*r1p=r1;*r0p=r0;}
+static void f3458(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,0);*r1p=r1;*r0p=r0;}
+static void f3459(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-2);*r1p=r1;*r0p=r0;}
+static void f3460(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,2);*r1p=r1;*r0p=r0;}
+static void f3461(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-2);*r1p=r1;*r0p=r0;}
+static void f3462(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,1);*r1p=r1;*r0p=r0;}
+static void f3463(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-3);*r1p=r1;*r0p=r0;}
+static void f3464(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,4);*r1p=r1;*r0p=r0;}
+static void f3465(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-4);*r1p=r1;*r0p=r0;}
+static void f3466(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,3);*r1p=r1;*r0p=r0;}
+static void f3467(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-5);*r1p=r1;*r0p=r0;}
+static void f3468(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,8);*r1p=r1;*r0p=r0;}
+static void f3469(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-8);*r1p=r1;*r0p=r0;}
+static void f3470(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,7);*r1p=r1;*r0p=r0;}
+static void f3471(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-9);*r1p=r1;*r0p=r0;}
+static void f3472(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,16);*r1p=r1;*r0p=r0;}
+static void f3473(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-16);*r1p=r1;*r0p=r0;}
+static void f3474(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,15);*r1p=r1;*r0p=r0;}
+static void f3475(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-17);*r1p=r1;*r0p=r0;}
+static void f3476(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,32);*r1p=r1;*r0p=r0;}
+static void f3477(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-32);*r1p=r1;*r0p=r0;}
+static void f3478(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,31);*r1p=r1;*r0p=r0;}
+static void f3479(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-33);*r1p=r1;*r0p=r0;}
+static void f3480(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,64);*r1p=r1;*r0p=r0;}
+static void f3481(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-64);*r1p=r1;*r0p=r0;}
+static void f3482(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,63);*r1p=r1;*r0p=r0;}
+static void f3483(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-65);*r1p=r1;*r0p=r0;}
+static void f3484(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,128);*r1p=r1;*r0p=r0;}
+static void f3485(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-128);*r1p=r1;*r0p=r0;}
+static void f3486(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,127);*r1p=r1;*r0p=r0;}
+static void f3487(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-129);*r1p=r1;*r0p=r0;}
+static void f3488(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,256);*r1p=r1;*r0p=r0;}
+static void f3489(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-256);*r1p=r1;*r0p=r0;}
+static void f3490(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,255);*r1p=r1;*r0p=r0;}
+static void f3491(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-257);*r1p=r1;*r0p=r0;}
+static void f3492(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,512);*r1p=r1;*r0p=r0;}
+static void f3493(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-512);*r1p=r1;*r0p=r0;}
+static void f3494(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,511);*r1p=r1;*r0p=r0;}
+static void f3495(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-513);*r1p=r1;*r0p=r0;}
+static void f3496(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,1024);*r1p=r1;*r0p=r0;}
+static void f3497(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3498(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,1023);*r1p=r1;*r0p=r0;}
+static void f3499(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3500(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,2048);*r1p=r1;*r0p=r0;}
+static void f3501(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3502(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,2047);*r1p=r1;*r0p=r0;}
+static void f3503(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3504(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,4096);*r1p=r1;*r0p=r0;}
+static void f3505(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3506(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,4095);*r1p=r1;*r0p=r0;}
+static void f3507(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3508(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,8192);*r1p=r1;*r0p=r0;}
+static void f3509(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3510(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,8191);*r1p=r1;*r0p=r0;}
+static void f3511(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3512(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,16384);*r1p=r1;*r0p=r0;}
+static void f3513(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3514(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,16383);*r1p=r1;*r0p=r0;}
+static void f3515(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3516(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,32768);*r1p=r1;*r0p=r0;}
+static void f3517(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3518(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,32767);*r1p=r1;*r0p=r0;}
+static void f3519(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,8191,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3520(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,1);*r1p=r1;*r0p=r0;} /* generated group: a0 = -8193; b0 sweeps +/- powers of two and their +/-1 neighbors */
+static void f3521(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-1);*r1p=r1;*r0p=r0;}
+static void f3522(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,0);*r1p=r1;*r0p=r0;}
+static void f3523(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-2);*r1p=r1;*r0p=r0;}
+static void f3524(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,2);*r1p=r1;*r0p=r0;}
+static void f3525(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-2);*r1p=r1;*r0p=r0;}
+static void f3526(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,1);*r1p=r1;*r0p=r0;}
+static void f3527(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-3);*r1p=r1;*r0p=r0;}
+static void f3528(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,4);*r1p=r1;*r0p=r0;}
+static void f3529(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-4);*r1p=r1;*r0p=r0;}
+static void f3530(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,3);*r1p=r1;*r0p=r0;}
+static void f3531(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-5);*r1p=r1;*r0p=r0;}
+static void f3532(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,8);*r1p=r1;*r0p=r0;}
+static void f3533(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-8);*r1p=r1;*r0p=r0;}
+static void f3534(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,7);*r1p=r1;*r0p=r0;}
+static void f3535(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-9);*r1p=r1;*r0p=r0;}
+static void f3536(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,16);*r1p=r1;*r0p=r0;}
+static void f3537(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-16);*r1p=r1;*r0p=r0;}
+static void f3538(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,15);*r1p=r1;*r0p=r0;}
+static void f3539(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-17);*r1p=r1;*r0p=r0;}
+static void f3540(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,32);*r1p=r1;*r0p=r0;}
+static void f3541(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-32);*r1p=r1;*r0p=r0;}
+static void f3542(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,31);*r1p=r1;*r0p=r0;}
+static void f3543(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-33);*r1p=r1;*r0p=r0;}
+static void f3544(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,64);*r1p=r1;*r0p=r0;}
+static void f3545(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-64);*r1p=r1;*r0p=r0;}
+static void f3546(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,63);*r1p=r1;*r0p=r0;}
+static void f3547(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-65);*r1p=r1;*r0p=r0;}
+static void f3548(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,128);*r1p=r1;*r0p=r0;}
+static void f3549(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-128);*r1p=r1;*r0p=r0;}
+static void f3550(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,127);*r1p=r1;*r0p=r0;}
+static void f3551(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-129);*r1p=r1;*r0p=r0;}
+static void f3552(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,256);*r1p=r1;*r0p=r0;}
+static void f3553(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-256);*r1p=r1;*r0p=r0;}
+static void f3554(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,255);*r1p=r1;*r0p=r0;}
+static void f3555(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-257);*r1p=r1;*r0p=r0;}
+static void f3556(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,512);*r1p=r1;*r0p=r0;}
+static void f3557(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-512);*r1p=r1;*r0p=r0;}
+static void f3558(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,511);*r1p=r1;*r0p=r0;}
+static void f3559(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-513);*r1p=r1;*r0p=r0;}
+static void f3560(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,1024);*r1p=r1;*r0p=r0;}
+static void f3561(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3562(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,1023);*r1p=r1;*r0p=r0;}
+static void f3563(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3564(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,2048);*r1p=r1;*r0p=r0;}
+static void f3565(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3566(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,2047);*r1p=r1;*r0p=r0;}
+static void f3567(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3568(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,4096);*r1p=r1;*r0p=r0;}
+static void f3569(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3570(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,4095);*r1p=r1;*r0p=r0;}
+static void f3571(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3572(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,8192);*r1p=r1;*r0p=r0;}
+static void f3573(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3574(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,8191);*r1p=r1;*r0p=r0;}
+static void f3575(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3576(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,16384);*r1p=r1;*r0p=r0;}
+static void f3577(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3578(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,16383);*r1p=r1;*r0p=r0;}
+static void f3579(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3580(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,32768);*r1p=r1;*r0p=r0;}
+static void f3581(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3582(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,32767);*r1p=r1;*r0p=r0;}
+static void f3583(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-8193,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3584(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,1);*r1p=r1;*r0p=r0;} /* generated group: a0 = 16384; b0 sweeps +/- powers of two and their +/-1 neighbors (group continues past this chunk) */
+static void f3585(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-1);*r1p=r1;*r0p=r0;}
+static void f3586(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,0);*r1p=r1;*r0p=r0;}
+static void f3587(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-2);*r1p=r1;*r0p=r0;}
+static void f3588(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,2);*r1p=r1;*r0p=r0;}
+static void f3589(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-2);*r1p=r1;*r0p=r0;}
+static void f3590(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,1);*r1p=r1;*r0p=r0;}
+static void f3591(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-3);*r1p=r1;*r0p=r0;}
+static void f3592(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,4);*r1p=r1;*r0p=r0;}
+static void f3593(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-4);*r1p=r1;*r0p=r0;}
+static void f3594(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,3);*r1p=r1;*r0p=r0;}
+static void f3595(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-5);*r1p=r1;*r0p=r0;}
+static void f3596(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,8);*r1p=r1;*r0p=r0;}
+static void f3597(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-8);*r1p=r1;*r0p=r0;}
+static void f3598(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,7);*r1p=r1;*r0p=r0;}
+static void f3599(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-9);*r1p=r1;*r0p=r0;}
+static void f3600(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,16);*r1p=r1;*r0p=r0;}
+static void f3601(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-16);*r1p=r1;*r0p=r0;}
+static void f3602(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,15);*r1p=r1;*r0p=r0;}
+static void f3603(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-17);*r1p=r1;*r0p=r0;}
+static void f3604(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,32);*r1p=r1;*r0p=r0;}
+static void f3605(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-32);*r1p=r1;*r0p=r0;}
+static void f3606(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,31);*r1p=r1;*r0p=r0;}
+static void f3607(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-33);*r1p=r1;*r0p=r0;}
+static void f3608(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,64);*r1p=r1;*r0p=r0;}
+static void f3609(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-64);*r1p=r1;*r0p=r0;}
+static void f3610(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,63);*r1p=r1;*r0p=r0;}
+static void f3611(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-65);*r1p=r1;*r0p=r0;}
+static void f3612(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,128);*r1p=r1;*r0p=r0;}
+static void f3613(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-128);*r1p=r1;*r0p=r0;}
+static void f3614(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,127);*r1p=r1;*r0p=r0;}
+static void f3615(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-129);*r1p=r1;*r0p=r0;}
+static void f3616(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,256);*r1p=r1;*r0p=r0;}
+static void f3617(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-256);*r1p=r1;*r0p=r0;}
+static void f3618(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,255);*r1p=r1;*r0p=r0;}
+static void f3619(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-257);*r1p=r1;*r0p=r0;}
+static void f3620(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,512);*r1p=r1;*r0p=r0;}
+static void f3621(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-512);*r1p=r1;*r0p=r0;}
+static void f3622(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,511);*r1p=r1;*r0p=r0;}
+static void f3623(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-513);*r1p=r1;*r0p=r0;}
+static void f3624(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,1024);*r1p=r1;*r0p=r0;}
+static void f3625(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3626(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,1023);*r1p=r1;*r0p=r0;}
+static void f3627(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3628(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,2048);*r1p=r1;*r0p=r0;}
+static void f3629(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3630(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,2047);*r1p=r1;*r0p=r0;}
+static void f3631(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3632(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,4096);*r1p=r1;*r0p=r0;}
+static void f3633(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3634(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,4095);*r1p=r1;*r0p=r0;}
+static void f3635(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3636(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,8192);*r1p=r1;*r0p=r0;}
+static void f3637(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3638(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,8191);*r1p=r1;*r0p=r0;}
+static void f3639(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3640(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,16384);*r1p=r1;*r0p=r0;}
+static void f3641(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3642(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,16383);*r1p=r1;*r0p=r0;}
+static void f3643(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3644(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,32768);*r1p=r1;*r0p=r0;}
+static void f3645(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3646(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,32767);*r1p=r1;*r0p=r0;}
+static void f3647(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16384,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3648(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,1);*r1p=r1;*r0p=r0;}
+static void f3649(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-1);*r1p=r1;*r0p=r0;}
+static void f3650(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,0);*r1p=r1;*r0p=r0;}
+static void f3651(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-2);*r1p=r1;*r0p=r0;}
+static void f3652(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,2);*r1p=r1;*r0p=r0;}
+static void f3653(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-2);*r1p=r1;*r0p=r0;}
+static void f3654(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,1);*r1p=r1;*r0p=r0;}
+static void f3655(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-3);*r1p=r1;*r0p=r0;}
+static void f3656(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,4);*r1p=r1;*r0p=r0;}
+static void f3657(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-4);*r1p=r1;*r0p=r0;}
+static void f3658(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,3);*r1p=r1;*r0p=r0;}
+static void f3659(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-5);*r1p=r1;*r0p=r0;}
+static void f3660(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,8);*r1p=r1;*r0p=r0;}
+static void f3661(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-8);*r1p=r1;*r0p=r0;}
+static void f3662(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,7);*r1p=r1;*r0p=r0;}
+static void f3663(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-9);*r1p=r1;*r0p=r0;}
+static void f3664(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,16);*r1p=r1;*r0p=r0;}
+static void f3665(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-16);*r1p=r1;*r0p=r0;}
+static void f3666(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,15);*r1p=r1;*r0p=r0;}
+static void f3667(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-17);*r1p=r1;*r0p=r0;}
+static void f3668(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,32);*r1p=r1;*r0p=r0;}
+static void f3669(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-32);*r1p=r1;*r0p=r0;}
+static void f3670(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,31);*r1p=r1;*r0p=r0;}
+static void f3671(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-33);*r1p=r1;*r0p=r0;}
+static void f3672(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,64);*r1p=r1;*r0p=r0;}
+static void f3673(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-64);*r1p=r1;*r0p=r0;}
+static void f3674(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,63);*r1p=r1;*r0p=r0;}
+static void f3675(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-65);*r1p=r1;*r0p=r0;}
+static void f3676(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,128);*r1p=r1;*r0p=r0;}
+static void f3677(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-128);*r1p=r1;*r0p=r0;}
+static void f3678(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,127);*r1p=r1;*r0p=r0;}
+static void f3679(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-129);*r1p=r1;*r0p=r0;}
+static void f3680(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,256);*r1p=r1;*r0p=r0;}
+static void f3681(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-256);*r1p=r1;*r0p=r0;}
+static void f3682(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,255);*r1p=r1;*r0p=r0;}
+static void f3683(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-257);*r1p=r1;*r0p=r0;}
+static void f3684(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,512);*r1p=r1;*r0p=r0;}
+static void f3685(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-512);*r1p=r1;*r0p=r0;}
+static void f3686(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,511);*r1p=r1;*r0p=r0;}
+static void f3687(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-513);*r1p=r1;*r0p=r0;}
+static void f3688(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,1024);*r1p=r1;*r0p=r0;}
+static void f3689(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3690(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,1023);*r1p=r1;*r0p=r0;}
+static void f3691(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3692(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,2048);*r1p=r1;*r0p=r0;}
+static void f3693(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3694(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,2047);*r1p=r1;*r0p=r0;}
+static void f3695(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3696(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,4096);*r1p=r1;*r0p=r0;}
+static void f3697(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3698(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,4095);*r1p=r1;*r0p=r0;}
+static void f3699(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3700(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,8192);*r1p=r1;*r0p=r0;}
+static void f3701(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3702(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,8191);*r1p=r1;*r0p=r0;}
+static void f3703(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3704(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,16384);*r1p=r1;*r0p=r0;}
+static void f3705(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3706(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,16383);*r1p=r1;*r0p=r0;}
+static void f3707(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3708(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,32768);*r1p=r1;*r0p=r0;}
+static void f3709(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3710(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,32767);*r1p=r1;*r0p=r0;}
+static void f3711(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16384,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3712(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,1);*r1p=r1;*r0p=r0;}
+static void f3713(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-1);*r1p=r1;*r0p=r0;}
+static void f3714(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,0);*r1p=r1;*r0p=r0;}
+static void f3715(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-2);*r1p=r1;*r0p=r0;}
+static void f3716(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,2);*r1p=r1;*r0p=r0;}
+static void f3717(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-2);*r1p=r1;*r0p=r0;}
+static void f3718(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,1);*r1p=r1;*r0p=r0;}
+static void f3719(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-3);*r1p=r1;*r0p=r0;}
+static void f3720(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,4);*r1p=r1;*r0p=r0;}
+static void f3721(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-4);*r1p=r1;*r0p=r0;}
+static void f3722(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,3);*r1p=r1;*r0p=r0;}
+static void f3723(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-5);*r1p=r1;*r0p=r0;}
+static void f3724(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,8);*r1p=r1;*r0p=r0;}
+static void f3725(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-8);*r1p=r1;*r0p=r0;}
+static void f3726(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,7);*r1p=r1;*r0p=r0;}
+static void f3727(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-9);*r1p=r1;*r0p=r0;}
+static void f3728(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,16);*r1p=r1;*r0p=r0;}
+static void f3729(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-16);*r1p=r1;*r0p=r0;}
+static void f3730(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,15);*r1p=r1;*r0p=r0;}
+static void f3731(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-17);*r1p=r1;*r0p=r0;}
+static void f3732(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,32);*r1p=r1;*r0p=r0;}
+static void f3733(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-32);*r1p=r1;*r0p=r0;}
+static void f3734(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,31);*r1p=r1;*r0p=r0;}
+static void f3735(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-33);*r1p=r1;*r0p=r0;}
+static void f3736(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,64);*r1p=r1;*r0p=r0;}
+static void f3737(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-64);*r1p=r1;*r0p=r0;}
+static void f3738(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,63);*r1p=r1;*r0p=r0;}
+static void f3739(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-65);*r1p=r1;*r0p=r0;}
+static void f3740(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,128);*r1p=r1;*r0p=r0;}
+static void f3741(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-128);*r1p=r1;*r0p=r0;}
+static void f3742(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,127);*r1p=r1;*r0p=r0;}
+static void f3743(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-129);*r1p=r1;*r0p=r0;}
+static void f3744(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,256);*r1p=r1;*r0p=r0;}
+static void f3745(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-256);*r1p=r1;*r0p=r0;}
+static void f3746(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,255);*r1p=r1;*r0p=r0;}
+static void f3747(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-257);*r1p=r1;*r0p=r0;}
+static void f3748(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,512);*r1p=r1;*r0p=r0;}
+static void f3749(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-512);*r1p=r1;*r0p=r0;}
+static void f3750(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,511);*r1p=r1;*r0p=r0;}
+static void f3751(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-513);*r1p=r1;*r0p=r0;}
+static void f3752(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,1024);*r1p=r1;*r0p=r0;}
+static void f3753(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3754(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,1023);*r1p=r1;*r0p=r0;}
+static void f3755(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3756(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,2048);*r1p=r1;*r0p=r0;}
+static void f3757(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3758(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,2047);*r1p=r1;*r0p=r0;}
+static void f3759(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3760(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,4096);*r1p=r1;*r0p=r0;}
+static void f3761(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3762(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,4095);*r1p=r1;*r0p=r0;}
+static void f3763(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3764(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,8192);*r1p=r1;*r0p=r0;}
+static void f3765(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3766(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,8191);*r1p=r1;*r0p=r0;}
+static void f3767(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3768(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,16384);*r1p=r1;*r0p=r0;}
+static void f3769(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3770(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,16383);*r1p=r1;*r0p=r0;}
+static void f3771(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3772(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,32768);*r1p=r1;*r0p=r0;}
+static void f3773(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3774(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,32767);*r1p=r1;*r0p=r0;}
+static void f3775(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,16383,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3776(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,1);*r1p=r1;*r0p=r0;}
+static void f3777(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-1);*r1p=r1;*r0p=r0;}
+static void f3778(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,0);*r1p=r1;*r0p=r0;}
+static void f3779(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-2);*r1p=r1;*r0p=r0;}
+static void f3780(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,2);*r1p=r1;*r0p=r0;}
+static void f3781(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-2);*r1p=r1;*r0p=r0;}
+static void f3782(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,1);*r1p=r1;*r0p=r0;}
+static void f3783(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-3);*r1p=r1;*r0p=r0;}
+static void f3784(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,4);*r1p=r1;*r0p=r0;}
+static void f3785(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-4);*r1p=r1;*r0p=r0;}
+static void f3786(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,3);*r1p=r1;*r0p=r0;}
+static void f3787(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-5);*r1p=r1;*r0p=r0;}
+static void f3788(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,8);*r1p=r1;*r0p=r0;}
+static void f3789(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-8);*r1p=r1;*r0p=r0;}
+static void f3790(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,7);*r1p=r1;*r0p=r0;}
+static void f3791(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-9);*r1p=r1;*r0p=r0;}
+static void f3792(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,16);*r1p=r1;*r0p=r0;}
+static void f3793(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-16);*r1p=r1;*r0p=r0;}
+static void f3794(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,15);*r1p=r1;*r0p=r0;}
+static void f3795(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-17);*r1p=r1;*r0p=r0;}
+static void f3796(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,32);*r1p=r1;*r0p=r0;}
+static void f3797(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-32);*r1p=r1;*r0p=r0;}
+static void f3798(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,31);*r1p=r1;*r0p=r0;}
+static void f3799(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-33);*r1p=r1;*r0p=r0;}
+static void f3800(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,64);*r1p=r1;*r0p=r0;}
+static void f3801(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-64);*r1p=r1;*r0p=r0;}
+static void f3802(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,63);*r1p=r1;*r0p=r0;}
+static void f3803(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-65);*r1p=r1;*r0p=r0;}
+static void f3804(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,128);*r1p=r1;*r0p=r0;}
+static void f3805(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-128);*r1p=r1;*r0p=r0;}
+static void f3806(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,127);*r1p=r1;*r0p=r0;}
+static void f3807(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-129);*r1p=r1;*r0p=r0;}
+static void f3808(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,256);*r1p=r1;*r0p=r0;}
+static void f3809(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-256);*r1p=r1;*r0p=r0;}
+static void f3810(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,255);*r1p=r1;*r0p=r0;}
+static void f3811(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-257);*r1p=r1;*r0p=r0;}
+static void f3812(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,512);*r1p=r1;*r0p=r0;}
+static void f3813(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-512);*r1p=r1;*r0p=r0;}
+static void f3814(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,511);*r1p=r1;*r0p=r0;}
+static void f3815(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-513);*r1p=r1;*r0p=r0;}
+static void f3816(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,1024);*r1p=r1;*r0p=r0;}
+static void f3817(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3818(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,1023);*r1p=r1;*r0p=r0;}
+static void f3819(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3820(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,2048);*r1p=r1;*r0p=r0;}
+static void f3821(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3822(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,2047);*r1p=r1;*r0p=r0;}
+static void f3823(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3824(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,4096);*r1p=r1;*r0p=r0;}
+static void f3825(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3826(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,4095);*r1p=r1;*r0p=r0;}
+static void f3827(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3828(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,8192);*r1p=r1;*r0p=r0;}
+static void f3829(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3830(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,8191);*r1p=r1;*r0p=r0;}
+static void f3831(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3832(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,16384);*r1p=r1;*r0p=r0;}
+static void f3833(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3834(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,16383);*r1p=r1;*r0p=r0;}
+static void f3835(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3836(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,32768);*r1p=r1;*r0p=r0;}
+static void f3837(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3838(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,32767);*r1p=r1;*r0p=r0;}
+static void f3839(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-16385,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3840(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,1);*r1p=r1;*r0p=r0;}
+static void f3841(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-1);*r1p=r1;*r0p=r0;}
+static void f3842(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,0);*r1p=r1;*r0p=r0;}
+static void f3843(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-2);*r1p=r1;*r0p=r0;}
+static void f3844(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,2);*r1p=r1;*r0p=r0;}
+static void f3845(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-2);*r1p=r1;*r0p=r0;}
+static void f3846(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,1);*r1p=r1;*r0p=r0;}
+static void f3847(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-3);*r1p=r1;*r0p=r0;}
+static void f3848(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,4);*r1p=r1;*r0p=r0;}
+static void f3849(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-4);*r1p=r1;*r0p=r0;}
+static void f3850(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,3);*r1p=r1;*r0p=r0;}
+static void f3851(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-5);*r1p=r1;*r0p=r0;}
+static void f3852(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,8);*r1p=r1;*r0p=r0;}
+static void f3853(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-8);*r1p=r1;*r0p=r0;}
+static void f3854(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,7);*r1p=r1;*r0p=r0;}
+static void f3855(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-9);*r1p=r1;*r0p=r0;}
+static void f3856(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,16);*r1p=r1;*r0p=r0;}
+static void f3857(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-16);*r1p=r1;*r0p=r0;}
+static void f3858(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,15);*r1p=r1;*r0p=r0;}
+static void f3859(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-17);*r1p=r1;*r0p=r0;}
+static void f3860(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,32);*r1p=r1;*r0p=r0;}
+static void f3861(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-32);*r1p=r1;*r0p=r0;}
+static void f3862(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,31);*r1p=r1;*r0p=r0;}
+static void f3863(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-33);*r1p=r1;*r0p=r0;}
+static void f3864(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,64);*r1p=r1;*r0p=r0;}
+static void f3865(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-64);*r1p=r1;*r0p=r0;}
+static void f3866(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,63);*r1p=r1;*r0p=r0;}
+static void f3867(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-65);*r1p=r1;*r0p=r0;}
+static void f3868(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,128);*r1p=r1;*r0p=r0;}
+static void f3869(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-128);*r1p=r1;*r0p=r0;}
+static void f3870(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,127);*r1p=r1;*r0p=r0;}
+static void f3871(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-129);*r1p=r1;*r0p=r0;}
+static void f3872(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,256);*r1p=r1;*r0p=r0;}
+static void f3873(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-256);*r1p=r1;*r0p=r0;}
+static void f3874(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,255);*r1p=r1;*r0p=r0;}
+static void f3875(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-257);*r1p=r1;*r0p=r0;}
+static void f3876(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,512);*r1p=r1;*r0p=r0;}
+static void f3877(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-512);*r1p=r1;*r0p=r0;}
+static void f3878(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,511);*r1p=r1;*r0p=r0;}
+static void f3879(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-513);*r1p=r1;*r0p=r0;}
+static void f3880(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,1024);*r1p=r1;*r0p=r0;}
+static void f3881(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3882(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,1023);*r1p=r1;*r0p=r0;}
+static void f3883(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3884(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,2048);*r1p=r1;*r0p=r0;}
+/* Machine-generated add_ssaaaa test thunks (do not edit by hand).  Each fN
+   computes the two-limb sum {0,A} + {0,B} via add_ssaaaa and stores the high
+   limb through r1p and the low limb through r0p.  This run is the tail of the
+   A = 32768 group; B walks +/- powers of two and their off-by-one neighbours
+   up to 2^15.  Negative B constants wrap modulo the limb size, exercising the
+   macro's carry propagation.  */
+static void f3885(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3886(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,2047);*r1p=r1;*r0p=r0;}
+static void f3887(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3888(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,4096);*r1p=r1;*r0p=r0;}
+static void f3889(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3890(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,4095);*r1p=r1;*r0p=r0;}
+static void f3891(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3892(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,8192);*r1p=r1;*r0p=r0;}
+static void f3893(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3894(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,8191);*r1p=r1;*r0p=r0;}
+static void f3895(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3896(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,16384);*r1p=r1;*r0p=r0;}
+static void f3897(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3898(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,16383);*r1p=r1;*r0p=r0;}
+static void f3899(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3900(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,32768);*r1p=r1;*r0p=r0;}
+static void f3901(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3902(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,32767);*r1p=r1;*r0p=r0;}
+static void f3903(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32768,0,-32769);*r1p=r1;*r0p=r0;}
+/* Generated thunks for the A = -32768 operand group: each fN performs
+   add_ssaaaa on the two-limb pair {0, -32768} + {0, B} and writes the result
+   limbs through r1p/r0p.  B runs through the fixed test pattern
+   1,-1,0,-2,2,... then +/- powers of two and their off-by-one neighbours up
+   to 2^15 (some values repeat by construction of the generator).  */
+static void f3904(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,1);*r1p=r1;*r0p=r0;}
+static void f3905(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-1);*r1p=r1;*r0p=r0;}
+static void f3906(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,0);*r1p=r1;*r0p=r0;}
+static void f3907(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-2);*r1p=r1;*r0p=r0;}
+static void f3908(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,2);*r1p=r1;*r0p=r0;}
+static void f3909(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-2);*r1p=r1;*r0p=r0;}
+static void f3910(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,1);*r1p=r1;*r0p=r0;}
+static void f3911(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-3);*r1p=r1;*r0p=r0;}
+static void f3912(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,4);*r1p=r1;*r0p=r0;}
+static void f3913(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-4);*r1p=r1;*r0p=r0;}
+static void f3914(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,3);*r1p=r1;*r0p=r0;}
+static void f3915(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-5);*r1p=r1;*r0p=r0;}
+static void f3916(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,8);*r1p=r1;*r0p=r0;}
+static void f3917(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-8);*r1p=r1;*r0p=r0;}
+static void f3918(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,7);*r1p=r1;*r0p=r0;}
+static void f3919(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-9);*r1p=r1;*r0p=r0;}
+static void f3920(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,16);*r1p=r1;*r0p=r0;}
+static void f3921(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-16);*r1p=r1;*r0p=r0;}
+static void f3922(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,15);*r1p=r1;*r0p=r0;}
+static void f3923(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-17);*r1p=r1;*r0p=r0;}
+static void f3924(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,32);*r1p=r1;*r0p=r0;}
+static void f3925(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-32);*r1p=r1;*r0p=r0;}
+static void f3926(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,31);*r1p=r1;*r0p=r0;}
+static void f3927(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-33);*r1p=r1;*r0p=r0;}
+static void f3928(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,64);*r1p=r1;*r0p=r0;}
+static void f3929(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-64);*r1p=r1;*r0p=r0;}
+static void f3930(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,63);*r1p=r1;*r0p=r0;}
+static void f3931(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-65);*r1p=r1;*r0p=r0;}
+static void f3932(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,128);*r1p=r1;*r0p=r0;}
+static void f3933(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-128);*r1p=r1;*r0p=r0;}
+static void f3934(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,127);*r1p=r1;*r0p=r0;}
+static void f3935(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-129);*r1p=r1;*r0p=r0;}
+static void f3936(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,256);*r1p=r1;*r0p=r0;}
+static void f3937(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-256);*r1p=r1;*r0p=r0;}
+static void f3938(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,255);*r1p=r1;*r0p=r0;}
+static void f3939(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-257);*r1p=r1;*r0p=r0;}
+static void f3940(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,512);*r1p=r1;*r0p=r0;}
+static void f3941(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-512);*r1p=r1;*r0p=r0;}
+static void f3942(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,511);*r1p=r1;*r0p=r0;}
+static void f3943(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-513);*r1p=r1;*r0p=r0;}
+static void f3944(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,1024);*r1p=r1;*r0p=r0;}
+static void f3945(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3946(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,1023);*r1p=r1;*r0p=r0;}
+static void f3947(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3948(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,2048);*r1p=r1;*r0p=r0;}
+static void f3949(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3950(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,2047);*r1p=r1;*r0p=r0;}
+static void f3951(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3952(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,4096);*r1p=r1;*r0p=r0;}
+static void f3953(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3954(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,4095);*r1p=r1;*r0p=r0;}
+static void f3955(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3956(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,8192);*r1p=r1;*r0p=r0;}
+static void f3957(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3958(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,8191);*r1p=r1;*r0p=r0;}
+static void f3959(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3960(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,16384);*r1p=r1;*r0p=r0;}
+static void f3961(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3962(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,16383);*r1p=r1;*r0p=r0;}
+static void f3963(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3964(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,32768);*r1p=r1;*r0p=r0;}
+static void f3965(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3966(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,32767);*r1p=r1;*r0p=r0;}
+static void f3967(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32768,0,-32769);*r1p=r1;*r0p=r0;}
+/* Generated thunks for the A = 32767 operand group: each fN performs
+   add_ssaaaa on {0, 32767} + {0, B} and writes the two result limbs through
+   r1p/r0p.  B follows the same generator pattern as the preceding groups.  */
+static void f3968(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,1);*r1p=r1;*r0p=r0;}
+static void f3969(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-1);*r1p=r1;*r0p=r0;}
+static void f3970(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,0);*r1p=r1;*r0p=r0;}
+static void f3971(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-2);*r1p=r1;*r0p=r0;}
+static void f3972(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,2);*r1p=r1;*r0p=r0;}
+static void f3973(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-2);*r1p=r1;*r0p=r0;}
+static void f3974(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,1);*r1p=r1;*r0p=r0;}
+static void f3975(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-3);*r1p=r1;*r0p=r0;}
+static void f3976(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,4);*r1p=r1;*r0p=r0;}
+static void f3977(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-4);*r1p=r1;*r0p=r0;}
+static void f3978(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,3);*r1p=r1;*r0p=r0;}
+static void f3979(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-5);*r1p=r1;*r0p=r0;}
+static void f3980(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,8);*r1p=r1;*r0p=r0;}
+static void f3981(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-8);*r1p=r1;*r0p=r0;}
+static void f3982(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,7);*r1p=r1;*r0p=r0;}
+static void f3983(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-9);*r1p=r1;*r0p=r0;}
+static void f3984(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,16);*r1p=r1;*r0p=r0;}
+static void f3985(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-16);*r1p=r1;*r0p=r0;}
+static void f3986(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,15);*r1p=r1;*r0p=r0;}
+static void f3987(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-17);*r1p=r1;*r0p=r0;}
+static void f3988(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,32);*r1p=r1;*r0p=r0;}
+static void f3989(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-32);*r1p=r1;*r0p=r0;}
+static void f3990(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,31);*r1p=r1;*r0p=r0;}
+static void f3991(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-33);*r1p=r1;*r0p=r0;}
+static void f3992(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,64);*r1p=r1;*r0p=r0;}
+static void f3993(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-64);*r1p=r1;*r0p=r0;}
+static void f3994(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,63);*r1p=r1;*r0p=r0;}
+static void f3995(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-65);*r1p=r1;*r0p=r0;}
+static void f3996(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,128);*r1p=r1;*r0p=r0;}
+static void f3997(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-128);*r1p=r1;*r0p=r0;}
+static void f3998(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,127);*r1p=r1;*r0p=r0;}
+static void f3999(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-129);*r1p=r1;*r0p=r0;}
+static void f4000(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,256);*r1p=r1;*r0p=r0;}
+static void f4001(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-256);*r1p=r1;*r0p=r0;}
+static void f4002(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,255);*r1p=r1;*r0p=r0;}
+static void f4003(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-257);*r1p=r1;*r0p=r0;}
+static void f4004(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,512);*r1p=r1;*r0p=r0;}
+static void f4005(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-512);*r1p=r1;*r0p=r0;}
+static void f4006(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,511);*r1p=r1;*r0p=r0;}
+static void f4007(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-513);*r1p=r1;*r0p=r0;}
+static void f4008(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,1024);*r1p=r1;*r0p=r0;}
+static void f4009(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-1024);*r1p=r1;*r0p=r0;}
+static void f4010(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,1023);*r1p=r1;*r0p=r0;}
+static void f4011(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-1025);*r1p=r1;*r0p=r0;}
+static void f4012(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,2048);*r1p=r1;*r0p=r0;}
+static void f4013(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-2048);*r1p=r1;*r0p=r0;}
+static void f4014(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,2047);*r1p=r1;*r0p=r0;}
+static void f4015(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-2049);*r1p=r1;*r0p=r0;}
+static void f4016(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,4096);*r1p=r1;*r0p=r0;}
+static void f4017(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-4096);*r1p=r1;*r0p=r0;}
+static void f4018(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,4095);*r1p=r1;*r0p=r0;}
+static void f4019(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-4097);*r1p=r1;*r0p=r0;}
+static void f4020(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,8192);*r1p=r1;*r0p=r0;}
+static void f4021(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-8192);*r1p=r1;*r0p=r0;}
+static void f4022(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,8191);*r1p=r1;*r0p=r0;}
+static void f4023(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-8193);*r1p=r1;*r0p=r0;}
+static void f4024(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,16384);*r1p=r1;*r0p=r0;}
+static void f4025(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-16384);*r1p=r1;*r0p=r0;}
+static void f4026(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,16383);*r1p=r1;*r0p=r0;}
+static void f4027(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-16385);*r1p=r1;*r0p=r0;}
+static void f4028(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,32768);*r1p=r1;*r0p=r0;}
+static void f4029(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-32768);*r1p=r1;*r0p=r0;}
+static void f4030(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,32767);*r1p=r1;*r0p=r0;}
+static void f4031(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,32767,0,-32769);*r1p=r1;*r0p=r0;}
+/* Generated thunks for the final A = -32769 operand group: each fN performs
+   add_ssaaaa on {0, -32769} + {0, B} and stores the high limb via r1p and the
+   low limb via r0p.  B follows the same generator pattern as above; this
+   group ends the table at f4095 (4096 thunks total).  */
+static void f4032(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,1);*r1p=r1;*r0p=r0;}
+static void f4033(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-1);*r1p=r1;*r0p=r0;}
+static void f4034(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,0);*r1p=r1;*r0p=r0;}
+static void f4035(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-2);*r1p=r1;*r0p=r0;}
+static void f4036(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,2);*r1p=r1;*r0p=r0;}
+static void f4037(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-2);*r1p=r1;*r0p=r0;}
+static void f4038(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,1);*r1p=r1;*r0p=r0;}
+static void f4039(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-3);*r1p=r1;*r0p=r0;}
+static void f4040(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,4);*r1p=r1;*r0p=r0;}
+static void f4041(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-4);*r1p=r1;*r0p=r0;}
+static void f4042(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,3);*r1p=r1;*r0p=r0;}
+static void f4043(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-5);*r1p=r1;*r0p=r0;}
+static void f4044(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,8);*r1p=r1;*r0p=r0;}
+static void f4045(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-8);*r1p=r1;*r0p=r0;}
+static void f4046(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,7);*r1p=r1;*r0p=r0;}
+static void f4047(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-9);*r1p=r1;*r0p=r0;}
+static void f4048(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,16);*r1p=r1;*r0p=r0;}
+static void f4049(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-16);*r1p=r1;*r0p=r0;}
+static void f4050(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,15);*r1p=r1;*r0p=r0;}
+static void f4051(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-17);*r1p=r1;*r0p=r0;}
+static void f4052(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,32);*r1p=r1;*r0p=r0;}
+static void f4053(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-32);*r1p=r1;*r0p=r0;}
+static void f4054(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,31);*r1p=r1;*r0p=r0;}
+static void f4055(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-33);*r1p=r1;*r0p=r0;}
+static void f4056(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,64);*r1p=r1;*r0p=r0;}
+static void f4057(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-64);*r1p=r1;*r0p=r0;}
+static void f4058(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,63);*r1p=r1;*r0p=r0;}
+static void f4059(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-65);*r1p=r1;*r0p=r0;}
+static void f4060(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,128);*r1p=r1;*r0p=r0;}
+static void f4061(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-128);*r1p=r1;*r0p=r0;}
+static void f4062(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,127);*r1p=r1;*r0p=r0;}
+static void f4063(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-129);*r1p=r1;*r0p=r0;}
+static void f4064(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,256);*r1p=r1;*r0p=r0;}
+static void f4065(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-256);*r1p=r1;*r0p=r0;}
+static void f4066(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,255);*r1p=r1;*r0p=r0;}
+static void f4067(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-257);*r1p=r1;*r0p=r0;}
+static void f4068(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,512);*r1p=r1;*r0p=r0;}
+static void f4069(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-512);*r1p=r1;*r0p=r0;}
+static void f4070(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,511);*r1p=r1;*r0p=r0;}
+static void f4071(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-513);*r1p=r1;*r0p=r0;}
+static void f4072(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,1024);*r1p=r1;*r0p=r0;}
+static void f4073(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-1024);*r1p=r1;*r0p=r0;}
+static void f4074(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,1023);*r1p=r1;*r0p=r0;}
+static void f4075(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-1025);*r1p=r1;*r0p=r0;}
+static void f4076(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,2048);*r1p=r1;*r0p=r0;}
+static void f4077(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-2048);*r1p=r1;*r0p=r0;}
+static void f4078(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,2047);*r1p=r1;*r0p=r0;}
+static void f4079(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-2049);*r1p=r1;*r0p=r0;}
+static void f4080(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,4096);*r1p=r1;*r0p=r0;}
+static void f4081(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-4096);*r1p=r1;*r0p=r0;}
+static void f4082(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,4095);*r1p=r1;*r0p=r0;}
+static void f4083(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-4097);*r1p=r1;*r0p=r0;}
+static void f4084(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,8192);*r1p=r1;*r0p=r0;}
+static void f4085(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-8192);*r1p=r1;*r0p=r0;}
+static void f4086(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,8191);*r1p=r1;*r0p=r0;}
+static void f4087(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-8193);*r1p=r1;*r0p=r0;}
+static void f4088(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,16384);*r1p=r1;*r0p=r0;}
+static void f4089(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-16384);*r1p=r1;*r0p=r0;}
+static void f4090(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,16383);*r1p=r1;*r0p=r0;}
+static void f4091(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-16385);*r1p=r1;*r0p=r0;}
+static void f4092(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,32768);*r1p=r1;*r0p=r0;}
+static void f4093(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-32768);*r1p=r1;*r0p=r0;}
+static void f4094(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,32767);*r1p=r1;*r0p=r0;}
+static void f4095(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;add_ssaaaa(r1,r0,0,-32769,0,-32769);*r1p=r1;*r0p=r0;}
+/* Common signature of the generated thunks above: write the high limb
+   through the first pointer and the low limb through the second.  */
+typedef void (*func_t) (mp_limb_t*, mp_limb_t*);
+static const func_t funcs[4096] = {
+f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,
+f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,
+f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,
+f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,
+f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,
+f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,
+f96,f97,f98,f99,f100,f101,f102,f103,f104,f105,f106,f107,f108,f109,f110,f111,
+f112,f113,f114,f115,f116,f117,f118,f119,f120,f121,f122,f123,f124,f125,f126,f127,
+f128,f129,f130,f131,f132,f133,f134,f135,f136,f137,f138,f139,f140,f141,f142,f143,
+f144,f145,f146,f147,f148,f149,f150,f151,f152,f153,f154,f155,f156,f157,f158,f159,
+f160,f161,f162,f163,f164,f165,f166,f167,f168,f169,f170,f171,f172,f173,f174,f175,
+f176,f177,f178,f179,f180,f181,f182,f183,f184,f185,f186,f187,f188,f189,f190,f191,
+f192,f193,f194,f195,f196,f197,f198,f199,f200,f201,f202,f203,f204,f205,f206,f207,
+f208,f209,f210,f211,f212,f213,f214,f215,f216,f217,f218,f219,f220,f221,f222,f223,
+f224,f225,f226,f227,f228,f229,f230,f231,f232,f233,f234,f235,f236,f237,f238,f239,
+f240,f241,f242,f243,f244,f245,f246,f247,f248,f249,f250,f251,f252,f253,f254,f255,
+f256,f257,f258,f259,f260,f261,f262,f263,f264,f265,f266,f267,f268,f269,f270,f271,
+f272,f273,f274,f275,f276,f277,f278,f279,f280,f281,f282,f283,f284,f285,f286,f287,
+f288,f289,f290,f291,f292,f293,f294,f295,f296,f297,f298,f299,f300,f301,f302,f303,
+f304,f305,f306,f307,f308,f309,f310,f311,f312,f313,f314,f315,f316,f317,f318,f319,
+f320,f321,f322,f323,f324,f325,f326,f327,f328,f329,f330,f331,f332,f333,f334,f335,
+f336,f337,f338,f339,f340,f341,f342,f343,f344,f345,f346,f347,f348,f349,f350,f351,
+f352,f353,f354,f355,f356,f357,f358,f359,f360,f361,f362,f363,f364,f365,f366,f367,
+f368,f369,f370,f371,f372,f373,f374,f375,f376,f377,f378,f379,f380,f381,f382,f383,
+f384,f385,f386,f387,f388,f389,f390,f391,f392,f393,f394,f395,f396,f397,f398,f399,
+f400,f401,f402,f403,f404,f405,f406,f407,f408,f409,f410,f411,f412,f413,f414,f415,
+f416,f417,f418,f419,f420,f421,f422,f423,f424,f425,f426,f427,f428,f429,f430,f431,
+f432,f433,f434,f435,f436,f437,f438,f439,f440,f441,f442,f443,f444,f445,f446,f447,
+f448,f449,f450,f451,f452,f453,f454,f455,f456,f457,f458,f459,f460,f461,f462,f463,
+f464,f465,f466,f467,f468,f469,f470,f471,f472,f473,f474,f475,f476,f477,f478,f479,
+f480,f481,f482,f483,f484,f485,f486,f487,f488,f489,f490,f491,f492,f493,f494,f495,
+f496,f497,f498,f499,f500,f501,f502,f503,f504,f505,f506,f507,f508,f509,f510,f511,
+f512,f513,f514,f515,f516,f517,f518,f519,f520,f521,f522,f523,f524,f525,f526,f527,
+f528,f529,f530,f531,f532,f533,f534,f535,f536,f537,f538,f539,f540,f541,f542,f543,
+f544,f545,f546,f547,f548,f549,f550,f551,f552,f553,f554,f555,f556,f557,f558,f559,
+f560,f561,f562,f563,f564,f565,f566,f567,f568,f569,f570,f571,f572,f573,f574,f575,
+f576,f577,f578,f579,f580,f581,f582,f583,f584,f585,f586,f587,f588,f589,f590,f591,
+f592,f593,f594,f595,f596,f597,f598,f599,f600,f601,f602,f603,f604,f605,f606,f607,
+f608,f609,f610,f611,f612,f613,f614,f615,f616,f617,f618,f619,f620,f621,f622,f623,
+f624,f625,f626,f627,f628,f629,f630,f631,f632,f633,f634,f635,f636,f637,f638,f639,
+f640,f641,f642,f643,f644,f645,f646,f647,f648,f649,f650,f651,f652,f653,f654,f655,
+f656,f657,f658,f659,f660,f661,f662,f663,f664,f665,f666,f667,f668,f669,f670,f671,
+f672,f673,f674,f675,f676,f677,f678,f679,f680,f681,f682,f683,f684,f685,f686,f687,
+f688,f689,f690,f691,f692,f693,f694,f695,f696,f697,f698,f699,f700,f701,f702,f703,
+f704,f705,f706,f707,f708,f709,f710,f711,f712,f713,f714,f715,f716,f717,f718,f719,
+f720,f721,f722,f723,f724,f725,f726,f727,f728,f729,f730,f731,f732,f733,f734,f735,
+f736,f737,f738,f739,f740,f741,f742,f743,f744,f745,f746,f747,f748,f749,f750,f751,
+f752,f753,f754,f755,f756,f757,f758,f759,f760,f761,f762,f763,f764,f765,f766,f767,
+f768,f769,f770,f771,f772,f773,f774,f775,f776,f777,f778,f779,f780,f781,f782,f783,
+f784,f785,f786,f787,f788,f789,f790,f791,f792,f793,f794,f795,f796,f797,f798,f799,
+f800,f801,f802,f803,f804,f805,f806,f807,f808,f809,f810,f811,f812,f813,f814,f815,
+f816,f817,f818,f819,f820,f821,f822,f823,f824,f825,f826,f827,f828,f829,f830,f831,
+f832,f833,f834,f835,f836,f837,f838,f839,f840,f841,f842,f843,f844,f845,f846,f847,
+f848,f849,f850,f851,f852,f853,f854,f855,f856,f857,f858,f859,f860,f861,f862,f863,
+f864,f865,f866,f867,f868,f869,f870,f871,f872,f873,f874,f875,f876,f877,f878,f879,
+f880,f881,f882,f883,f884,f885,f886,f887,f888,f889,f890,f891,f892,f893,f894,f895,
+f896,f897,f898,f899,f900,f901,f902,f903,f904,f905,f906,f907,f908,f909,f910,f911,
+f912,f913,f914,f915,f916,f917,f918,f919,f920,f921,f922,f923,f924,f925,f926,f927,
+f928,f929,f930,f931,f932,f933,f934,f935,f936,f937,f938,f939,f940,f941,f942,f943,
+f944,f945,f946,f947,f948,f949,f950,f951,f952,f953,f954,f955,f956,f957,f958,f959,
+f960,f961,f962,f963,f964,f965,f966,f967,f968,f969,f970,f971,f972,f973,f974,f975,
+f976,f977,f978,f979,f980,f981,f982,f983,f984,f985,f986,f987,f988,f989,f990,f991,
+f992,f993,f994,f995,f996,f997,f998,f999,f1000,f1001,f1002,f1003,f1004,f1005,f1006,f1007,
+f1008,f1009,f1010,f1011,f1012,f1013,f1014,f1015,f1016,f1017,f1018,f1019,f1020,f1021,f1022,f1023,
+f1024,f1025,f1026,f1027,f1028,f1029,f1030,f1031,f1032,f1033,f1034,f1035,f1036,f1037,f1038,f1039,
+f1040,f1041,f1042,f1043,f1044,f1045,f1046,f1047,f1048,f1049,f1050,f1051,f1052,f1053,f1054,f1055,
+f1056,f1057,f1058,f1059,f1060,f1061,f1062,f1063,f1064,f1065,f1066,f1067,f1068,f1069,f1070,f1071,
+f1072,f1073,f1074,f1075,f1076,f1077,f1078,f1079,f1080,f1081,f1082,f1083,f1084,f1085,f1086,f1087,
+f1088,f1089,f1090,f1091,f1092,f1093,f1094,f1095,f1096,f1097,f1098,f1099,f1100,f1101,f1102,f1103,
+f1104,f1105,f1106,f1107,f1108,f1109,f1110,f1111,f1112,f1113,f1114,f1115,f1116,f1117,f1118,f1119,
+f1120,f1121,f1122,f1123,f1124,f1125,f1126,f1127,f1128,f1129,f1130,f1131,f1132,f1133,f1134,f1135,
+f1136,f1137,f1138,f1139,f1140,f1141,f1142,f1143,f1144,f1145,f1146,f1147,f1148,f1149,f1150,f1151,
+f1152,f1153,f1154,f1155,f1156,f1157,f1158,f1159,f1160,f1161,f1162,f1163,f1164,f1165,f1166,f1167,
+f1168,f1169,f1170,f1171,f1172,f1173,f1174,f1175,f1176,f1177,f1178,f1179,f1180,f1181,f1182,f1183,
+f1184,f1185,f1186,f1187,f1188,f1189,f1190,f1191,f1192,f1193,f1194,f1195,f1196,f1197,f1198,f1199,
+f1200,f1201,f1202,f1203,f1204,f1205,f1206,f1207,f1208,f1209,f1210,f1211,f1212,f1213,f1214,f1215,
+f1216,f1217,f1218,f1219,f1220,f1221,f1222,f1223,f1224,f1225,f1226,f1227,f1228,f1229,f1230,f1231,
+f1232,f1233,f1234,f1235,f1236,f1237,f1238,f1239,f1240,f1241,f1242,f1243,f1244,f1245,f1246,f1247,
+f1248,f1249,f1250,f1251,f1252,f1253,f1254,f1255,f1256,f1257,f1258,f1259,f1260,f1261,f1262,f1263,
+f1264,f1265,f1266,f1267,f1268,f1269,f1270,f1271,f1272,f1273,f1274,f1275,f1276,f1277,f1278,f1279,
+f1280,f1281,f1282,f1283,f1284,f1285,f1286,f1287,f1288,f1289,f1290,f1291,f1292,f1293,f1294,f1295,
+f1296,f1297,f1298,f1299,f1300,f1301,f1302,f1303,f1304,f1305,f1306,f1307,f1308,f1309,f1310,f1311,
+f1312,f1313,f1314,f1315,f1316,f1317,f1318,f1319,f1320,f1321,f1322,f1323,f1324,f1325,f1326,f1327,
+f1328,f1329,f1330,f1331,f1332,f1333,f1334,f1335,f1336,f1337,f1338,f1339,f1340,f1341,f1342,f1343,
+f1344,f1345,f1346,f1347,f1348,f1349,f1350,f1351,f1352,f1353,f1354,f1355,f1356,f1357,f1358,f1359,
+f1360,f1361,f1362,f1363,f1364,f1365,f1366,f1367,f1368,f1369,f1370,f1371,f1372,f1373,f1374,f1375,
+f1376,f1377,f1378,f1379,f1380,f1381,f1382,f1383,f1384,f1385,f1386,f1387,f1388,f1389,f1390,f1391,
+f1392,f1393,f1394,f1395,f1396,f1397,f1398,f1399,f1400,f1401,f1402,f1403,f1404,f1405,f1406,f1407,
+f1408,f1409,f1410,f1411,f1412,f1413,f1414,f1415,f1416,f1417,f1418,f1419,f1420,f1421,f1422,f1423,
+f1424,f1425,f1426,f1427,f1428,f1429,f1430,f1431,f1432,f1433,f1434,f1435,f1436,f1437,f1438,f1439,
+f1440,f1441,f1442,f1443,f1444,f1445,f1446,f1447,f1448,f1449,f1450,f1451,f1452,f1453,f1454,f1455,
+f1456,f1457,f1458,f1459,f1460,f1461,f1462,f1463,f1464,f1465,f1466,f1467,f1468,f1469,f1470,f1471,
+f1472,f1473,f1474,f1475,f1476,f1477,f1478,f1479,f1480,f1481,f1482,f1483,f1484,f1485,f1486,f1487,
+f1488,f1489,f1490,f1491,f1492,f1493,f1494,f1495,f1496,f1497,f1498,f1499,f1500,f1501,f1502,f1503,
+f1504,f1505,f1506,f1507,f1508,f1509,f1510,f1511,f1512,f1513,f1514,f1515,f1516,f1517,f1518,f1519,
+f1520,f1521,f1522,f1523,f1524,f1525,f1526,f1527,f1528,f1529,f1530,f1531,f1532,f1533,f1534,f1535,
+f1536,f1537,f1538,f1539,f1540,f1541,f1542,f1543,f1544,f1545,f1546,f1547,f1548,f1549,f1550,f1551,
+f1552,f1553,f1554,f1555,f1556,f1557,f1558,f1559,f1560,f1561,f1562,f1563,f1564,f1565,f1566,f1567,
+f1568,f1569,f1570,f1571,f1572,f1573,f1574,f1575,f1576,f1577,f1578,f1579,f1580,f1581,f1582,f1583,
+f1584,f1585,f1586,f1587,f1588,f1589,f1590,f1591,f1592,f1593,f1594,f1595,f1596,f1597,f1598,f1599,
+f1600,f1601,f1602,f1603,f1604,f1605,f1606,f1607,f1608,f1609,f1610,f1611,f1612,f1613,f1614,f1615,
+f1616,f1617,f1618,f1619,f1620,f1621,f1622,f1623,f1624,f1625,f1626,f1627,f1628,f1629,f1630,f1631,
+f1632,f1633,f1634,f1635,f1636,f1637,f1638,f1639,f1640,f1641,f1642,f1643,f1644,f1645,f1646,f1647,
+f1648,f1649,f1650,f1651,f1652,f1653,f1654,f1655,f1656,f1657,f1658,f1659,f1660,f1661,f1662,f1663,
+f1664,f1665,f1666,f1667,f1668,f1669,f1670,f1671,f1672,f1673,f1674,f1675,f1676,f1677,f1678,f1679,
+f1680,f1681,f1682,f1683,f1684,f1685,f1686,f1687,f1688,f1689,f1690,f1691,f1692,f1693,f1694,f1695,
+f1696,f1697,f1698,f1699,f1700,f1701,f1702,f1703,f1704,f1705,f1706,f1707,f1708,f1709,f1710,f1711,
+f1712,f1713,f1714,f1715,f1716,f1717,f1718,f1719,f1720,f1721,f1722,f1723,f1724,f1725,f1726,f1727,
+f1728,f1729,f1730,f1731,f1732,f1733,f1734,f1735,f1736,f1737,f1738,f1739,f1740,f1741,f1742,f1743,
+f1744,f1745,f1746,f1747,f1748,f1749,f1750,f1751,f1752,f1753,f1754,f1755,f1756,f1757,f1758,f1759,
+f1760,f1761,f1762,f1763,f1764,f1765,f1766,f1767,f1768,f1769,f1770,f1771,f1772,f1773,f1774,f1775,
+f1776,f1777,f1778,f1779,f1780,f1781,f1782,f1783,f1784,f1785,f1786,f1787,f1788,f1789,f1790,f1791,
+f1792,f1793,f1794,f1795,f1796,f1797,f1798,f1799,f1800,f1801,f1802,f1803,f1804,f1805,f1806,f1807,
+f1808,f1809,f1810,f1811,f1812,f1813,f1814,f1815,f1816,f1817,f1818,f1819,f1820,f1821,f1822,f1823,
+f1824,f1825,f1826,f1827,f1828,f1829,f1830,f1831,f1832,f1833,f1834,f1835,f1836,f1837,f1838,f1839,
+f1840,f1841,f1842,f1843,f1844,f1845,f1846,f1847,f1848,f1849,f1850,f1851,f1852,f1853,f1854,f1855,
+f1856,f1857,f1858,f1859,f1860,f1861,f1862,f1863,f1864,f1865,f1866,f1867,f1868,f1869,f1870,f1871,
+f1872,f1873,f1874,f1875,f1876,f1877,f1878,f1879,f1880,f1881,f1882,f1883,f1884,f1885,f1886,f1887,
+f1888,f1889,f1890,f1891,f1892,f1893,f1894,f1895,f1896,f1897,f1898,f1899,f1900,f1901,f1902,f1903,
+f1904,f1905,f1906,f1907,f1908,f1909,f1910,f1911,f1912,f1913,f1914,f1915,f1916,f1917,f1918,f1919,
+f1920,f1921,f1922,f1923,f1924,f1925,f1926,f1927,f1928,f1929,f1930,f1931,f1932,f1933,f1934,f1935,
+f1936,f1937,f1938,f1939,f1940,f1941,f1942,f1943,f1944,f1945,f1946,f1947,f1948,f1949,f1950,f1951,
+f1952,f1953,f1954,f1955,f1956,f1957,f1958,f1959,f1960,f1961,f1962,f1963,f1964,f1965,f1966,f1967,
+f1968,f1969,f1970,f1971,f1972,f1973,f1974,f1975,f1976,f1977,f1978,f1979,f1980,f1981,f1982,f1983,
+f1984,f1985,f1986,f1987,f1988,f1989,f1990,f1991,f1992,f1993,f1994,f1995,f1996,f1997,f1998,f1999,
+f2000,f2001,f2002,f2003,f2004,f2005,f2006,f2007,f2008,f2009,f2010,f2011,f2012,f2013,f2014,f2015,
+f2016,f2017,f2018,f2019,f2020,f2021,f2022,f2023,f2024,f2025,f2026,f2027,f2028,f2029,f2030,f2031,
+f2032,f2033,f2034,f2035,f2036,f2037,f2038,f2039,f2040,f2041,f2042,f2043,f2044,f2045,f2046,f2047,
+f2048,f2049,f2050,f2051,f2052,f2053,f2054,f2055,f2056,f2057,f2058,f2059,f2060,f2061,f2062,f2063,
+f2064,f2065,f2066,f2067,f2068,f2069,f2070,f2071,f2072,f2073,f2074,f2075,f2076,f2077,f2078,f2079,
+f2080,f2081,f2082,f2083,f2084,f2085,f2086,f2087,f2088,f2089,f2090,f2091,f2092,f2093,f2094,f2095,
+f2096,f2097,f2098,f2099,f2100,f2101,f2102,f2103,f2104,f2105,f2106,f2107,f2108,f2109,f2110,f2111,
+f2112,f2113,f2114,f2115,f2116,f2117,f2118,f2119,f2120,f2121,f2122,f2123,f2124,f2125,f2126,f2127,
+f2128,f2129,f2130,f2131,f2132,f2133,f2134,f2135,f2136,f2137,f2138,f2139,f2140,f2141,f2142,f2143,
+f2144,f2145,f2146,f2147,f2148,f2149,f2150,f2151,f2152,f2153,f2154,f2155,f2156,f2157,f2158,f2159,
+f2160,f2161,f2162,f2163,f2164,f2165,f2166,f2167,f2168,f2169,f2170,f2171,f2172,f2173,f2174,f2175,
+f2176,f2177,f2178,f2179,f2180,f2181,f2182,f2183,f2184,f2185,f2186,f2187,f2188,f2189,f2190,f2191,
+f2192,f2193,f2194,f2195,f2196,f2197,f2198,f2199,f2200,f2201,f2202,f2203,f2204,f2205,f2206,f2207,
+f2208,f2209,f2210,f2211,f2212,f2213,f2214,f2215,f2216,f2217,f2218,f2219,f2220,f2221,f2222,f2223,
+f2224,f2225,f2226,f2227,f2228,f2229,f2230,f2231,f2232,f2233,f2234,f2235,f2236,f2237,f2238,f2239,
+f2240,f2241,f2242,f2243,f2244,f2245,f2246,f2247,f2248,f2249,f2250,f2251,f2252,f2253,f2254,f2255,
+f2256,f2257,f2258,f2259,f2260,f2261,f2262,f2263,f2264,f2265,f2266,f2267,f2268,f2269,f2270,f2271,
+f2272,f2273,f2274,f2275,f2276,f2277,f2278,f2279,f2280,f2281,f2282,f2283,f2284,f2285,f2286,f2287,
+f2288,f2289,f2290,f2291,f2292,f2293,f2294,f2295,f2296,f2297,f2298,f2299,f2300,f2301,f2302,f2303,
+f2304,f2305,f2306,f2307,f2308,f2309,f2310,f2311,f2312,f2313,f2314,f2315,f2316,f2317,f2318,f2319,
+f2320,f2321,f2322,f2323,f2324,f2325,f2326,f2327,f2328,f2329,f2330,f2331,f2332,f2333,f2334,f2335,
+f2336,f2337,f2338,f2339,f2340,f2341,f2342,f2343,f2344,f2345,f2346,f2347,f2348,f2349,f2350,f2351,
+f2352,f2353,f2354,f2355,f2356,f2357,f2358,f2359,f2360,f2361,f2362,f2363,f2364,f2365,f2366,f2367,
+f2368,f2369,f2370,f2371,f2372,f2373,f2374,f2375,f2376,f2377,f2378,f2379,f2380,f2381,f2382,f2383,
+f2384,f2385,f2386,f2387,f2388,f2389,f2390,f2391,f2392,f2393,f2394,f2395,f2396,f2397,f2398,f2399,
+f2400,f2401,f2402,f2403,f2404,f2405,f2406,f2407,f2408,f2409,f2410,f2411,f2412,f2413,f2414,f2415,
+f2416,f2417,f2418,f2419,f2420,f2421,f2422,f2423,f2424,f2425,f2426,f2427,f2428,f2429,f2430,f2431,
+f2432,f2433,f2434,f2435,f2436,f2437,f2438,f2439,f2440,f2441,f2442,f2443,f2444,f2445,f2446,f2447,
+f2448,f2449,f2450,f2451,f2452,f2453,f2454,f2455,f2456,f2457,f2458,f2459,f2460,f2461,f2462,f2463,
+f2464,f2465,f2466,f2467,f2468,f2469,f2470,f2471,f2472,f2473,f2474,f2475,f2476,f2477,f2478,f2479,
+f2480,f2481,f2482,f2483,f2484,f2485,f2486,f2487,f2488,f2489,f2490,f2491,f2492,f2493,f2494,f2495,
+f2496,f2497,f2498,f2499,f2500,f2501,f2502,f2503,f2504,f2505,f2506,f2507,f2508,f2509,f2510,f2511,
+f2512,f2513,f2514,f2515,f2516,f2517,f2518,f2519,f2520,f2521,f2522,f2523,f2524,f2525,f2526,f2527,
+f2528,f2529,f2530,f2531,f2532,f2533,f2534,f2535,f2536,f2537,f2538,f2539,f2540,f2541,f2542,f2543,
+f2544,f2545,f2546,f2547,f2548,f2549,f2550,f2551,f2552,f2553,f2554,f2555,f2556,f2557,f2558,f2559,
+f2560,f2561,f2562,f2563,f2564,f2565,f2566,f2567,f2568,f2569,f2570,f2571,f2572,f2573,f2574,f2575,
+f2576,f2577,f2578,f2579,f2580,f2581,f2582,f2583,f2584,f2585,f2586,f2587,f2588,f2589,f2590,f2591,
+f2592,f2593,f2594,f2595,f2596,f2597,f2598,f2599,f2600,f2601,f2602,f2603,f2604,f2605,f2606,f2607,
+f2608,f2609,f2610,f2611,f2612,f2613,f2614,f2615,f2616,f2617,f2618,f2619,f2620,f2621,f2622,f2623,
+f2624,f2625,f2626,f2627,f2628,f2629,f2630,f2631,f2632,f2633,f2634,f2635,f2636,f2637,f2638,f2639,
+f2640,f2641,f2642,f2643,f2644,f2645,f2646,f2647,f2648,f2649,f2650,f2651,f2652,f2653,f2654,f2655,
+f2656,f2657,f2658,f2659,f2660,f2661,f2662,f2663,f2664,f2665,f2666,f2667,f2668,f2669,f2670,f2671,
+f2672,f2673,f2674,f2675,f2676,f2677,f2678,f2679,f2680,f2681,f2682,f2683,f2684,f2685,f2686,f2687,
+f2688,f2689,f2690,f2691,f2692,f2693,f2694,f2695,f2696,f2697,f2698,f2699,f2700,f2701,f2702,f2703,
+f2704,f2705,f2706,f2707,f2708,f2709,f2710,f2711,f2712,f2713,f2714,f2715,f2716,f2717,f2718,f2719,
+f2720,f2721,f2722,f2723,f2724,f2725,f2726,f2727,f2728,f2729,f2730,f2731,f2732,f2733,f2734,f2735,
+f2736,f2737,f2738,f2739,f2740,f2741,f2742,f2743,f2744,f2745,f2746,f2747,f2748,f2749,f2750,f2751,
+f2752,f2753,f2754,f2755,f2756,f2757,f2758,f2759,f2760,f2761,f2762,f2763,f2764,f2765,f2766,f2767,
+f2768,f2769,f2770,f2771,f2772,f2773,f2774,f2775,f2776,f2777,f2778,f2779,f2780,f2781,f2782,f2783,
+f2784,f2785,f2786,f2787,f2788,f2789,f2790,f2791,f2792,f2793,f2794,f2795,f2796,f2797,f2798,f2799,
+f2800,f2801,f2802,f2803,f2804,f2805,f2806,f2807,f2808,f2809,f2810,f2811,f2812,f2813,f2814,f2815,
+f2816,f2817,f2818,f2819,f2820,f2821,f2822,f2823,f2824,f2825,f2826,f2827,f2828,f2829,f2830,f2831,
+f2832,f2833,f2834,f2835,f2836,f2837,f2838,f2839,f2840,f2841,f2842,f2843,f2844,f2845,f2846,f2847,
+f2848,f2849,f2850,f2851,f2852,f2853,f2854,f2855,f2856,f2857,f2858,f2859,f2860,f2861,f2862,f2863,
+f2864,f2865,f2866,f2867,f2868,f2869,f2870,f2871,f2872,f2873,f2874,f2875,f2876,f2877,f2878,f2879,
+f2880,f2881,f2882,f2883,f2884,f2885,f2886,f2887,f2888,f2889,f2890,f2891,f2892,f2893,f2894,f2895,
+f2896,f2897,f2898,f2899,f2900,f2901,f2902,f2903,f2904,f2905,f2906,f2907,f2908,f2909,f2910,f2911,
+f2912,f2913,f2914,f2915,f2916,f2917,f2918,f2919,f2920,f2921,f2922,f2923,f2924,f2925,f2926,f2927,
+f2928,f2929,f2930,f2931,f2932,f2933,f2934,f2935,f2936,f2937,f2938,f2939,f2940,f2941,f2942,f2943,
+f2944,f2945,f2946,f2947,f2948,f2949,f2950,f2951,f2952,f2953,f2954,f2955,f2956,f2957,f2958,f2959,
+f2960,f2961,f2962,f2963,f2964,f2965,f2966,f2967,f2968,f2969,f2970,f2971,f2972,f2973,f2974,f2975,
+f2976,f2977,f2978,f2979,f2980,f2981,f2982,f2983,f2984,f2985,f2986,f2987,f2988,f2989,f2990,f2991,
+f2992,f2993,f2994,f2995,f2996,f2997,f2998,f2999,f3000,f3001,f3002,f3003,f3004,f3005,f3006,f3007,
+f3008,f3009,f3010,f3011,f3012,f3013,f3014,f3015,f3016,f3017,f3018,f3019,f3020,f3021,f3022,f3023,
+f3024,f3025,f3026,f3027,f3028,f3029,f3030,f3031,f3032,f3033,f3034,f3035,f3036,f3037,f3038,f3039,
+f3040,f3041,f3042,f3043,f3044,f3045,f3046,f3047,f3048,f3049,f3050,f3051,f3052,f3053,f3054,f3055,
+f3056,f3057,f3058,f3059,f3060,f3061,f3062,f3063,f3064,f3065,f3066,f3067,f3068,f3069,f3070,f3071,
+f3072,f3073,f3074,f3075,f3076,f3077,f3078,f3079,f3080,f3081,f3082,f3083,f3084,f3085,f3086,f3087,
+f3088,f3089,f3090,f3091,f3092,f3093,f3094,f3095,f3096,f3097,f3098,f3099,f3100,f3101,f3102,f3103,
+f3104,f3105,f3106,f3107,f3108,f3109,f3110,f3111,f3112,f3113,f3114,f3115,f3116,f3117,f3118,f3119,
+f3120,f3121,f3122,f3123,f3124,f3125,f3126,f3127,f3128,f3129,f3130,f3131,f3132,f3133,f3134,f3135,
+f3136,f3137,f3138,f3139,f3140,f3141,f3142,f3143,f3144,f3145,f3146,f3147,f3148,f3149,f3150,f3151,
+f3152,f3153,f3154,f3155,f3156,f3157,f3158,f3159,f3160,f3161,f3162,f3163,f3164,f3165,f3166,f3167,
+f3168,f3169,f3170,f3171,f3172,f3173,f3174,f3175,f3176,f3177,f3178,f3179,f3180,f3181,f3182,f3183,
+f3184,f3185,f3186,f3187,f3188,f3189,f3190,f3191,f3192,f3193,f3194,f3195,f3196,f3197,f3198,f3199,
+f3200,f3201,f3202,f3203,f3204,f3205,f3206,f3207,f3208,f3209,f3210,f3211,f3212,f3213,f3214,f3215,
+f3216,f3217,f3218,f3219,f3220,f3221,f3222,f3223,f3224,f3225,f3226,f3227,f3228,f3229,f3230,f3231,
+f3232,f3233,f3234,f3235,f3236,f3237,f3238,f3239,f3240,f3241,f3242,f3243,f3244,f3245,f3246,f3247,
+f3248,f3249,f3250,f3251,f3252,f3253,f3254,f3255,f3256,f3257,f3258,f3259,f3260,f3261,f3262,f3263,
+f3264,f3265,f3266,f3267,f3268,f3269,f3270,f3271,f3272,f3273,f3274,f3275,f3276,f3277,f3278,f3279,
+f3280,f3281,f3282,f3283,f3284,f3285,f3286,f3287,f3288,f3289,f3290,f3291,f3292,f3293,f3294,f3295,
+f3296,f3297,f3298,f3299,f3300,f3301,f3302,f3303,f3304,f3305,f3306,f3307,f3308,f3309,f3310,f3311,
+f3312,f3313,f3314,f3315,f3316,f3317,f3318,f3319,f3320,f3321,f3322,f3323,f3324,f3325,f3326,f3327,
+f3328,f3329,f3330,f3331,f3332,f3333,f3334,f3335,f3336,f3337,f3338,f3339,f3340,f3341,f3342,f3343,
+f3344,f3345,f3346,f3347,f3348,f3349,f3350,f3351,f3352,f3353,f3354,f3355,f3356,f3357,f3358,f3359,
+f3360,f3361,f3362,f3363,f3364,f3365,f3366,f3367,f3368,f3369,f3370,f3371,f3372,f3373,f3374,f3375,
+f3376,f3377,f3378,f3379,f3380,f3381,f3382,f3383,f3384,f3385,f3386,f3387,f3388,f3389,f3390,f3391,
+f3392,f3393,f3394,f3395,f3396,f3397,f3398,f3399,f3400,f3401,f3402,f3403,f3404,f3405,f3406,f3407,
+f3408,f3409,f3410,f3411,f3412,f3413,f3414,f3415,f3416,f3417,f3418,f3419,f3420,f3421,f3422,f3423,
+f3424,f3425,f3426,f3427,f3428,f3429,f3430,f3431,f3432,f3433,f3434,f3435,f3436,f3437,f3438,f3439,
+f3440,f3441,f3442,f3443,f3444,f3445,f3446,f3447,f3448,f3449,f3450,f3451,f3452,f3453,f3454,f3455,
+f3456,f3457,f3458,f3459,f3460,f3461,f3462,f3463,f3464,f3465,f3466,f3467,f3468,f3469,f3470,f3471,
+f3472,f3473,f3474,f3475,f3476,f3477,f3478,f3479,f3480,f3481,f3482,f3483,f3484,f3485,f3486,f3487,
+f3488,f3489,f3490,f3491,f3492,f3493,f3494,f3495,f3496,f3497,f3498,f3499,f3500,f3501,f3502,f3503,
+f3504,f3505,f3506,f3507,f3508,f3509,f3510,f3511,f3512,f3513,f3514,f3515,f3516,f3517,f3518,f3519,
+f3520,f3521,f3522,f3523,f3524,f3525,f3526,f3527,f3528,f3529,f3530,f3531,f3532,f3533,f3534,f3535,
+f3536,f3537,f3538,f3539,f3540,f3541,f3542,f3543,f3544,f3545,f3546,f3547,f3548,f3549,f3550,f3551,
+f3552,f3553,f3554,f3555,f3556,f3557,f3558,f3559,f3560,f3561,f3562,f3563,f3564,f3565,f3566,f3567,
+f3568,f3569,f3570,f3571,f3572,f3573,f3574,f3575,f3576,f3577,f3578,f3579,f3580,f3581,f3582,f3583,
+f3584,f3585,f3586,f3587,f3588,f3589,f3590,f3591,f3592,f3593,f3594,f3595,f3596,f3597,f3598,f3599,
+f3600,f3601,f3602,f3603,f3604,f3605,f3606,f3607,f3608,f3609,f3610,f3611,f3612,f3613,f3614,f3615,
+f3616,f3617,f3618,f3619,f3620,f3621,f3622,f3623,f3624,f3625,f3626,f3627,f3628,f3629,f3630,f3631,
+f3632,f3633,f3634,f3635,f3636,f3637,f3638,f3639,f3640,f3641,f3642,f3643,f3644,f3645,f3646,f3647,
+f3648,f3649,f3650,f3651,f3652,f3653,f3654,f3655,f3656,f3657,f3658,f3659,f3660,f3661,f3662,f3663,
+f3664,f3665,f3666,f3667,f3668,f3669,f3670,f3671,f3672,f3673,f3674,f3675,f3676,f3677,f3678,f3679,
+f3680,f3681,f3682,f3683,f3684,f3685,f3686,f3687,f3688,f3689,f3690,f3691,f3692,f3693,f3694,f3695,
+f3696,f3697,f3698,f3699,f3700,f3701,f3702,f3703,f3704,f3705,f3706,f3707,f3708,f3709,f3710,f3711,
+f3712,f3713,f3714,f3715,f3716,f3717,f3718,f3719,f3720,f3721,f3722,f3723,f3724,f3725,f3726,f3727,
+f3728,f3729,f3730,f3731,f3732,f3733,f3734,f3735,f3736,f3737,f3738,f3739,f3740,f3741,f3742,f3743,
+f3744,f3745,f3746,f3747,f3748,f3749,f3750,f3751,f3752,f3753,f3754,f3755,f3756,f3757,f3758,f3759,
+f3760,f3761,f3762,f3763,f3764,f3765,f3766,f3767,f3768,f3769,f3770,f3771,f3772,f3773,f3774,f3775,
+f3776,f3777,f3778,f3779,f3780,f3781,f3782,f3783,f3784,f3785,f3786,f3787,f3788,f3789,f3790,f3791,
+f3792,f3793,f3794,f3795,f3796,f3797,f3798,f3799,f3800,f3801,f3802,f3803,f3804,f3805,f3806,f3807,
+f3808,f3809,f3810,f3811,f3812,f3813,f3814,f3815,f3816,f3817,f3818,f3819,f3820,f3821,f3822,f3823,
+f3824,f3825,f3826,f3827,f3828,f3829,f3830,f3831,f3832,f3833,f3834,f3835,f3836,f3837,f3838,f3839,
+f3840,f3841,f3842,f3843,f3844,f3845,f3846,f3847,f3848,f3849,f3850,f3851,f3852,f3853,f3854,f3855,
+f3856,f3857,f3858,f3859,f3860,f3861,f3862,f3863,f3864,f3865,f3866,f3867,f3868,f3869,f3870,f3871,
+f3872,f3873,f3874,f3875,f3876,f3877,f3878,f3879,f3880,f3881,f3882,f3883,f3884,f3885,f3886,f3887,
+f3888,f3889,f3890,f3891,f3892,f3893,f3894,f3895,f3896,f3897,f3898,f3899,f3900,f3901,f3902,f3903,
+f3904,f3905,f3906,f3907,f3908,f3909,f3910,f3911,f3912,f3913,f3914,f3915,f3916,f3917,f3918,f3919,
+f3920,f3921,f3922,f3923,f3924,f3925,f3926,f3927,f3928,f3929,f3930,f3931,f3932,f3933,f3934,f3935,
+f3936,f3937,f3938,f3939,f3940,f3941,f3942,f3943,f3944,f3945,f3946,f3947,f3948,f3949,f3950,f3951,
+f3952,f3953,f3954,f3955,f3956,f3957,f3958,f3959,f3960,f3961,f3962,f3963,f3964,f3965,f3966,f3967,
+f3968,f3969,f3970,f3971,f3972,f3973,f3974,f3975,f3976,f3977,f3978,f3979,f3980,f3981,f3982,f3983,
+f3984,f3985,f3986,f3987,f3988,f3989,f3990,f3991,f3992,f3993,f3994,f3995,f3996,f3997,f3998,f3999,
+f4000,f4001,f4002,f4003,f4004,f4005,f4006,f4007,f4008,f4009,f4010,f4011,f4012,f4013,f4014,f4015,
+f4016,f4017,f4018,f4019,f4020,f4021,f4022,f4023,f4024,f4025,f4026,f4027,f4028,f4029,f4030,f4031,
+f4032,f4033,f4034,f4035,f4036,f4037,f4038,f4039,f4040,f4041,f4042,f4043,f4044,f4045,f4046,f4047,
+f4048,f4049,f4050,f4051,f4052,f4053,f4054,f4055,f4056,f4057,f4058,f4059,f4060,f4061,f4062,f4063,
+f4064,f4065,f4066,f4067,f4068,f4069,f4070,f4071,f4072,f4073,f4074,f4075,f4076,f4077,f4078,f4079,
+f4080,f4081,f4082,f4083,f4084,f4085,f4086,f4087,f4088,f4089,f4090,f4091,f4092,f4093,f4094,f4095,
+};
+static const int ref[4096][2] = {
+{     2, 0},
+{     0, 1},{     1, 0},{    -1, 0},{     3, 0},{    -1, 0},{     2, 0},{    -2, 0},{     5, 0},
+{    -3, 0},{     4, 0},{    -4, 0},{     9, 0},{    -7, 0},{     8, 0},{    -8, 0},{    17, 0},
+{   -15, 0},{    16, 0},{   -16, 0},{    33, 0},{   -31, 0},{    32, 0},{   -32, 0},{    65, 0},
+{   -63, 0},{    64, 0},{   -64, 0},{   129, 0},{  -127, 0},{   128, 0},{  -128, 0},{   257, 0},
+{  -255, 0},{   256, 0},{  -256, 0},{   513, 0},{  -511, 0},{   512, 0},{  -512, 0},{  1025, 0},
+{ -1023, 0},{  1024, 0},{ -1024, 0},{  2049, 0},{ -2047, 0},{  2048, 0},{ -2048, 0},{  4097, 0},
+{ -4095, 0},{  4096, 0},{ -4096, 0},{  8193, 0},{ -8191, 0},{  8192, 0},{ -8192, 0},{ 16385, 0},
+{-16383, 0},{ 16384, 0},{-16384, 0},{ 32769, 0},{-32767, 0},{ 32768, 0},{-32768, 0},{     0, 1},
+{    -2, 1},{    -1, 0},{    -3, 1},{     1, 1},{    -3, 1},{     0, 1},{    -4, 1},{     3, 1},
+{    -5, 1},{     2, 1},{    -6, 1},{     7, 1},{    -9, 1},{     6, 1},{   -10, 1},{    15, 1},
+{   -17, 1},{    14, 1},{   -18, 1},{    31, 1},{   -33, 1},{    30, 1},{   -34, 1},{    63, 1},
+{   -65, 1},{    62, 1},{   -66, 1},{   127, 1},{  -129, 1},{   126, 1},{  -130, 1},{   255, 1},
+{  -257, 1},{   254, 1},{  -258, 1},{   511, 1},{  -513, 1},{   510, 1},{  -514, 1},{  1023, 1},
+{ -1025, 1},{  1022, 1},{ -1026, 1},{  2047, 1},{ -2049, 1},{  2046, 1},{ -2050, 1},{  4095, 1},
+{ -4097, 1},{  4094, 1},{ -4098, 1},{  8191, 1},{ -8193, 1},{  8190, 1},{ -8194, 1},{ 16383, 1},
+{-16385, 1},{ 16382, 1},{-16386, 1},{ 32767, 1},{-32769, 1},{ 32766, 1},{-32770, 1},{     1, 0},
+{    -1, 0},{     0, 0},{    -2, 0},{     2, 0},{    -2, 0},{     1, 0},{    -3, 0},{     4, 0},
+{    -4, 0},{     3, 0},{    -5, 0},{     8, 0},{    -8, 0},{     7, 0},{    -9, 0},{    16, 0},
+{   -16, 0},{    15, 0},{   -17, 0},{    32, 0},{   -32, 0},{    31, 0},{   -33, 0},{    64, 0},
+{   -64, 0},{    63, 0},{   -65, 0},{   128, 0},{  -128, 0},{   127, 0},{  -129, 0},{   256, 0},
+{  -256, 0},{   255, 0},{  -257, 0},{   512, 0},{  -512, 0},{   511, 0},{  -513, 0},{  1024, 0},
+{ -1024, 0},{  1023, 0},{ -1025, 0},{  2048, 0},{ -2048, 0},{  2047, 0},{ -2049, 0},{  4096, 0},
+{ -4096, 0},{  4095, 0},{ -4097, 0},{  8192, 0},{ -8192, 0},{  8191, 0},{ -8193, 0},{ 16384, 0},
+{-16384, 0},{ 16383, 0},{-16385, 0},{ 32768, 0},{-32768, 0},{ 32767, 0},{-32769, 0},{    -1, 0},
+{    -3, 1},{    -2, 0},{    -4, 1},{     0, 1},{    -4, 1},{    -1, 0},{    -5, 1},{     2, 1},
+{    -6, 1},{     1, 1},{    -7, 1},{     6, 1},{   -10, 1},{     5, 1},{   -11, 1},{    14, 1},
+{   -18, 1},{    13, 1},{   -19, 1},{    30, 1},{   -34, 1},{    29, 1},{   -35, 1},{    62, 1},
+{   -66, 1},{    61, 1},{   -67, 1},{   126, 1},{  -130, 1},{   125, 1},{  -131, 1},{   254, 1},
+{  -258, 1},{   253, 1},{  -259, 1},{   510, 1},{  -514, 1},{   509, 1},{  -515, 1},{  1022, 1},
+{ -1026, 1},{  1021, 1},{ -1027, 1},{  2046, 1},{ -2050, 1},{  2045, 1},{ -2051, 1},{  4094, 1},
+{ -4098, 1},{  4093, 1},{ -4099, 1},{  8190, 1},{ -8194, 1},{  8189, 1},{ -8195, 1},{ 16382, 1},
+{-16386, 1},{ 16381, 1},{-16387, 1},{ 32766, 1},{-32770, 1},{ 32765, 1},{-32771, 1},{     3, 0},
+{     1, 1},{     2, 0},{     0, 1},{     4, 0},{     0, 1},{     3, 0},{    -1, 0},{     6, 0},
+{    -2, 0},{     5, 0},{    -3, 0},{    10, 0},{    -6, 0},{     9, 0},{    -7, 0},{    18, 0},
+{   -14, 0},{    17, 0},{   -15, 0},{    34, 0},{   -30, 0},{    33, 0},{   -31, 0},{    66, 0},
+{   -62, 0},{    65, 0},{   -63, 0},{   130, 0},{  -126, 0},{   129, 0},{  -127, 0},{   258, 0},
+{  -254, 0},{   257, 0},{  -255, 0},{   514, 0},{  -510, 0},{   513, 0},{  -511, 0},{  1026, 0},
+{ -1022, 0},{  1025, 0},{ -1023, 0},{  2050, 0},{ -2046, 0},{  2049, 0},{ -2047, 0},{  4098, 0},
+{ -4094, 0},{  4097, 0},{ -4095, 0},{  8194, 0},{ -8190, 0},{  8193, 0},{ -8191, 0},{ 16386, 0},
+{-16382, 0},{ 16385, 0},{-16383, 0},{ 32770, 0},{-32766, 0},{ 32769, 0},{-32767, 0},{    -1, 0},
+{    -3, 1},{    -2, 0},{    -4, 1},{     0, 1},{    -4, 1},{    -1, 0},{    -5, 1},{     2, 1},
+{    -6, 1},{     1, 1},{    -7, 1},{     6, 1},{   -10, 1},{     5, 1},{   -11, 1},{    14, 1},
+{   -18, 1},{    13, 1},{   -19, 1},{    30, 1},{   -34, 1},{    29, 1},{   -35, 1},{    62, 1},
+{   -66, 1},{    61, 1},{   -67, 1},{   126, 1},{  -130, 1},{   125, 1},{  -131, 1},{   254, 1},
+{  -258, 1},{   253, 1},{  -259, 1},{   510, 1},{  -514, 1},{   509, 1},{  -515, 1},{  1022, 1},
+{ -1026, 1},{  1021, 1},{ -1027, 1},{  2046, 1},{ -2050, 1},{  2045, 1},{ -2051, 1},{  4094, 1},
+{ -4098, 1},{  4093, 1},{ -4099, 1},{  8190, 1},{ -8194, 1},{  8189, 1},{ -8195, 1},{ 16382, 1},
+{-16386, 1},{ 16381, 1},{-16387, 1},{ 32766, 1},{-32770, 1},{ 32765, 1},{-32771, 1},{     2, 0},
+{     0, 1},{     1, 0},{    -1, 0},{     3, 0},{    -1, 0},{     2, 0},{    -2, 0},{     5, 0},
+{    -3, 0},{     4, 0},{    -4, 0},{     9, 0},{    -7, 0},{     8, 0},{    -8, 0},{    17, 0},
+{   -15, 0},{    16, 0},{   -16, 0},{    33, 0},{   -31, 0},{    32, 0},{   -32, 0},{    65, 0},
+{   -63, 0},{    64, 0},{   -64, 0},{   129, 0},{  -127, 0},{   128, 0},{  -128, 0},{   257, 0},
+{  -255, 0},{   256, 0},{  -256, 0},{   513, 0},{  -511, 0},{   512, 0},{  -512, 0},{  1025, 0},
+{ -1023, 0},{  1024, 0},{ -1024, 0},{  2049, 0},{ -2047, 0},{  2048, 0},{ -2048, 0},{  4097, 0},
+{ -4095, 0},{  4096, 0},{ -4096, 0},{  8193, 0},{ -8191, 0},{  8192, 0},{ -8192, 0},{ 16385, 0},
+{-16383, 0},{ 16384, 0},{-16384, 0},{ 32769, 0},{-32767, 0},{ 32768, 0},{-32768, 0},{    -2, 0},
+{    -4, 1},{    -3, 0},{    -5, 1},{    -1, 0},{    -5, 1},{    -2, 0},{    -6, 1},{     1, 1},
+{    -7, 1},{     0, 1},{    -8, 1},{     5, 1},{   -11, 1},{     4, 1},{   -12, 1},{    13, 1},
+{   -19, 1},{    12, 1},{   -20, 1},{    29, 1},{   -35, 1},{    28, 1},{   -36, 1},{    61, 1},
+{   -67, 1},{    60, 1},{   -68, 1},{   125, 1},{  -131, 1},{   124, 1},{  -132, 1},{   253, 1},
+{  -259, 1},{   252, 1},{  -260, 1},{   509, 1},{  -515, 1},{   508, 1},{  -516, 1},{  1021, 1},
+{ -1027, 1},{  1020, 1},{ -1028, 1},{  2045, 1},{ -2051, 1},{  2044, 1},{ -2052, 1},{  4093, 1},
+{ -4099, 1},{  4092, 1},{ -4100, 1},{  8189, 1},{ -8195, 1},{  8188, 1},{ -8196, 1},{ 16381, 1},
+{-16387, 1},{ 16380, 1},{-16388, 1},{ 32765, 1},{-32771, 1},{ 32764, 1},{-32772, 1},{     5, 0},
+{     3, 1},{     4, 0},{     2, 1},{     6, 0},{     2, 1},{     5, 0},{     1, 1},{     8, 0},
+{     0, 1},{     7, 0},{    -1, 0},{    12, 0},{    -4, 0},{    11, 0},{    -5, 0},{    20, 0},
+{   -12, 0},{    19, 0},{   -13, 0},{    36, 0},{   -28, 0},{    35, 0},{   -29, 0},{    68, 0},
+{   -60, 0},{    67, 0},{   -61, 0},{   132, 0},{  -124, 0},{   131, 0},{  -125, 0},{   260, 0},
+{  -252, 0},{   259, 0},{  -253, 0},{   516, 0},{  -508, 0},{   515, 0},{  -509, 0},{  1028, 0},
+{ -1020, 0},{  1027, 0},{ -1021, 0},{  2052, 0},{ -2044, 0},{  2051, 0},{ -2045, 0},{  4100, 0},
+{ -4092, 0},{  4099, 0},{ -4093, 0},{  8196, 0},{ -8188, 0},{  8195, 0},{ -8189, 0},{ 16388, 0},
+{-16380, 0},{ 16387, 0},{-16381, 0},{ 32772, 0},{-32764, 0},{ 32771, 0},{-32765, 0},{    -3, 0},
+{    -5, 1},{    -4, 0},{    -6, 1},{    -2, 0},{    -6, 1},{    -3, 0},{    -7, 1},{     0, 1},
+{    -8, 1},{    -1, 0},{    -9, 1},{     4, 1},{   -12, 1},{     3, 1},{   -13, 1},{    12, 1},
+{   -20, 1},{    11, 1},{   -21, 1},{    28, 1},{   -36, 1},{    27, 1},{   -37, 1},{    60, 1},
+{   -68, 1},{    59, 1},{   -69, 1},{   124, 1},{  -132, 1},{   123, 1},{  -133, 1},{   252, 1},
+{  -260, 1},{   251, 1},{  -261, 1},{   508, 1},{  -516, 1},{   507, 1},{  -517, 1},{  1020, 1},
+{ -1028, 1},{  1019, 1},{ -1029, 1},{  2044, 1},{ -2052, 1},{  2043, 1},{ -2053, 1},{  4092, 1},
+{ -4100, 1},{  4091, 1},{ -4101, 1},{  8188, 1},{ -8196, 1},{  8187, 1},{ -8197, 1},{ 16380, 1},
+{-16388, 1},{ 16379, 1},{-16389, 1},{ 32764, 1},{-32772, 1},{ 32763, 1},{-32773, 1},{     4, 0},
+{     2, 1},{     3, 0},{     1, 1},{     5, 0},{     1, 1},{     4, 0},{     0, 1},{     7, 0},
+{    -1, 0},{     6, 0},{    -2, 0},{    11, 0},{    -5, 0},{    10, 0},{    -6, 0},{    19, 0},
+{   -13, 0},{    18, 0},{   -14, 0},{    35, 0},{   -29, 0},{    34, 0},{   -30, 0},{    67, 0},
+{   -61, 0},{    66, 0},{   -62, 0},{   131, 0},{  -125, 0},{   130, 0},{  -126, 0},{   259, 0},
+{  -253, 0},{   258, 0},{  -254, 0},{   515, 0},{  -509, 0},{   514, 0},{  -510, 0},{  1027, 0},
+{ -1021, 0},{  1026, 0},{ -1022, 0},{  2051, 0},{ -2045, 0},{  2050, 0},{ -2046, 0},{  4099, 0},
+{ -4093, 0},{  4098, 0},{ -4094, 0},{  8195, 0},{ -8189, 0},{  8194, 0},{ -8190, 0},{ 16387, 0},
+{-16381, 0},{ 16386, 0},{-16382, 0},{ 32771, 0},{-32765, 0},{ 32770, 0},{-32766, 0},{    -4, 0},
+{    -6, 1},{    -5, 0},{    -7, 1},{    -3, 0},{    -7, 1},{    -4, 0},{    -8, 1},{    -1, 0},
+{    -9, 1},{    -2, 0},{   -10, 1},{     3, 1},{   -13, 1},{     2, 1},{   -14, 1},{    11, 1},
+{   -21, 1},{    10, 1},{   -22, 1},{    27, 1},{   -37, 1},{    26, 1},{   -38, 1},{    59, 1},
+{   -69, 1},{    58, 1},{   -70, 1},{   123, 1},{  -133, 1},{   122, 1},{  -134, 1},{   251, 1},
+{  -261, 1},{   250, 1},{  -262, 1},{   507, 1},{  -517, 1},{   506, 1},{  -518, 1},{  1019, 1},
+{ -1029, 1},{  1018, 1},{ -1030, 1},{  2043, 1},{ -2053, 1},{  2042, 1},{ -2054, 1},{  4091, 1},
+{ -4101, 1},{  4090, 1},{ -4102, 1},{  8187, 1},{ -8197, 1},{  8186, 1},{ -8198, 1},{ 16379, 1},
+{-16389, 1},{ 16378, 1},{-16390, 1},{ 32763, 1},{-32773, 1},{ 32762, 1},{-32774, 1},{     9, 0},
+{     7, 1},{     8, 0},{     6, 1},{    10, 0},{     6, 1},{     9, 0},{     5, 1},{    12, 0},
+{     4, 1},{    11, 0},{     3, 1},{    16, 0},{     0, 1},{    15, 0},{    -1, 0},{    24, 0},
+{    -8, 0},{    23, 0},{    -9, 0},{    40, 0},{   -24, 0},{    39, 0},{   -25, 0},{    72, 0},
+{   -56, 0},{    71, 0},{   -57, 0},{   136, 0},{  -120, 0},{   135, 0},{  -121, 0},{   264, 0},
+{  -248, 0},{   263, 0},{  -249, 0},{   520, 0},{  -504, 0},{   519, 0},{  -505, 0},{  1032, 0},
+{ -1016, 0},{  1031, 0},{ -1017, 0},{  2056, 0},{ -2040, 0},{  2055, 0},{ -2041, 0},{  4104, 0},
+{ -4088, 0},{  4103, 0},{ -4089, 0},{  8200, 0},{ -8184, 0},{  8199, 0},{ -8185, 0},{ 16392, 0},
+{-16376, 0},{ 16391, 0},{-16377, 0},{ 32776, 0},{-32760, 0},{ 32775, 0},{-32761, 0},{    -7, 0},
+{    -9, 1},{    -8, 0},{   -10, 1},{    -6, 0},{   -10, 1},{    -7, 0},{   -11, 1},{    -4, 0},
+{   -12, 1},{    -5, 0},{   -13, 1},{     0, 1},{   -16, 1},{    -1, 0},{   -17, 1},{     8, 1},
+{   -24, 1},{     7, 1},{   -25, 1},{    24, 1},{   -40, 1},{    23, 1},{   -41, 1},{    56, 1},
+{   -72, 1},{    55, 1},{   -73, 1},{   120, 1},{  -136, 1},{   119, 1},{  -137, 1},{   248, 1},
+{  -264, 1},{   247, 1},{  -265, 1},{   504, 1},{  -520, 1},{   503, 1},{  -521, 1},{  1016, 1},
+{ -1032, 1},{  1015, 1},{ -1033, 1},{  2040, 1},{ -2056, 1},{  2039, 1},{ -2057, 1},{  4088, 1},
+{ -4104, 1},{  4087, 1},{ -4105, 1},{  8184, 1},{ -8200, 1},{  8183, 1},{ -8201, 1},{ 16376, 1},
+{-16392, 1},{ 16375, 1},{-16393, 1},{ 32760, 1},{-32776, 1},{ 32759, 1},{-32777, 1},{     8, 0},
+{     6, 1},{     7, 0},{     5, 1},{     9, 0},{     5, 1},{     8, 0},{     4, 1},{    11, 0},
+{     3, 1},{    10, 0},{     2, 1},{    15, 0},{    -1, 0},{    14, 0},{    -2, 0},{    23, 0},
+{    -9, 0},{    22, 0},{   -10, 0},{    39, 0},{   -25, 0},{    38, 0},{   -26, 0},{    71, 0},
+{   -57, 0},{    70, 0},{   -58, 0},{   135, 0},{  -121, 0},{   134, 0},{  -122, 0},{   263, 0},
+{  -249, 0},{   262, 0},{  -250, 0},{   519, 0},{  -505, 0},{   518, 0},{  -506, 0},{  1031, 0},
+{ -1017, 0},{  1030, 0},{ -1018, 0},{  2055, 0},{ -2041, 0},{  2054, 0},{ -2042, 0},{  4103, 0},
+{ -4089, 0},{  4102, 0},{ -4090, 0},{  8199, 0},{ -8185, 0},{  8198, 0},{ -8186, 0},{ 16391, 0},
+{-16377, 0},{ 16390, 0},{-16378, 0},{ 32775, 0},{-32761, 0},{ 32774, 0},{-32762, 0},{    -8, 0},
+{   -10, 1},{    -9, 0},{   -11, 1},{    -7, 0},{   -11, 1},{    -8, 0},{   -12, 1},{    -5, 0},
+{   -13, 1},{    -6, 0},{   -14, 1},{    -1, 0},{   -17, 1},{    -2, 0},{   -18, 1},{     7, 1},
+{   -25, 1},{     6, 1},{   -26, 1},{    23, 1},{   -41, 1},{    22, 1},{   -42, 1},{    55, 1},
+{   -73, 1},{    54, 1},{   -74, 1},{   119, 1},{  -137, 1},{   118, 1},{  -138, 1},{   247, 1},
+{  -265, 1},{   246, 1},{  -266, 1},{   503, 1},{  -521, 1},{   502, 1},{  -522, 1},{  1015, 1},
+{ -1033, 1},{  1014, 1},{ -1034, 1},{  2039, 1},{ -2057, 1},{  2038, 1},{ -2058, 1},{  4087, 1},
+{ -4105, 1},{  4086, 1},{ -4106, 1},{  8183, 1},{ -8201, 1},{  8182, 1},{ -8202, 1},{ 16375, 1},
+{-16393, 1},{ 16374, 1},{-16394, 1},{ 32759, 1},{-32777, 1},{ 32758, 1},{-32778, 1},{    17, 0},
+{    15, 1},{    16, 0},{    14, 1},{    18, 0},{    14, 1},{    17, 0},{    13, 1},{    20, 0},
+{    12, 1},{    19, 0},{    11, 1},{    24, 0},{     8, 1},{    23, 0},{     7, 1},{    32, 0},
+{     0, 1},{    31, 0},{    -1, 0},{    48, 0},{   -16, 0},{    47, 0},{   -17, 0},{    80, 0},
+{   -48, 0},{    79, 0},{   -49, 0},{   144, 0},{  -112, 0},{   143, 0},{  -113, 0},{   272, 0},
+{  -240, 0},{   271, 0},{  -241, 0},{   528, 0},{  -496, 0},{   527, 0},{  -497, 0},{  1040, 0},
+{ -1008, 0},{  1039, 0},{ -1009, 0},{  2064, 0},{ -2032, 0},{  2063, 0},{ -2033, 0},{  4112, 0},
+{ -4080, 0},{  4111, 0},{ -4081, 0},{  8208, 0},{ -8176, 0},{  8207, 0},{ -8177, 0},{ 16400, 0},
+{-16368, 0},{ 16399, 0},{-16369, 0},{ 32784, 0},{-32752, 0},{ 32783, 0},{-32753, 0},{   -15, 0},
+{   -17, 1},{   -16, 0},{   -18, 1},{   -14, 0},{   -18, 1},{   -15, 0},{   -19, 1},{   -12, 0},
+{   -20, 1},{   -13, 0},{   -21, 1},{    -8, 0},{   -24, 1},{    -9, 0},{   -25, 1},{     0, 1},
+{   -32, 1},{    -1, 0},{   -33, 1},{    16, 1},{   -48, 1},{    15, 1},{   -49, 1},{    48, 1},
+{   -80, 1},{    47, 1},{   -81, 1},{   112, 1},{  -144, 1},{   111, 1},{  -145, 1},{   240, 1},
+{  -272, 1},{   239, 1},{  -273, 1},{   496, 1},{  -528, 1},{   495, 1},{  -529, 1},{  1008, 1},
+{ -1040, 1},{  1007, 1},{ -1041, 1},{  2032, 1},{ -2064, 1},{  2031, 1},{ -2065, 1},{  4080, 1},
+{ -4112, 1},{  4079, 1},{ -4113, 1},{  8176, 1},{ -8208, 1},{  8175, 1},{ -8209, 1},{ 16368, 1},
+{-16400, 1},{ 16367, 1},{-16401, 1},{ 32752, 1},{-32784, 1},{ 32751, 1},{-32785, 1},{    16, 0},
+{    14, 1},{    15, 0},{    13, 1},{    17, 0},{    13, 1},{    16, 0},{    12, 1},{    19, 0},
+{    11, 1},{    18, 0},{    10, 1},{    23, 0},{     7, 1},{    22, 0},{     6, 1},{    31, 0},
+{    -1, 0},{    30, 0},{    -2, 0},{    47, 0},{   -17, 0},{    46, 0},{   -18, 0},{    79, 0},
+{   -49, 0},{    78, 0},{   -50, 0},{   143, 0},{  -113, 0},{   142, 0},{  -114, 0},{   271, 0},
+{  -241, 0},{   270, 0},{  -242, 0},{   527, 0},{  -497, 0},{   526, 0},{  -498, 0},{  1039, 0},
+{ -1009, 0},{  1038, 0},{ -1010, 0},{  2063, 0},{ -2033, 0},{  2062, 0},{ -2034, 0},{  4111, 0},
+{ -4081, 0},{  4110, 0},{ -4082, 0},{  8207, 0},{ -8177, 0},{  8206, 0},{ -8178, 0},{ 16399, 0},
+{-16369, 0},{ 16398, 0},{-16370, 0},{ 32783, 0},{-32753, 0},{ 32782, 0},{-32754, 0},{   -16, 0},
+{   -18, 1},{   -17, 0},{   -19, 1},{   -15, 0},{   -19, 1},{   -16, 0},{   -20, 1},{   -13, 0},
+{   -21, 1},{   -14, 0},{   -22, 1},{    -9, 0},{   -25, 1},{   -10, 0},{   -26, 1},{    -1, 0},
+{   -33, 1},{    -2, 0},{   -34, 1},{    15, 1},{   -49, 1},{    14, 1},{   -50, 1},{    47, 1},
+{   -81, 1},{    46, 1},{   -82, 1},{   111, 1},{  -145, 1},{   110, 1},{  -146, 1},{   239, 1},
+{  -273, 1},{   238, 1},{  -274, 1},{   495, 1},{  -529, 1},{   494, 1},{  -530, 1},{  1007, 1},
+{ -1041, 1},{  1006, 1},{ -1042, 1},{  2031, 1},{ -2065, 1},{  2030, 1},{ -2066, 1},{  4079, 1},
+{ -4113, 1},{  4078, 1},{ -4114, 1},{  8175, 1},{ -8209, 1},{  8174, 1},{ -8210, 1},{ 16367, 1},
+{-16401, 1},{ 16366, 1},{-16402, 1},{ 32751, 1},{-32785, 1},{ 32750, 1},{-32786, 1},{    33, 0},
+{    31, 1},{    32, 0},{    30, 1},{    34, 0},{    30, 1},{    33, 0},{    29, 1},{    36, 0},
+{    28, 1},{    35, 0},{    27, 1},{    40, 0},{    24, 1},{    39, 0},{    23, 1},{    48, 0},
+{    16, 1},{    47, 0},{    15, 1},{    64, 0},{     0, 1},{    63, 0},{    -1, 0},{    96, 0},
+{   -32, 0},{    95, 0},{   -33, 0},{   160, 0},{   -96, 0},{   159, 0},{   -97, 0},{   288, 0},
+{  -224, 0},{   287, 0},{  -225, 0},{   544, 0},{  -480, 0},{   543, 0},{  -481, 0},{  1056, 0},
+{  -992, 0},{  1055, 0},{  -993, 0},{  2080, 0},{ -2016, 0},{  2079, 0},{ -2017, 0},{  4128, 0},
+{ -4064, 0},{  4127, 0},{ -4065, 0},{  8224, 0},{ -8160, 0},{  8223, 0},{ -8161, 0},{ 16416, 0},
+{-16352, 0},{ 16415, 0},{-16353, 0},{ 32800, 0},{-32736, 0},{ 32799, 0},{-32737, 0},{   -31, 0},
+{   -33, 1},{   -32, 0},{   -34, 1},{   -30, 0},{   -34, 1},{   -31, 0},{   -35, 1},{   -28, 0},
+{   -36, 1},{   -29, 0},{   -37, 1},{   -24, 0},{   -40, 1},{   -25, 0},{   -41, 1},{   -16, 0},
+{   -48, 1},{   -17, 0},{   -49, 1},{     0, 1},{   -64, 1},{    -1, 0},{   -65, 1},{    32, 1},
+{   -96, 1},{    31, 1},{   -97, 1},{    96, 1},{  -160, 1},{    95, 1},{  -161, 1},{   224, 1},
+{  -288, 1},{   223, 1},{  -289, 1},{   480, 1},{  -544, 1},{   479, 1},{  -545, 1},{   992, 1},
+{ -1056, 1},{   991, 1},{ -1057, 1},{  2016, 1},{ -2080, 1},{  2015, 1},{ -2081, 1},{  4064, 1},
+{ -4128, 1},{  4063, 1},{ -4129, 1},{  8160, 1},{ -8224, 1},{  8159, 1},{ -8225, 1},{ 16352, 1},
+{-16416, 1},{ 16351, 1},{-16417, 1},{ 32736, 1},{-32800, 1},{ 32735, 1},{-32801, 1},{    32, 0},
+{    30, 1},{    31, 0},{    29, 1},{    33, 0},{    29, 1},{    32, 0},{    28, 1},{    35, 0},
+{    27, 1},{    34, 0},{    26, 1},{    39, 0},{    23, 1},{    38, 0},{    22, 1},{    47, 0},
+{    15, 1},{    46, 0},{    14, 1},{    63, 0},{    -1, 0},{    62, 0},{    -2, 0},{    95, 0},
+{   -33, 0},{    94, 0},{   -34, 0},{   159, 0},{   -97, 0},{   158, 0},{   -98, 0},{   287, 0},
+{  -225, 0},{   286, 0},{  -226, 0},{   543, 0},{  -481, 0},{   542, 0},{  -482, 0},{  1055, 0},
+{  -993, 0},{  1054, 0},{  -994, 0},{  2079, 0},{ -2017, 0},{  2078, 0},{ -2018, 0},{  4127, 0},
+{ -4065, 0},{  4126, 0},{ -4066, 0},{  8223, 0},{ -8161, 0},{  8222, 0},{ -8162, 0},{ 16415, 0},
+{-16353, 0},{ 16414, 0},{-16354, 0},{ 32799, 0},{-32737, 0},{ 32798, 0},{-32738, 0},{   -32, 0},
+{   -34, 1},{   -33, 0},{   -35, 1},{   -31, 0},{   -35, 1},{   -32, 0},{   -36, 1},{   -29, 0},
+{   -37, 1},{   -30, 0},{   -38, 1},{   -25, 0},{   -41, 1},{   -26, 0},{   -42, 1},{   -17, 0},
+{   -49, 1},{   -18, 0},{   -50, 1},{    -1, 0},{   -65, 1},{    -2, 0},{   -66, 1},{    31, 1},
+{   -97, 1},{    30, 1},{   -98, 1},{    95, 1},{  -161, 1},{    94, 1},{  -162, 1},{   223, 1},
+{  -289, 1},{   222, 1},{  -290, 1},{   479, 1},{  -545, 1},{   478, 1},{  -546, 1},{   991, 1},
+{ -1057, 1},{   990, 1},{ -1058, 1},{  2015, 1},{ -2081, 1},{  2014, 1},{ -2082, 1},{  4063, 1},
+{ -4129, 1},{  4062, 1},{ -4130, 1},{  8159, 1},{ -8225, 1},{  8158, 1},{ -8226, 1},{ 16351, 1},
+{-16417, 1},{ 16350, 1},{-16418, 1},{ 32735, 1},{-32801, 1},{ 32734, 1},{-32802, 1},{    65, 0},
+{    63, 1},{    64, 0},{    62, 1},{    66, 0},{    62, 1},{    65, 0},{    61, 1},{    68, 0},
+{    60, 1},{    67, 0},{    59, 1},{    72, 0},{    56, 1},{    71, 0},{    55, 1},{    80, 0},
+{    48, 1},{    79, 0},{    47, 1},{    96, 0},{    32, 1},{    95, 0},{    31, 1},{   128, 0},
+{     0, 1},{   127, 0},{    -1, 0},{   192, 0},{   -64, 0},{   191, 0},{   -65, 0},{   320, 0},
+{  -192, 0},{   319, 0},{  -193, 0},{   576, 0},{  -448, 0},{   575, 0},{  -449, 0},{  1088, 0},
+{  -960, 0},{  1087, 0},{  -961, 0},{  2112, 0},{ -1984, 0},{  2111, 0},{ -1985, 0},{  4160, 0},
+{ -4032, 0},{  4159, 0},{ -4033, 0},{  8256, 0},{ -8128, 0},{  8255, 0},{ -8129, 0},{ 16448, 0},
+{-16320, 0},{ 16447, 0},{-16321, 0},{ 32832, 0},{-32704, 0},{ 32831, 0},{-32705, 0},{   -63, 0},
+{   -65, 1},{   -64, 0},{   -66, 1},{   -62, 0},{   -66, 1},{   -63, 0},{   -67, 1},{   -60, 0},
+{   -68, 1},{   -61, 0},{   -69, 1},{   -56, 0},{   -72, 1},{   -57, 0},{   -73, 1},{   -48, 0},
+{   -80, 1},{   -49, 0},{   -81, 1},{   -32, 0},{   -96, 1},{   -33, 0},{   -97, 1},{     0, 1},
+{  -128, 1},{    -1, 0},{  -129, 1},{    64, 1},{  -192, 1},{    63, 1},{  -193, 1},{   192, 1},
+{  -320, 1},{   191, 1},{  -321, 1},{   448, 1},{  -576, 1},{   447, 1},{  -577, 1},{   960, 1},
+{ -1088, 1},{   959, 1},{ -1089, 1},{  1984, 1},{ -2112, 1},{  1983, 1},{ -2113, 1},{  4032, 1},
+{ -4160, 1},{  4031, 1},{ -4161, 1},{  8128, 1},{ -8256, 1},{  8127, 1},{ -8257, 1},{ 16320, 1},
+{-16448, 1},{ 16319, 1},{-16449, 1},{ 32704, 1},{-32832, 1},{ 32703, 1},{-32833, 1},{    64, 0},
+{    62, 1},{    63, 0},{    61, 1},{    65, 0},{    61, 1},{    64, 0},{    60, 1},{    67, 0},
+{    59, 1},{    66, 0},{    58, 1},{    71, 0},{    55, 1},{    70, 0},{    54, 1},{    79, 0},
+{    47, 1},{    78, 0},{    46, 1},{    95, 0},{    31, 1},{    94, 0},{    30, 1},{   127, 0},
+{    -1, 0},{   126, 0},{    -2, 0},{   191, 0},{   -65, 0},{   190, 0},{   -66, 0},{   319, 0},
+{  -193, 0},{   318, 0},{  -194, 0},{   575, 0},{  -449, 0},{   574, 0},{  -450, 0},{  1087, 0},
+{  -961, 0},{  1086, 0},{  -962, 0},{  2111, 0},{ -1985, 0},{  2110, 0},{ -1986, 0},{  4159, 0},
+{ -4033, 0},{  4158, 0},{ -4034, 0},{  8255, 0},{ -8129, 0},{  8254, 0},{ -8130, 0},{ 16447, 0},
+{-16321, 0},{ 16446, 0},{-16322, 0},{ 32831, 0},{-32705, 0},{ 32830, 0},{-32706, 0},{   -64, 0},
+{   -66, 1},{   -65, 0},{   -67, 1},{   -63, 0},{   -67, 1},{   -64, 0},{   -68, 1},{   -61, 0},
+{   -69, 1},{   -62, 0},{   -70, 1},{   -57, 0},{   -73, 1},{   -58, 0},{   -74, 1},{   -49, 0},
+{   -81, 1},{   -50, 0},{   -82, 1},{   -33, 0},{   -97, 1},{   -34, 0},{   -98, 1},{    -1, 0},
+{  -129, 1},{    -2, 0},{  -130, 1},{    63, 1},{  -193, 1},{    62, 1},{  -194, 1},{   191, 1},
+{  -321, 1},{   190, 1},{  -322, 1},{   447, 1},{  -577, 1},{   446, 1},{  -578, 1},{   959, 1},
+{ -1089, 1},{   958, 1},{ -1090, 1},{  1983, 1},{ -2113, 1},{  1982, 1},{ -2114, 1},{  4031, 1},
+{ -4161, 1},{  4030, 1},{ -4162, 1},{  8127, 1},{ -8257, 1},{  8126, 1},{ -8258, 1},{ 16319, 1},
+{-16449, 1},{ 16318, 1},{-16450, 1},{ 32703, 1},{-32833, 1},{ 32702, 1},{-32834, 1},{   129, 0},
+{   127, 1},{   128, 0},{   126, 1},{   130, 0},{   126, 1},{   129, 0},{   125, 1},{   132, 0},
+{   124, 1},{   131, 0},{   123, 1},{   136, 0},{   120, 1},{   135, 0},{   119, 1},{   144, 0},
+{   112, 1},{   143, 0},{   111, 1},{   160, 0},{    96, 1},{   159, 0},{    95, 1},{   192, 0},
+{    64, 1},{   191, 0},{    63, 1},{   256, 0},{     0, 1},{   255, 0},{    -1, 0},{   384, 0},
+{  -128, 0},{   383, 0},{  -129, 0},{   640, 0},{  -384, 0},{   639, 0},{  -385, 0},{  1152, 0},
+{  -896, 0},{  1151, 0},{  -897, 0},{  2176, 0},{ -1920, 0},{  2175, 0},{ -1921, 0},{  4224, 0},
+{ -3968, 0},{  4223, 0},{ -3969, 0},{  8320, 0},{ -8064, 0},{  8319, 0},{ -8065, 0},{ 16512, 0},
+{-16256, 0},{ 16511, 0},{-16257, 0},{ 32896, 0},{-32640, 0},{ 32895, 0},{-32641, 0},{  -127, 0},
+{  -129, 1},{  -128, 0},{  -130, 1},{  -126, 0},{  -130, 1},{  -127, 0},{  -131, 1},{  -124, 0},
+{  -132, 1},{  -125, 0},{  -133, 1},{  -120, 0},{  -136, 1},{  -121, 0},{  -137, 1},{  -112, 0},
+{  -144, 1},{  -113, 0},{  -145, 1},{   -96, 0},{  -160, 1},{   -97, 0},{  -161, 1},{   -64, 0},
+{  -192, 1},{   -65, 0},{  -193, 1},{     0, 1},{  -256, 1},{    -1, 0},{  -257, 1},{   128, 1},
+{  -384, 1},{   127, 1},{  -385, 1},{   384, 1},{  -640, 1},{   383, 1},{  -641, 1},{   896, 1},
+{ -1152, 1},{   895, 1},{ -1153, 1},{  1920, 1},{ -2176, 1},{  1919, 1},{ -2177, 1},{  3968, 1},
+{ -4224, 1},{  3967, 1},{ -4225, 1},{  8064, 1},{ -8320, 1},{  8063, 1},{ -8321, 1},{ 16256, 1},
+{-16512, 1},{ 16255, 1},{-16513, 1},{ 32640, 1},{-32896, 1},{ 32639, 1},{-32897, 1},{   128, 0},
+{   126, 1},{   127, 0},{   125, 1},{   129, 0},{   125, 1},{   128, 0},{   124, 1},{   131, 0},
+{   123, 1},{   130, 0},{   122, 1},{   135, 0},{   119, 1},{   134, 0},{   118, 1},{   143, 0},
+{   111, 1},{   142, 0},{   110, 1},{   159, 0},{    95, 1},{   158, 0},{    94, 1},{   191, 0},
+{    63, 1},{   190, 0},{    62, 1},{   255, 0},{    -1, 0},{   254, 0},{    -2, 0},{   383, 0},
+{  -129, 0},{   382, 0},{  -130, 0},{   639, 0},{  -385, 0},{   638, 0},{  -386, 0},{  1151, 0},
+{  -897, 0},{  1150, 0},{  -898, 0},{  2175, 0},{ -1921, 0},{  2174, 0},{ -1922, 0},{  4223, 0},
+{ -3969, 0},{  4222, 0},{ -3970, 0},{  8319, 0},{ -8065, 0},{  8318, 0},{ -8066, 0},{ 16511, 0},
+{-16257, 0},{ 16510, 0},{-16258, 0},{ 32895, 0},{-32641, 0},{ 32894, 0},{-32642, 0},{  -128, 0},
+{  -130, 1},{  -129, 0},{  -131, 1},{  -127, 0},{  -131, 1},{  -128, 0},{  -132, 1},{  -125, 0},
+{  -133, 1},{  -126, 0},{  -134, 1},{  -121, 0},{  -137, 1},{  -122, 0},{  -138, 1},{  -113, 0},
+{  -145, 1},{  -114, 0},{  -146, 1},{   -97, 0},{  -161, 1},{   -98, 0},{  -162, 1},{   -65, 0},
+{  -193, 1},{   -66, 0},{  -194, 1},{    -1, 0},{  -257, 1},{    -2, 0},{  -258, 1},{   127, 1},
+{  -385, 1},{   126, 1},{  -386, 1},{   383, 1},{  -641, 1},{   382, 1},{  -642, 1},{   895, 1},
+{ -1153, 1},{   894, 1},{ -1154, 1},{  1919, 1},{ -2177, 1},{  1918, 1},{ -2178, 1},{  3967, 1},
+{ -4225, 1},{  3966, 1},{ -4226, 1},{  8063, 1},{ -8321, 1},{  8062, 1},{ -8322, 1},{ 16255, 1},
+{-16513, 1},{ 16254, 1},{-16514, 1},{ 32639, 1},{-32897, 1},{ 32638, 1},{-32898, 1},{   257, 0},
+{   255, 1},{   256, 0},{   254, 1},{   258, 0},{   254, 1},{   257, 0},{   253, 1},{   260, 0},
+{   252, 1},{   259, 0},{   251, 1},{   264, 0},{   248, 1},{   263, 0},{   247, 1},{   272, 0},
+{   240, 1},{   271, 0},{   239, 1},{   288, 0},{   224, 1},{   287, 0},{   223, 1},{   320, 0},
+{   192, 1},{   319, 0},{   191, 1},{   384, 0},{   128, 1},{   383, 0},{   127, 1},{   512, 0},
+{     0, 1},{   511, 0},{    -1, 0},{   768, 0},{  -256, 0},{   767, 0},{  -257, 0},{  1280, 0},
+{  -768, 0},{  1279, 0},{  -769, 0},{  2304, 0},{ -1792, 0},{  2303, 0},{ -1793, 0},{  4352, 0},
+{ -3840, 0},{  4351, 0},{ -3841, 0},{  8448, 0},{ -7936, 0},{  8447, 0},{ -7937, 0},{ 16640, 0},
+{-16128, 0},{ 16639, 0},{-16129, 0},{ 33024, 0},{-32512, 0},{ 33023, 0},{-32513, 0},{  -255, 0},
+{  -257, 1},{  -256, 0},{  -258, 1},{  -254, 0},{  -258, 1},{  -255, 0},{  -259, 1},{  -252, 0},
+{  -260, 1},{  -253, 0},{  -261, 1},{  -248, 0},{  -264, 1},{  -249, 0},{  -265, 1},{  -240, 0},
+{  -272, 1},{  -241, 0},{  -273, 1},{  -224, 0},{  -288, 1},{  -225, 0},{  -289, 1},{  -192, 0},
+{  -320, 1},{  -193, 0},{  -321, 1},{  -128, 0},{  -384, 1},{  -129, 0},{  -385, 1},{     0, 1},
+{  -512, 1},{    -1, 0},{  -513, 1},{   256, 1},{  -768, 1},{   255, 1},{  -769, 1},{   768, 1},
+{ -1280, 1},{   767, 1},{ -1281, 1},{  1792, 1},{ -2304, 1},{  1791, 1},{ -2305, 1},{  3840, 1},
+{ -4352, 1},{  3839, 1},{ -4353, 1},{  7936, 1},{ -8448, 1},{  7935, 1},{ -8449, 1},{ 16128, 1},
+{-16640, 1},{ 16127, 1},{-16641, 1},{ 32512, 1},{-33024, 1},{ 32511, 1},{-33025, 1},{   256, 0},
+{   254, 1},{   255, 0},{   253, 1},{   257, 0},{   253, 1},{   256, 0},{   252, 1},{   259, 0},
+{   251, 1},{   258, 0},{   250, 1},{   263, 0},{   247, 1},{   262, 0},{   246, 1},{   271, 0},
+{   239, 1},{   270, 0},{   238, 1},{   287, 0},{   223, 1},{   286, 0},{   222, 1},{   319, 0},
+{   191, 1},{   318, 0},{   190, 1},{   383, 0},{   127, 1},{   382, 0},{   126, 1},{   511, 0},
+{    -1, 0},{   510, 0},{    -2, 0},{   767, 0},{  -257, 0},{   766, 0},{  -258, 0},{  1279, 0},
+{  -769, 0},{  1278, 0},{  -770, 0},{  2303, 0},{ -1793, 0},{  2302, 0},{ -1794, 0},{  4351, 0},
+{ -3841, 0},{  4350, 0},{ -3842, 0},{  8447, 0},{ -7937, 0},{  8446, 0},{ -7938, 0},{ 16639, 0},
+{-16129, 0},{ 16638, 0},{-16130, 0},{ 33023, 0},{-32513, 0},{ 33022, 0},{-32514, 0},{  -256, 0},
+{  -258, 1},{  -257, 0},{  -259, 1},{  -255, 0},{  -259, 1},{  -256, 0},{  -260, 1},{  -253, 0},
+{  -261, 1},{  -254, 0},{  -262, 1},{  -249, 0},{  -265, 1},{  -250, 0},{  -266, 1},{  -241, 0},
+{  -273, 1},{  -242, 0},{  -274, 1},{  -225, 0},{  -289, 1},{  -226, 0},{  -290, 1},{  -193, 0},
+{  -321, 1},{  -194, 0},{  -322, 1},{  -129, 0},{  -385, 1},{  -130, 0},{  -386, 1},{    -1, 0},
+{  -513, 1},{    -2, 0},{  -514, 1},{   255, 1},{  -769, 1},{   254, 1},{  -770, 1},{   767, 1},
+{ -1281, 1},{   766, 1},{ -1282, 1},{  1791, 1},{ -2305, 1},{  1790, 1},{ -2306, 1},{  3839, 1},
+{ -4353, 1},{  3838, 1},{ -4354, 1},{  7935, 1},{ -8449, 1},{  7934, 1},{ -8450, 1},{ 16127, 1},
+{-16641, 1},{ 16126, 1},{-16642, 1},{ 32511, 1},{-33025, 1},{ 32510, 1},{-33026, 1},{   513, 0},
+{   511, 1},{   512, 0},{   510, 1},{   514, 0},{   510, 1},{   513, 0},{   509, 1},{   516, 0},
+{   508, 1},{   515, 0},{   507, 1},{   520, 0},{   504, 1},{   519, 0},{   503, 1},{   528, 0},
+{   496, 1},{   527, 0},{   495, 1},{   544, 0},{   480, 1},{   543, 0},{   479, 1},{   576, 0},
+{   448, 1},{   575, 0},{   447, 1},{   640, 0},{   384, 1},{   639, 0},{   383, 1},{   768, 0},
+{   256, 1},{   767, 0},{   255, 1},{  1024, 0},{     0, 1},{  1023, 0},{    -1, 0},{  1536, 0},
+{  -512, 0},{  1535, 0},{  -513, 0},{  2560, 0},{ -1536, 0},{  2559, 0},{ -1537, 0},{  4608, 0},
+{ -3584, 0},{  4607, 0},{ -3585, 0},{  8704, 0},{ -7680, 0},{  8703, 0},{ -7681, 0},{ 16896, 0},
+{-15872, 0},{ 16895, 0},{-15873, 0},{ 33280, 0},{-32256, 0},{ 33279, 0},{-32257, 0},{  -511, 0},
+{  -513, 1},{  -512, 0},{  -514, 1},{  -510, 0},{  -514, 1},{  -511, 0},{  -515, 1},{  -508, 0},
+{  -516, 1},{  -509, 0},{  -517, 1},{  -504, 0},{  -520, 1},{  -505, 0},{  -521, 1},{  -496, 0},
+{  -528, 1},{  -497, 0},{  -529, 1},{  -480, 0},{  -544, 1},{  -481, 0},{  -545, 1},{  -448, 0},
+{  -576, 1},{  -449, 0},{  -577, 1},{  -384, 0},{  -640, 1},{  -385, 0},{  -641, 1},{  -256, 0},
+{  -768, 1},{  -257, 0},{  -769, 1},{     0, 1},{ -1024, 1},{    -1, 0},{ -1025, 1},{   512, 1},
+{ -1536, 1},{   511, 1},{ -1537, 1},{  1536, 1},{ -2560, 1},{  1535, 1},{ -2561, 1},{  3584, 1},
+{ -4608, 1},{  3583, 1},{ -4609, 1},{  7680, 1},{ -8704, 1},{  7679, 1},{ -8705, 1},{ 15872, 1},
+{-16896, 1},{ 15871, 1},{-16897, 1},{ 32256, 1},{-33280, 1},{ 32255, 1},{-33281, 1},{   512, 0},
+{   510, 1},{   511, 0},{   509, 1},{   513, 0},{   509, 1},{   512, 0},{   508, 1},{   515, 0},
+{   507, 1},{   514, 0},{   506, 1},{   519, 0},{   503, 1},{   518, 0},{   502, 1},{   527, 0},
+{   495, 1},{   526, 0},{   494, 1},{   543, 0},{   479, 1},{   542, 0},{   478, 1},{   575, 0},
+{   447, 1},{   574, 0},{   446, 1},{   639, 0},{   383, 1},{   638, 0},{   382, 1},{   767, 0},
+{   255, 1},{   766, 0},{   254, 1},{  1023, 0},{    -1, 0},{  1022, 0},{    -2, 0},{  1535, 0},
+{  -513, 0},{  1534, 0},{  -514, 0},{  2559, 0},{ -1537, 0},{  2558, 0},{ -1538, 0},{  4607, 0},
+{ -3585, 0},{  4606, 0},{ -3586, 0},{  8703, 0},{ -7681, 0},{  8702, 0},{ -7682, 0},{ 16895, 0},
+{-15873, 0},{ 16894, 0},{-15874, 0},{ 33279, 0},{-32257, 0},{ 33278, 0},{-32258, 0},{  -512, 0},
+{  -514, 1},{  -513, 0},{  -515, 1},{  -511, 0},{  -515, 1},{  -512, 0},{  -516, 1},{  -509, 0},
+{  -517, 1},{  -510, 0},{  -518, 1},{  -505, 0},{  -521, 1},{  -506, 0},{  -522, 1},{  -497, 0},
+{  -529, 1},{  -498, 0},{  -530, 1},{  -481, 0},{  -545, 1},{  -482, 0},{  -546, 1},{  -449, 0},
+{  -577, 1},{  -450, 0},{  -578, 1},{  -385, 0},{  -641, 1},{  -386, 0},{  -642, 1},{  -257, 0},
+{  -769, 1},{  -258, 0},{  -770, 1},{    -1, 0},{ -1025, 1},{    -2, 0},{ -1026, 1},{   511, 1},
+{ -1537, 1},{   510, 1},{ -1538, 1},{  1535, 1},{ -2561, 1},{  1534, 1},{ -2562, 1},{  3583, 1},
+{ -4609, 1},{  3582, 1},{ -4610, 1},{  7679, 1},{ -8705, 1},{  7678, 1},{ -8706, 1},{ 15871, 1},
+{-16897, 1},{ 15870, 1},{-16898, 1},{ 32255, 1},{-33281, 1},{ 32254, 1},{-33282, 1},{  1025, 0},
+{  1023, 1},{  1024, 0},{  1022, 1},{  1026, 0},{  1022, 1},{  1025, 0},{  1021, 1},{  1028, 0},
+{  1020, 1},{  1027, 0},{  1019, 1},{  1032, 0},{  1016, 1},{  1031, 0},{  1015, 1},{  1040, 0},
+{  1008, 1},{  1039, 0},{  1007, 1},{  1056, 0},{   992, 1},{  1055, 0},{   991, 1},{  1088, 0},
+{   960, 1},{  1087, 0},{   959, 1},{  1152, 0},{   896, 1},{  1151, 0},{   895, 1},{  1280, 0},
+{   768, 1},{  1279, 0},{   767, 1},{  1536, 0},{   512, 1},{  1535, 0},{   511, 1},{  2048, 0},
+{     0, 1},{  2047, 0},{    -1, 0},{  3072, 0},{ -1024, 0},{  3071, 0},{ -1025, 0},{  5120, 0},
+{ -3072, 0},{  5119, 0},{ -3073, 0},{  9216, 0},{ -7168, 0},{  9215, 0},{ -7169, 0},{ 17408, 0},
+{-15360, 0},{ 17407, 0},{-15361, 0},{ 33792, 0},{-31744, 0},{ 33791, 0},{-31745, 0},{ -1023, 0},
+{ -1025, 1},{ -1024, 0},{ -1026, 1},{ -1022, 0},{ -1026, 1},{ -1023, 0},{ -1027, 1},{ -1020, 0},
+{ -1028, 1},{ -1021, 0},{ -1029, 1},{ -1016, 0},{ -1032, 1},{ -1017, 0},{ -1033, 1},{ -1008, 0},
+{ -1040, 1},{ -1009, 0},{ -1041, 1},{  -992, 0},{ -1056, 1},{  -993, 0},{ -1057, 1},{  -960, 0},
+{ -1088, 1},{  -961, 0},{ -1089, 1},{  -896, 0},{ -1152, 1},{  -897, 0},{ -1153, 1},{  -768, 0},
+{ -1280, 1},{  -769, 0},{ -1281, 1},{  -512, 0},{ -1536, 1},{  -513, 0},{ -1537, 1},{     0, 1},
+{ -2048, 1},{    -1, 0},{ -2049, 1},{  1024, 1},{ -3072, 1},{  1023, 1},{ -3073, 1},{  3072, 1},
+{ -5120, 1},{  3071, 1},{ -5121, 1},{  7168, 1},{ -9216, 1},{  7167, 1},{ -9217, 1},{ 15360, 1},
+{-17408, 1},{ 15359, 1},{-17409, 1},{ 31744, 1},{-33792, 1},{ 31743, 1},{-33793, 1},{  1024, 0},
+{  1022, 1},{  1023, 0},{  1021, 1},{  1025, 0},{  1021, 1},{  1024, 0},{  1020, 1},{  1027, 0},
+{  1019, 1},{  1026, 0},{  1018, 1},{  1031, 0},{  1015, 1},{  1030, 0},{  1014, 1},{  1039, 0},
+{  1007, 1},{  1038, 0},{  1006, 1},{  1055, 0},{   991, 1},{  1054, 0},{   990, 1},{  1087, 0},
+{   959, 1},{  1086, 0},{   958, 1},{  1151, 0},{   895, 1},{  1150, 0},{   894, 1},{  1279, 0},
+{   767, 1},{  1278, 0},{   766, 1},{  1535, 0},{   511, 1},{  1534, 0},{   510, 1},{  2047, 0},
+{    -1, 0},{  2046, 0},{    -2, 0},{  3071, 0},{ -1025, 0},{  3070, 0},{ -1026, 0},{  5119, 0},
+{ -3073, 0},{  5118, 0},{ -3074, 0},{  9215, 0},{ -7169, 0},{  9214, 0},{ -7170, 0},{ 17407, 0},
+{-15361, 0},{ 17406, 0},{-15362, 0},{ 33791, 0},{-31745, 0},{ 33790, 0},{-31746, 0},{ -1024, 0},
+{ -1026, 1},{ -1025, 0},{ -1027, 1},{ -1023, 0},{ -1027, 1},{ -1024, 0},{ -1028, 1},{ -1021, 0},
+{ -1029, 1},{ -1022, 0},{ -1030, 1},{ -1017, 0},{ -1033, 1},{ -1018, 0},{ -1034, 1},{ -1009, 0},
+{ -1041, 1},{ -1010, 0},{ -1042, 1},{  -993, 0},{ -1057, 1},{  -994, 0},{ -1058, 1},{  -961, 0},
+{ -1089, 1},{  -962, 0},{ -1090, 1},{  -897, 0},{ -1153, 1},{  -898, 0},{ -1154, 1},{  -769, 0},
+{ -1281, 1},{  -770, 0},{ -1282, 1},{  -513, 0},{ -1537, 1},{  -514, 0},{ -1538, 1},{    -1, 0},
+{ -2049, 1},{    -2, 0},{ -2050, 1},{  1023, 1},{ -3073, 1},{  1022, 1},{ -3074, 1},{  3071, 1},
+{ -5121, 1},{  3070, 1},{ -5122, 1},{  7167, 1},{ -9217, 1},{  7166, 1},{ -9218, 1},{ 15359, 1},
+{-17409, 1},{ 15358, 1},{-17410, 1},{ 31743, 1},{-33793, 1},{ 31742, 1},{-33794, 1},{  2049, 0},
+{  2047, 1},{  2048, 0},{  2046, 1},{  2050, 0},{  2046, 1},{  2049, 0},{  2045, 1},{  2052, 0},
+{  2044, 1},{  2051, 0},{  2043, 1},{  2056, 0},{  2040, 1},{  2055, 0},{  2039, 1},{  2064, 0},
+{  2032, 1},{  2063, 0},{  2031, 1},{  2080, 0},{  2016, 1},{  2079, 0},{  2015, 1},{  2112, 0},
+{  1984, 1},{  2111, 0},{  1983, 1},{  2176, 0},{  1920, 1},{  2175, 0},{  1919, 1},{  2304, 0},
+{  1792, 1},{  2303, 0},{  1791, 1},{  2560, 0},{  1536, 1},{  2559, 0},{  1535, 1},{  3072, 0},
+{  1024, 1},{  3071, 0},{  1023, 1},{  4096, 0},{     0, 1},{  4095, 0},{    -1, 0},{  6144, 0},
+{ -2048, 0},{  6143, 0},{ -2049, 0},{ 10240, 0},{ -6144, 0},{ 10239, 0},{ -6145, 0},{ 18432, 0},
+{-14336, 0},{ 18431, 0},{-14337, 0},{ 34816, 0},{-30720, 0},{ 34815, 0},{-30721, 0},{ -2047, 0},
+{ -2049, 1},{ -2048, 0},{ -2050, 1},{ -2046, 0},{ -2050, 1},{ -2047, 0},{ -2051, 1},{ -2044, 0},
+{ -2052, 1},{ -2045, 0},{ -2053, 1},{ -2040, 0},{ -2056, 1},{ -2041, 0},{ -2057, 1},{ -2032, 0},
+{ -2064, 1},{ -2033, 0},{ -2065, 1},{ -2016, 0},{ -2080, 1},{ -2017, 0},{ -2081, 1},{ -1984, 0},
+{ -2112, 1},{ -1985, 0},{ -2113, 1},{ -1920, 0},{ -2176, 1},{ -1921, 0},{ -2177, 1},{ -1792, 0},
+{ -2304, 1},{ -1793, 0},{ -2305, 1},{ -1536, 0},{ -2560, 1},{ -1537, 0},{ -2561, 1},{ -1024, 0},
+{ -3072, 1},{ -1025, 0},{ -3073, 1},{     0, 1},{ -4096, 1},{    -1, 0},{ -4097, 1},{  2048, 1},
+{ -6144, 1},{  2047, 1},{ -6145, 1},{  6144, 1},{-10240, 1},{  6143, 1},{-10241, 1},{ 14336, 1},
+{-18432, 1},{ 14335, 1},{-18433, 1},{ 30720, 1},{-34816, 1},{ 30719, 1},{-34817, 1},{  2048, 0},
+{  2046, 1},{  2047, 0},{  2045, 1},{  2049, 0},{  2045, 1},{  2048, 0},{  2044, 1},{  2051, 0},
+{  2043, 1},{  2050, 0},{  2042, 1},{  2055, 0},{  2039, 1},{  2054, 0},{  2038, 1},{  2063, 0},
+{  2031, 1},{  2062, 0},{  2030, 1},{  2079, 0},{  2015, 1},{  2078, 0},{  2014, 1},{  2111, 0},
+{  1983, 1},{  2110, 0},{  1982, 1},{  2175, 0},{  1919, 1},{  2174, 0},{  1918, 1},{  2303, 0},
+{  1791, 1},{  2302, 0},{  1790, 1},{  2559, 0},{  1535, 1},{  2558, 0},{  1534, 1},{  3071, 0},
+{  1023, 1},{  3070, 0},{  1022, 1},{  4095, 0},{    -1, 0},{  4094, 0},{    -2, 0},{  6143, 0},
+{ -2049, 0},{  6142, 0},{ -2050, 0},{ 10239, 0},{ -6145, 0},{ 10238, 0},{ -6146, 0},{ 18431, 0},
+{-14337, 0},{ 18430, 0},{-14338, 0},{ 34815, 0},{-30721, 0},{ 34814, 0},{-30722, 0},{ -2048, 0},
+{ -2050, 1},{ -2049, 0},{ -2051, 1},{ -2047, 0},{ -2051, 1},{ -2048, 0},{ -2052, 1},{ -2045, 0},
+{ -2053, 1},{ -2046, 0},{ -2054, 1},{ -2041, 0},{ -2057, 1},{ -2042, 0},{ -2058, 1},{ -2033, 0},
+{ -2065, 1},{ -2034, 0},{ -2066, 1},{ -2017, 0},{ -2081, 1},{ -2018, 0},{ -2082, 1},{ -1985, 0},
+{ -2113, 1},{ -1986, 0},{ -2114, 1},{ -1921, 0},{ -2177, 1},{ -1922, 0},{ -2178, 1},{ -1793, 0},
+{ -2305, 1},{ -1794, 0},{ -2306, 1},{ -1537, 0},{ -2561, 1},{ -1538, 0},{ -2562, 1},{ -1025, 0},
+{ -3073, 1},{ -1026, 0},{ -3074, 1},{    -1, 0},{ -4097, 1},{    -2, 0},{ -4098, 1},{  2047, 1},
+{ -6145, 1},{  2046, 1},{ -6146, 1},{  6143, 1},{-10241, 1},{  6142, 1},{-10242, 1},{ 14335, 1},
+{-18433, 1},{ 14334, 1},{-18434, 1},{ 30719, 1},{-34817, 1},{ 30718, 1},{-34818, 1},{  4097, 0},
+{  4095, 1},{  4096, 0},{  4094, 1},{  4098, 0},{  4094, 1},{  4097, 0},{  4093, 1},{  4100, 0},
+{  4092, 1},{  4099, 0},{  4091, 1},{  4104, 0},{  4088, 1},{  4103, 0},{  4087, 1},{  4112, 0},
+{  4080, 1},{  4111, 0},{  4079, 1},{  4128, 0},{  4064, 1},{  4127, 0},{  4063, 1},{  4160, 0},
+{  4032, 1},{  4159, 0},{  4031, 1},{  4224, 0},{  3968, 1},{  4223, 0},{  3967, 1},{  4352, 0},
+{  3840, 1},{  4351, 0},{  3839, 1},{  4608, 0},{  3584, 1},{  4607, 0},{  3583, 1},{  5120, 0},
+{  3072, 1},{  5119, 0},{  3071, 1},{  6144, 0},{  2048, 1},{  6143, 0},{  2047, 1},{  8192, 0},
+{     0, 1},{  8191, 0},{    -1, 0},{ 12288, 0},{ -4096, 0},{ 12287, 0},{ -4097, 0},{ 20480, 0},
+{-12288, 0},{ 20479, 0},{-12289, 0},{ 36864, 0},{-28672, 0},{ 36863, 0},{-28673, 0},{ -4095, 0},
+{ -4097, 1},{ -4096, 0},{ -4098, 1},{ -4094, 0},{ -4098, 1},{ -4095, 0},{ -4099, 1},{ -4092, 0},
+{ -4100, 1},{ -4093, 0},{ -4101, 1},{ -4088, 0},{ -4104, 1},{ -4089, 0},{ -4105, 1},{ -4080, 0},
+{ -4112, 1},{ -4081, 0},{ -4113, 1},{ -4064, 0},{ -4128, 1},{ -4065, 0},{ -4129, 1},{ -4032, 0},
+{ -4160, 1},{ -4033, 0},{ -4161, 1},{ -3968, 0},{ -4224, 1},{ -3969, 0},{ -4225, 1},{ -3840, 0},
+{ -4352, 1},{ -3841, 0},{ -4353, 1},{ -3584, 0},{ -4608, 1},{ -3585, 0},{ -4609, 1},{ -3072, 0},
+{ -5120, 1},{ -3073, 0},{ -5121, 1},{ -2048, 0},{ -6144, 1},{ -2049, 0},{ -6145, 1},{     0, 1},
+{ -8192, 1},{    -1, 0},{ -8193, 1},{  4096, 1},{-12288, 1},{  4095, 1},{-12289, 1},{ 12288, 1},
+{-20480, 1},{ 12287, 1},{-20481, 1},{ 28672, 1},{-36864, 1},{ 28671, 1},{-36865, 1},{  4096, 0},
+{  4094, 1},{  4095, 0},{  4093, 1},{  4097, 0},{  4093, 1},{  4096, 0},{  4092, 1},{  4099, 0},
+{  4091, 1},{  4098, 0},{  4090, 1},{  4103, 0},{  4087, 1},{  4102, 0},{  4086, 1},{  4111, 0},
+{  4079, 1},{  4110, 0},{  4078, 1},{  4127, 0},{  4063, 1},{  4126, 0},{  4062, 1},{  4159, 0},
+{  4031, 1},{  4158, 0},{  4030, 1},{  4223, 0},{  3967, 1},{  4222, 0},{  3966, 1},{  4351, 0},
+{  3839, 1},{  4350, 0},{  3838, 1},{  4607, 0},{  3583, 1},{  4606, 0},{  3582, 1},{  5119, 0},
+{  3071, 1},{  5118, 0},{  3070, 1},{  6143, 0},{  2047, 1},{  6142, 0},{  2046, 1},{  8191, 0},
+{    -1, 0},{  8190, 0},{    -2, 0},{ 12287, 0},{ -4097, 0},{ 12286, 0},{ -4098, 0},{ 20479, 0},
+{-12289, 0},{ 20478, 0},{-12290, 0},{ 36863, 0},{-28673, 0},{ 36862, 0},{-28674, 0},{ -4096, 0},
+{ -4098, 1},{ -4097, 0},{ -4099, 1},{ -4095, 0},{ -4099, 1},{ -4096, 0},{ -4100, 1},{ -4093, 0},
+{ -4101, 1},{ -4094, 0},{ -4102, 1},{ -4089, 0},{ -4105, 1},{ -4090, 0},{ -4106, 1},{ -4081, 0},
+{ -4113, 1},{ -4082, 0},{ -4114, 1},{ -4065, 0},{ -4129, 1},{ -4066, 0},{ -4130, 1},{ -4033, 0},
+{ -4161, 1},{ -4034, 0},{ -4162, 1},{ -3969, 0},{ -4225, 1},{ -3970, 0},{ -4226, 1},{ -3841, 0},
+{ -4353, 1},{ -3842, 0},{ -4354, 1},{ -3585, 0},{ -4609, 1},{ -3586, 0},{ -4610, 1},{ -3073, 0},
+{ -5121, 1},{ -3074, 0},{ -5122, 1},{ -2049, 0},{ -6145, 1},{ -2050, 0},{ -6146, 1},{    -1, 0},
+{ -8193, 1},{    -2, 0},{ -8194, 1},{  4095, 1},{-12289, 1},{  4094, 1},{-12290, 1},{ 12287, 1},
+{-20481, 1},{ 12286, 1},{-20482, 1},{ 28671, 1},{-36865, 1},{ 28670, 1},{-36866, 1},{  8193, 0},
+{  8191, 1},{  8192, 0},{  8190, 1},{  8194, 0},{  8190, 1},{  8193, 0},{  8189, 1},{  8196, 0},
+{  8188, 1},{  8195, 0},{  8187, 1},{  8200, 0},{  8184, 1},{  8199, 0},{  8183, 1},{  8208, 0},
+{  8176, 1},{  8207, 0},{  8175, 1},{  8224, 0},{  8160, 1},{  8223, 0},{  8159, 1},{  8256, 0},
+{  8128, 1},{  8255, 0},{  8127, 1},{  8320, 0},{  8064, 1},{  8319, 0},{  8063, 1},{  8448, 0},
+{  7936, 1},{  8447, 0},{  7935, 1},{  8704, 0},{  7680, 1},{  8703, 0},{  7679, 1},{  9216, 0},
+{  7168, 1},{  9215, 0},{  7167, 1},{ 10240, 0},{  6144, 1},{ 10239, 0},{  6143, 1},{ 12288, 0},
+{  4096, 1},{ 12287, 0},{  4095, 1},{ 16384, 0},{     0, 1},{ 16383, 0},{    -1, 0},{ 24576, 0},
+{ -8192, 0},{ 24575, 0},{ -8193, 0},{ 40960, 0},{-24576, 0},{ 40959, 0},{-24577, 0},{ -8191, 0},
+{ -8193, 1},{ -8192, 0},{ -8194, 1},{ -8190, 0},{ -8194, 1},{ -8191, 0},{ -8195, 1},{ -8188, 0},
+{ -8196, 1},{ -8189, 0},{ -8197, 1},{ -8184, 0},{ -8200, 1},{ -8185, 0},{ -8201, 1},{ -8176, 0},
+{ -8208, 1},{ -8177, 0},{ -8209, 1},{ -8160, 0},{ -8224, 1},{ -8161, 0},{ -8225, 1},{ -8128, 0},
+{ -8256, 1},{ -8129, 0},{ -8257, 1},{ -8064, 0},{ -8320, 1},{ -8065, 0},{ -8321, 1},{ -7936, 0},
+{ -8448, 1},{ -7937, 0},{ -8449, 1},{ -7680, 0},{ -8704, 1},{ -7681, 0},{ -8705, 1},{ -7168, 0},
+{ -9216, 1},{ -7169, 0},{ -9217, 1},{ -6144, 0},{-10240, 1},{ -6145, 0},{-10241, 1},{ -4096, 0},
+{-12288, 1},{ -4097, 0},{-12289, 1},{     0, 1},{-16384, 1},{    -1, 0},{-16385, 1},{  8192, 1},
+{-24576, 1},{  8191, 1},{-24577, 1},{ 24576, 1},{-40960, 1},{ 24575, 1},{-40961, 1},{  8192, 0},
+{  8190, 1},{  8191, 0},{  8189, 1},{  8193, 0},{  8189, 1},{  8192, 0},{  8188, 1},{  8195, 0},
+{  8187, 1},{  8194, 0},{  8186, 1},{  8199, 0},{  8183, 1},{  8198, 0},{  8182, 1},{  8207, 0},
+{  8175, 1},{  8206, 0},{  8174, 1},{  8223, 0},{  8159, 1},{  8222, 0},{  8158, 1},{  8255, 0},
+{  8127, 1},{  8254, 0},{  8126, 1},{  8319, 0},{  8063, 1},{  8318, 0},{  8062, 1},{  8447, 0},
+{  7935, 1},{  8446, 0},{  7934, 1},{  8703, 0},{  7679, 1},{  8702, 0},{  7678, 1},{  9215, 0},
+{  7167, 1},{  9214, 0},{  7166, 1},{ 10239, 0},{  6143, 1},{ 10238, 0},{  6142, 1},{ 12287, 0},
+{  4095, 1},{ 12286, 0},{  4094, 1},{ 16383, 0},{    -1, 0},{ 16382, 0},{    -2, 0},{ 24575, 0},
+{ -8193, 0},{ 24574, 0},{ -8194, 0},{ 40959, 0},{-24577, 0},{ 40958, 0},{-24578, 0},{ -8192, 0},
+{ -8194, 1},{ -8193, 0},{ -8195, 1},{ -8191, 0},{ -8195, 1},{ -8192, 0},{ -8196, 1},{ -8189, 0},
+{ -8197, 1},{ -8190, 0},{ -8198, 1},{ -8185, 0},{ -8201, 1},{ -8186, 0},{ -8202, 1},{ -8177, 0},
+{ -8209, 1},{ -8178, 0},{ -8210, 1},{ -8161, 0},{ -8225, 1},{ -8162, 0},{ -8226, 1},{ -8129, 0},
+{ -8257, 1},{ -8130, 0},{ -8258, 1},{ -8065, 0},{ -8321, 1},{ -8066, 0},{ -8322, 1},{ -7937, 0},
+{ -8449, 1},{ -7938, 0},{ -8450, 1},{ -7681, 0},{ -8705, 1},{ -7682, 0},{ -8706, 1},{ -7169, 0},
+{ -9217, 1},{ -7170, 0},{ -9218, 1},{ -6145, 0},{-10241, 1},{ -6146, 0},{-10242, 1},{ -4097, 0},
+{-12289, 1},{ -4098, 0},{-12290, 1},{    -1, 0},{-16385, 1},{    -2, 0},{-16386, 1},{  8191, 1},
+{-24577, 1},{  8190, 1},{-24578, 1},{ 24575, 1},{-40961, 1},{ 24574, 1},{-40962, 1},{ 16385, 0},
+{ 16383, 1},{ 16384, 0},{ 16382, 1},{ 16386, 0},{ 16382, 1},{ 16385, 0},{ 16381, 1},{ 16388, 0},
+{ 16380, 1},{ 16387, 0},{ 16379, 1},{ 16392, 0},{ 16376, 1},{ 16391, 0},{ 16375, 1},{ 16400, 0},
+{ 16368, 1},{ 16399, 0},{ 16367, 1},{ 16416, 0},{ 16352, 1},{ 16415, 0},{ 16351, 1},{ 16448, 0},
+{ 16320, 1},{ 16447, 0},{ 16319, 1},{ 16512, 0},{ 16256, 1},{ 16511, 0},{ 16255, 1},{ 16640, 0},
+{ 16128, 1},{ 16639, 0},{ 16127, 1},{ 16896, 0},{ 15872, 1},{ 16895, 0},{ 15871, 1},{ 17408, 0},
+{ 15360, 1},{ 17407, 0},{ 15359, 1},{ 18432, 0},{ 14336, 1},{ 18431, 0},{ 14335, 1},{ 20480, 0},
+{ 12288, 1},{ 20479, 0},{ 12287, 1},{ 24576, 0},{  8192, 1},{ 24575, 0},{  8191, 1},{ 32768, 0},
+{     0, 1},{ 32767, 0},{    -1, 0},{ 49152, 0},{-16384, 0},{ 49151, 0},{-16385, 0},{-16383, 0},
+{-16385, 1},{-16384, 0},{-16386, 1},{-16382, 0},{-16386, 1},{-16383, 0},{-16387, 1},{-16380, 0},
+{-16388, 1},{-16381, 0},{-16389, 1},{-16376, 0},{-16392, 1},{-16377, 0},{-16393, 1},{-16368, 0},
+{-16400, 1},{-16369, 0},{-16401, 1},{-16352, 0},{-16416, 1},{-16353, 0},{-16417, 1},{-16320, 0},
+{-16448, 1},{-16321, 0},{-16449, 1},{-16256, 0},{-16512, 1},{-16257, 0},{-16513, 1},{-16128, 0},
+{-16640, 1},{-16129, 0},{-16641, 1},{-15872, 0},{-16896, 1},{-15873, 0},{-16897, 1},{-15360, 0},
+{-17408, 1},{-15361, 0},{-17409, 1},{-14336, 0},{-18432, 1},{-14337, 0},{-18433, 1},{-12288, 0},
+{-20480, 1},{-12289, 0},{-20481, 1},{ -8192, 0},{-24576, 1},{ -8193, 0},{-24577, 1},{     0, 1},
+{-32768, 1},{    -1, 0},{-32769, 1},{ 16384, 1},{-49152, 1},{ 16383, 1},{-49153, 1},{ 16384, 0},
+{ 16382, 1},{ 16383, 0},{ 16381, 1},{ 16385, 0},{ 16381, 1},{ 16384, 0},{ 16380, 1},{ 16387, 0},
+{ 16379, 1},{ 16386, 0},{ 16378, 1},{ 16391, 0},{ 16375, 1},{ 16390, 0},{ 16374, 1},{ 16399, 0},
+{ 16367, 1},{ 16398, 0},{ 16366, 1},{ 16415, 0},{ 16351, 1},{ 16414, 0},{ 16350, 1},{ 16447, 0},
+{ 16319, 1},{ 16446, 0},{ 16318, 1},{ 16511, 0},{ 16255, 1},{ 16510, 0},{ 16254, 1},{ 16639, 0},
+{ 16127, 1},{ 16638, 0},{ 16126, 1},{ 16895, 0},{ 15871, 1},{ 16894, 0},{ 15870, 1},{ 17407, 0},
+{ 15359, 1},{ 17406, 0},{ 15358, 1},{ 18431, 0},{ 14335, 1},{ 18430, 0},{ 14334, 1},{ 20479, 0},
+{ 12287, 1},{ 20478, 0},{ 12286, 1},{ 24575, 0},{  8191, 1},{ 24574, 0},{  8190, 1},{ 32767, 0},
+{    -1, 0},{ 32766, 0},{    -2, 0},{ 49151, 0},{-16385, 0},{ 49150, 0},{-16386, 0},{-16384, 0},
+{-16386, 1},{-16385, 0},{-16387, 1},{-16383, 0},{-16387, 1},{-16384, 0},{-16388, 1},{-16381, 0},
+{-16389, 1},{-16382, 0},{-16390, 1},{-16377, 0},{-16393, 1},{-16378, 0},{-16394, 1},{-16369, 0},
+{-16401, 1},{-16370, 0},{-16402, 1},{-16353, 0},{-16417, 1},{-16354, 0},{-16418, 1},{-16321, 0},
+{-16449, 1},{-16322, 0},{-16450, 1},{-16257, 0},{-16513, 1},{-16258, 0},{-16514, 1},{-16129, 0},
+{-16641, 1},{-16130, 0},{-16642, 1},{-15873, 0},{-16897, 1},{-15874, 0},{-16898, 1},{-15361, 0},
+{-17409, 1},{-15362, 0},{-17410, 1},{-14337, 0},{-18433, 1},{-14338, 0},{-18434, 1},{-12289, 0},
+{-20481, 1},{-12290, 0},{-20482, 1},{ -8193, 0},{-24577, 1},{ -8194, 0},{-24578, 1},{    -1, 0},
+{-32769, 1},{    -2, 0},{-32770, 1},{ 16383, 1},{-49153, 1},{ 16382, 1},{-49154, 1},{ 32769, 0},
+{ 32767, 1},{ 32768, 0},{ 32766, 1},{ 32770, 0},{ 32766, 1},{ 32769, 0},{ 32765, 1},{ 32772, 0},
+{ 32764, 1},{ 32771, 0},{ 32763, 1},{ 32776, 0},{ 32760, 1},{ 32775, 0},{ 32759, 1},{ 32784, 0},
+{ 32752, 1},{ 32783, 0},{ 32751, 1},{ 32800, 0},{ 32736, 1},{ 32799, 0},{ 32735, 1},{ 32832, 0},
+{ 32704, 1},{ 32831, 0},{ 32703, 1},{ 32896, 0},{ 32640, 1},{ 32895, 0},{ 32639, 1},{ 33024, 0},
+{ 32512, 1},{ 33023, 0},{ 32511, 1},{ 33280, 0},{ 32256, 1},{ 33279, 0},{ 32255, 1},{ 33792, 0},
+{ 31744, 1},{ 33791, 0},{ 31743, 1},{ 34816, 0},{ 30720, 1},{ 34815, 0},{ 30719, 1},{ 36864, 0},
+{ 28672, 1},{ 36863, 0},{ 28671, 1},{ 40960, 0},{ 24576, 1},{ 40959, 0},{ 24575, 1},{ 49152, 0},
+{ 16384, 1},{ 49151, 0},{ 16383, 1},{ 65536, 0},{     0, 1},{ 65535, 0},{    -1, 0},{-32767, 0},
+{-32769, 1},{-32768, 0},{-32770, 1},{-32766, 0},{-32770, 1},{-32767, 0},{-32771, 1},{-32764, 0},
+{-32772, 1},{-32765, 0},{-32773, 1},{-32760, 0},{-32776, 1},{-32761, 0},{-32777, 1},{-32752, 0},
+{-32784, 1},{-32753, 0},{-32785, 1},{-32736, 0},{-32800, 1},{-32737, 0},{-32801, 1},{-32704, 0},
+{-32832, 1},{-32705, 0},{-32833, 1},{-32640, 0},{-32896, 1},{-32641, 0},{-32897, 1},{-32512, 0},
+{-33024, 1},{-32513, 0},{-33025, 1},{-32256, 0},{-33280, 1},{-32257, 0},{-33281, 1},{-31744, 0},
+{-33792, 1},{-31745, 0},{-33793, 1},{-30720, 0},{-34816, 1},{-30721, 0},{-34817, 1},{-28672, 0},
+{-36864, 1},{-28673, 0},{-36865, 1},{-24576, 0},{-40960, 1},{-24577, 0},{-40961, 1},{-16384, 0},
+{-49152, 1},{-16385, 0},{-49153, 1},{     0, 1},{-65536, 1},{    -1, 0},{-65537, 1},{ 32768, 0},
+{ 32766, 1},{ 32767, 0},{ 32765, 1},{ 32769, 0},{ 32765, 1},{ 32768, 0},{ 32764, 1},{ 32771, 0},
+{ 32763, 1},{ 32770, 0},{ 32762, 1},{ 32775, 0},{ 32759, 1},{ 32774, 0},{ 32758, 1},{ 32783, 0},
+{ 32751, 1},{ 32782, 0},{ 32750, 1},{ 32799, 0},{ 32735, 1},{ 32798, 0},{ 32734, 1},{ 32831, 0},
+{ 32703, 1},{ 32830, 0},{ 32702, 1},{ 32895, 0},{ 32639, 1},{ 32894, 0},{ 32638, 1},{ 33023, 0},
+{ 32511, 1},{ 33022, 0},{ 32510, 1},{ 33279, 0},{ 32255, 1},{ 33278, 0},{ 32254, 1},{ 33791, 0},
+{ 31743, 1},{ 33790, 0},{ 31742, 1},{ 34815, 0},{ 30719, 1},{ 34814, 0},{ 30718, 1},{ 36863, 0},
+{ 28671, 1},{ 36862, 0},{ 28670, 1},{ 40959, 0},{ 24575, 1},{ 40958, 0},{ 24574, 1},{ 49151, 0},
+{ 16383, 1},{ 49150, 0},{ 16382, 1},{ 65535, 0},{    -1, 0},{ 65534, 0},{    -2, 0},{-32768, 0},
+{-32770, 1},{-32769, 0},{-32771, 1},{-32767, 0},{-32771, 1},{-32768, 0},{-32772, 1},{-32765, 0},
+{-32773, 1},{-32766, 0},{-32774, 1},{-32761, 0},{-32777, 1},{-32762, 0},{-32778, 1},{-32753, 0},
+{-32785, 1},{-32754, 0},{-32786, 1},{-32737, 0},{-32801, 1},{-32738, 0},{-32802, 1},{-32705, 0},
+{-32833, 1},{-32706, 0},{-32834, 1},{-32641, 0},{-32897, 1},{-32642, 0},{-32898, 1},{-32513, 0},
+{-33025, 1},{-32514, 0},{-33026, 1},{-32257, 0},{-33281, 1},{-32258, 0},{-33282, 1},{-31745, 0},
+{-33793, 1},{-31746, 0},{-33794, 1},{-30721, 0},{-34817, 1},{-30722, 0},{-34818, 1},{-28673, 0},
+{-36865, 1},{-28674, 0},{-36866, 1},{-24577, 0},{-40961, 1},{-24578, 0},{-40962, 1},{-16385, 0},
+{-49153, 1},{-16386, 0},{-49154, 1},{    -1, 0},{-65537, 1},{    -2, 0},{-65538, 1},};
+int main ()
+{
+  mp_limb_t r1, r0;
+  int err = 0;
+  size_t ind = 0;
+  /* Run all 4096 generated test functions (one per ordered pair of the 64
+     operands in `ops`) and compare each two-limb result (r1,r0) against the
+     precomputed reference table `ref` above.  */
+  for (size_t i = 0; i < 4096; i++)
+    {
+      int ii = i / 64, jj = i % 64;  /* decode the operand-pair indices, used only for diagnostics */
+      funcs[i](&r1, &r0);
+      /* ref entries are small ints; the cast through mp_limb_signed_t
+         presumably sign-extends them to full limb width before the
+         (unsigned) comparison with r0/r1.  */
+      if (r0 != (mp_limb_signed_t) ref[ind][0] || r1 != (mp_limb_signed_t) ref[ind][1]) {
+         printf ("error for f%zu(%d,%d): want (%d,%d) got (%d,%d)\n", i, (int) ops[ii], (int) ops[jj], ref[ind][1], ref[ind][0], (int) r1, (int) r0);
+         err++;
+       }
+      ind++;
+    }
+  /* Exit status: 0 on full success, 1 if any mismatch was seen.  */
+  return err != 0;
+}
diff --git a/tests/devel/test-sub_ddmmss.c b/tests/devel/test-sub_ddmmss.c
new file mode 100644
index 0000000..87eecc6
--- /dev/null
+++ b/tests/devel/test-sub_ddmmss.c
@@ -0,0 +1,4908 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+/* The 64 test operand values.  For each power of two 2^k (k = 0..15)
+   the quadruple { 2^k, -2^k, 2^k - 1, -(2^k + 1) } is listed, probing
+   values at and just around the bit boundaries where carries and
+   borrows occur.  The generated fN wrappers below draw their
+   subtrahends (and, in later groups, minuends) from this table.  */
+static const int ops[64] = {
+1,-1,0,-2,
+2,-2,1,-3,
+4,-4,3,-5,
+8,-8,7,-9,
+16,-16,15,-17,
+32,-32,31,-33,
+64,-64,63,-65,
+128,-128,127,-129,
+256,-256,255,-257,
+512,-512,511,-513,
+1024,-1024,1023,-1025,
+2048,-2048,2047,-2049,
+4096,-4096,4095,-4097,
+8192,-8192,8191,-8193,
+16384,-16384,16383,-16385,
+32768,-32768,32767,-32769,
+};
+/* f0..f63: wrappers around sub_ddmmss with minuend (0,1) and subtrahend
+   (0, ops[j]) for j = 0..63; each writes the two-limb difference through
+   the out-parameters.  NOTE(review): one function per operand pair
+   presumably keeps each macro expansion at its own call site so the
+   compiler cannot fold them together -- confirm against the generator.  */
+static void f0(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,1);*r1p=r1;*r0p=r0;}
+static void f1(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-1);*r1p=r1;*r0p=r0;}
+static void f2(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,0);*r1p=r1;*r0p=r0;}
+static void f3(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-2);*r1p=r1;*r0p=r0;}
+static void f4(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,2);*r1p=r1;*r0p=r0;}
+static void f5(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-2);*r1p=r1;*r0p=r0;}
+static void f6(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,1);*r1p=r1;*r0p=r0;}
+static void f7(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-3);*r1p=r1;*r0p=r0;}
+static void f8(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,4);*r1p=r1;*r0p=r0;}
+static void f9(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-4);*r1p=r1;*r0p=r0;}
+static void f10(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,3);*r1p=r1;*r0p=r0;}
+static void f11(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-5);*r1p=r1;*r0p=r0;}
+static void f12(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,8);*r1p=r1;*r0p=r0;}
+static void f13(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-8);*r1p=r1;*r0p=r0;}
+static void f14(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,7);*r1p=r1;*r0p=r0;}
+static void f15(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-9);*r1p=r1;*r0p=r0;}
+static void f16(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,16);*r1p=r1;*r0p=r0;}
+static void f17(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-16);*r1p=r1;*r0p=r0;}
+static void f18(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,15);*r1p=r1;*r0p=r0;}
+static void f19(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-17);*r1p=r1;*r0p=r0;}
+static void f20(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,32);*r1p=r1;*r0p=r0;}
+static void f21(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-32);*r1p=r1;*r0p=r0;}
+static void f22(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,31);*r1p=r1;*r0p=r0;}
+static void f23(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-33);*r1p=r1;*r0p=r0;}
+static void f24(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,64);*r1p=r1;*r0p=r0;}
+static void f25(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-64);*r1p=r1;*r0p=r0;}
+static void f26(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,63);*r1p=r1;*r0p=r0;}
+static void f27(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-65);*r1p=r1;*r0p=r0;}
+static void f28(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,128);*r1p=r1;*r0p=r0;}
+static void f29(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-128);*r1p=r1;*r0p=r0;}
+static void f30(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,127);*r1p=r1;*r0p=r0;}
+static void f31(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-129);*r1p=r1;*r0p=r0;}
+static void f32(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,256);*r1p=r1;*r0p=r0;}
+static void f33(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-256);*r1p=r1;*r0p=r0;}
+static void f34(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,255);*r1p=r1;*r0p=r0;}
+static void f35(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-257);*r1p=r1;*r0p=r0;}
+static void f36(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,512);*r1p=r1;*r0p=r0;}
+static void f37(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-512);*r1p=r1;*r0p=r0;}
+static void f38(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,511);*r1p=r1;*r0p=r0;}
+static void f39(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-513);*r1p=r1;*r0p=r0;}
+static void f40(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,1024);*r1p=r1;*r0p=r0;}
+static void f41(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-1024);*r1p=r1;*r0p=r0;}
+static void f42(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,1023);*r1p=r1;*r0p=r0;}
+static void f43(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-1025);*r1p=r1;*r0p=r0;}
+static void f44(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,2048);*r1p=r1;*r0p=r0;}
+static void f45(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-2048);*r1p=r1;*r0p=r0;}
+static void f46(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,2047);*r1p=r1;*r0p=r0;}
+static void f47(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-2049);*r1p=r1;*r0p=r0;}
+static void f48(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,4096);*r1p=r1;*r0p=r0;}
+static void f49(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-4096);*r1p=r1;*r0p=r0;}
+static void f50(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,4095);*r1p=r1;*r0p=r0;}
+static void f51(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-4097);*r1p=r1;*r0p=r0;}
+static void f52(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,8192);*r1p=r1;*r0p=r0;}
+static void f53(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-8192);*r1p=r1;*r0p=r0;}
+static void f54(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,8191);*r1p=r1;*r0p=r0;}
+static void f55(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-8193);*r1p=r1;*r0p=r0;}
+static void f56(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,16384);*r1p=r1;*r0p=r0;}
+static void f57(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-16384);*r1p=r1;*r0p=r0;}
+static void f58(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,16383);*r1p=r1;*r0p=r0;}
+static void f59(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-16385);*r1p=r1;*r0p=r0;}
+static void f60(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,32768);*r1p=r1;*r0p=r0;}
+static void f61(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-32768);*r1p=r1;*r0p=r0;}
+static void f62(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,32767);*r1p=r1;*r0p=r0;}
+static void f63(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-32769);*r1p=r1;*r0p=r0;}
+/* f64..f127: same pattern with minuend (0,-1) -- low limb all-ones --
+   and subtrahend (0, ops[j]) for j = 0..63.  */
+static void f64(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,1);*r1p=r1;*r0p=r0;}
+static void f65(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-1);*r1p=r1;*r0p=r0;}
+static void f66(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,0);*r1p=r1;*r0p=r0;}
+static void f67(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-2);*r1p=r1;*r0p=r0;}
+static void f68(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,2);*r1p=r1;*r0p=r0;}
+static void f69(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-2);*r1p=r1;*r0p=r0;}
+static void f70(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,1);*r1p=r1;*r0p=r0;}
+static void f71(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-3);*r1p=r1;*r0p=r0;}
+static void f72(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,4);*r1p=r1;*r0p=r0;}
+static void f73(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-4);*r1p=r1;*r0p=r0;}
+static void f74(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,3);*r1p=r1;*r0p=r0;}
+static void f75(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-5);*r1p=r1;*r0p=r0;}
+static void f76(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,8);*r1p=r1;*r0p=r0;}
+static void f77(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-8);*r1p=r1;*r0p=r0;}
+static void f78(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,7);*r1p=r1;*r0p=r0;}
+static void f79(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-9);*r1p=r1;*r0p=r0;}
+static void f80(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,16);*r1p=r1;*r0p=r0;}
+static void f81(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-16);*r1p=r1;*r0p=r0;}
+static void f82(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,15);*r1p=r1;*r0p=r0;}
+static void f83(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-17);*r1p=r1;*r0p=r0;}
+static void f84(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,32);*r1p=r1;*r0p=r0;}
+static void f85(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-32);*r1p=r1;*r0p=r0;}
+static void f86(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,31);*r1p=r1;*r0p=r0;}
+static void f87(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-33);*r1p=r1;*r0p=r0;}
+static void f88(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,64);*r1p=r1;*r0p=r0;}
+static void f89(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-64);*r1p=r1;*r0p=r0;}
+static void f90(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,63);*r1p=r1;*r0p=r0;}
+static void f91(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-65);*r1p=r1;*r0p=r0;}
+static void f92(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,128);*r1p=r1;*r0p=r0;}
+static void f93(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-128);*r1p=r1;*r0p=r0;}
+static void f94(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,127);*r1p=r1;*r0p=r0;}
+static void f95(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-129);*r1p=r1;*r0p=r0;}
+static void f96(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,256);*r1p=r1;*r0p=r0;}
+static void f97(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-256);*r1p=r1;*r0p=r0;}
+static void f98(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,255);*r1p=r1;*r0p=r0;}
+static void f99(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-257);*r1p=r1;*r0p=r0;}
+static void f100(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,512);*r1p=r1;*r0p=r0;}
+static void f101(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-512);*r1p=r1;*r0p=r0;}
+static void f102(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,511);*r1p=r1;*r0p=r0;}
+static void f103(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-513);*r1p=r1;*r0p=r0;}
+static void f104(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,1024);*r1p=r1;*r0p=r0;}
+static void f105(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-1024);*r1p=r1;*r0p=r0;}
+static void f106(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,1023);*r1p=r1;*r0p=r0;}
+static void f107(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-1025);*r1p=r1;*r0p=r0;}
+static void f108(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,2048);*r1p=r1;*r0p=r0;}
+static void f109(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-2048);*r1p=r1;*r0p=r0;}
+static void f110(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,2047);*r1p=r1;*r0p=r0;}
+static void f111(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-2049);*r1p=r1;*r0p=r0;}
+static void f112(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,4096);*r1p=r1;*r0p=r0;}
+static void f113(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-4096);*r1p=r1;*r0p=r0;}
+static void f114(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,4095);*r1p=r1;*r0p=r0;}
+static void f115(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-4097);*r1p=r1;*r0p=r0;}
+static void f116(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,8192);*r1p=r1;*r0p=r0;}
+static void f117(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-8192);*r1p=r1;*r0p=r0;}
+static void f118(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,8191);*r1p=r1;*r0p=r0;}
+static void f119(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-8193);*r1p=r1;*r0p=r0;}
+static void f120(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,16384);*r1p=r1;*r0p=r0;}
+static void f121(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-16384);*r1p=r1;*r0p=r0;}
+static void f122(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,16383);*r1p=r1;*r0p=r0;}
+static void f123(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-16385);*r1p=r1;*r0p=r0;}
+static void f124(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,32768);*r1p=r1;*r0p=r0;}
+static void f125(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-32768);*r1p=r1;*r0p=r0;}
+static void f126(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,32767);*r1p=r1;*r0p=r0;}
+static void f127(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1,0,-32769);*r1p=r1;*r0p=r0;}
+/* f128..f191: same pattern with minuend (0,0) -- every nonzero
+   subtrahend forces a borrow out of the low limb -- and subtrahend
+   (0, ops[j]) for j = 0..63.  */
+static void f128(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,1);*r1p=r1;*r0p=r0;}
+static void f129(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-1);*r1p=r1;*r0p=r0;}
+static void f130(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,0);*r1p=r1;*r0p=r0;}
+static void f131(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-2);*r1p=r1;*r0p=r0;}
+static void f132(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,2);*r1p=r1;*r0p=r0;}
+static void f133(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-2);*r1p=r1;*r0p=r0;}
+static void f134(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,1);*r1p=r1;*r0p=r0;}
+static void f135(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-3);*r1p=r1;*r0p=r0;}
+static void f136(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,4);*r1p=r1;*r0p=r0;}
+static void f137(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-4);*r1p=r1;*r0p=r0;}
+static void f138(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,3);*r1p=r1;*r0p=r0;}
+static void f139(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-5);*r1p=r1;*r0p=r0;}
+static void f140(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,8);*r1p=r1;*r0p=r0;}
+static void f141(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-8);*r1p=r1;*r0p=r0;}
+static void f142(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,7);*r1p=r1;*r0p=r0;}
+static void f143(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-9);*r1p=r1;*r0p=r0;}
+static void f144(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,16);*r1p=r1;*r0p=r0;}
+static void f145(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-16);*r1p=r1;*r0p=r0;}
+static void f146(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,15);*r1p=r1;*r0p=r0;}
+static void f147(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-17);*r1p=r1;*r0p=r0;}
+static void f148(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,32);*r1p=r1;*r0p=r0;}
+static void f149(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-32);*r1p=r1;*r0p=r0;}
+static void f150(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,31);*r1p=r1;*r0p=r0;}
+static void f151(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-33);*r1p=r1;*r0p=r0;}
+static void f152(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,64);*r1p=r1;*r0p=r0;}
+static void f153(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-64);*r1p=r1;*r0p=r0;}
+static void f154(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,63);*r1p=r1;*r0p=r0;}
+static void f155(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-65);*r1p=r1;*r0p=r0;}
+static void f156(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,128);*r1p=r1;*r0p=r0;}
+static void f157(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-128);*r1p=r1;*r0p=r0;}
+static void f158(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,127);*r1p=r1;*r0p=r0;}
+static void f159(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-129);*r1p=r1;*r0p=r0;}
+static void f160(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,256);*r1p=r1;*r0p=r0;}
+static void f161(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-256);*r1p=r1;*r0p=r0;}
+static void f162(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,255);*r1p=r1;*r0p=r0;}
+static void f163(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-257);*r1p=r1;*r0p=r0;}
+static void f164(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,512);*r1p=r1;*r0p=r0;}
+static void f165(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-512);*r1p=r1;*r0p=r0;}
+static void f166(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,511);*r1p=r1;*r0p=r0;}
+static void f167(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-513);*r1p=r1;*r0p=r0;}
+static void f168(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,1024);*r1p=r1;*r0p=r0;}
+static void f169(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-1024);*r1p=r1;*r0p=r0;}
+static void f170(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,1023);*r1p=r1;*r0p=r0;}
+static void f171(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-1025);*r1p=r1;*r0p=r0;}
+static void f172(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,2048);*r1p=r1;*r0p=r0;}
+static void f173(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-2048);*r1p=r1;*r0p=r0;}
+static void f174(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,2047);*r1p=r1;*r0p=r0;}
+static void f175(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-2049);*r1p=r1;*r0p=r0;}
+static void f176(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,4096);*r1p=r1;*r0p=r0;}
+static void f177(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-4096);*r1p=r1;*r0p=r0;}
+static void f178(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,4095);*r1p=r1;*r0p=r0;}
+static void f179(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-4097);*r1p=r1;*r0p=r0;}
+static void f180(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,8192);*r1p=r1;*r0p=r0;}
+static void f181(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-8192);*r1p=r1;*r0p=r0;}
+static void f182(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,8191);*r1p=r1;*r0p=r0;}
+static void f183(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-8193);*r1p=r1;*r0p=r0;}
+static void f184(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,16384);*r1p=r1;*r0p=r0;}
+static void f185(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-16384);*r1p=r1;*r0p=r0;}
+static void f186(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,16383);*r1p=r1;*r0p=r0;}
+static void f187(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-16385);*r1p=r1;*r0p=r0;}
+static void f188(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,32768);*r1p=r1;*r0p=r0;}
+static void f189(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-32768);*r1p=r1;*r0p=r0;}
+static void f190(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,32767);*r1p=r1;*r0p=r0;}
+static void f191(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,0,0,-32769);*r1p=r1;*r0p=r0;}
+/* f192..f255: same pattern with minuend (0,-2) and subtrahend
+   (0, ops[j]) for j = 0..63.  */
+static void f192(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,1);*r1p=r1;*r0p=r0;}
+static void f193(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-1);*r1p=r1;*r0p=r0;}
+static void f194(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,0);*r1p=r1;*r0p=r0;}
+static void f195(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-2);*r1p=r1;*r0p=r0;}
+static void f196(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,2);*r1p=r1;*r0p=r0;}
+static void f197(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-2);*r1p=r1;*r0p=r0;}
+static void f198(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,1);*r1p=r1;*r0p=r0;}
+static void f199(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-3);*r1p=r1;*r0p=r0;}
+static void f200(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,4);*r1p=r1;*r0p=r0;}
+static void f201(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-4);*r1p=r1;*r0p=r0;}
+static void f202(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,3);*r1p=r1;*r0p=r0;}
+static void f203(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-5);*r1p=r1;*r0p=r0;}
+static void f204(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,8);*r1p=r1;*r0p=r0;}
+static void f205(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-8);*r1p=r1;*r0p=r0;}
+static void f206(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,7);*r1p=r1;*r0p=r0;}
+static void f207(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-9);*r1p=r1;*r0p=r0;}
+static void f208(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,16);*r1p=r1;*r0p=r0;}
+static void f209(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-16);*r1p=r1;*r0p=r0;}
+static void f210(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,15);*r1p=r1;*r0p=r0;}
+static void f211(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-17);*r1p=r1;*r0p=r0;}
+static void f212(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,32);*r1p=r1;*r0p=r0;}
+static void f213(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-32);*r1p=r1;*r0p=r0;}
+static void f214(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,31);*r1p=r1;*r0p=r0;}
+static void f215(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-33);*r1p=r1;*r0p=r0;}
+static void f216(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,64);*r1p=r1;*r0p=r0;}
+static void f217(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-64);*r1p=r1;*r0p=r0;}
+static void f218(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,63);*r1p=r1;*r0p=r0;}
+static void f219(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-65);*r1p=r1;*r0p=r0;}
+static void f220(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,128);*r1p=r1;*r0p=r0;}
+static void f221(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-128);*r1p=r1;*r0p=r0;}
+static void f222(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,127);*r1p=r1;*r0p=r0;}
+static void f223(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-129);*r1p=r1;*r0p=r0;}
+static void f224(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,256);*r1p=r1;*r0p=r0;}
+static void f225(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-256);*r1p=r1;*r0p=r0;}
+static void f226(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,255);*r1p=r1;*r0p=r0;}
+static void f227(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-257);*r1p=r1;*r0p=r0;}
+static void f228(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,512);*r1p=r1;*r0p=r0;}
+static void f229(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-512);*r1p=r1;*r0p=r0;}
+static void f230(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,511);*r1p=r1;*r0p=r0;}
+static void f231(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-513);*r1p=r1;*r0p=r0;}
+static void f232(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,1024);*r1p=r1;*r0p=r0;}
+static void f233(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-1024);*r1p=r1;*r0p=r0;}
+static void f234(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,1023);*r1p=r1;*r0p=r0;}
+static void f235(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-1025);*r1p=r1;*r0p=r0;}
+static void f236(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,2048);*r1p=r1;*r0p=r0;}
+static void f237(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-2048);*r1p=r1;*r0p=r0;}
+static void f238(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,2047);*r1p=r1;*r0p=r0;}
+static void f239(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-2049);*r1p=r1;*r0p=r0;}
+static void f240(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,4096);*r1p=r1;*r0p=r0;}
+static void f241(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-4096);*r1p=r1;*r0p=r0;}
+static void f242(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,4095);*r1p=r1;*r0p=r0;}
+static void f243(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-4097);*r1p=r1;*r0p=r0;}
+static void f244(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,8192);*r1p=r1;*r0p=r0;}
+static void f245(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-8192);*r1p=r1;*r0p=r0;}
+static void f246(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,8191);*r1p=r1;*r0p=r0;}
+static void f247(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-8193);*r1p=r1;*r0p=r0;}
+static void f248(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,16384);*r1p=r1;*r0p=r0;}
+static void f249(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-16384);*r1p=r1;*r0p=r0;}
+static void f250(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,16383);*r1p=r1;*r0p=r0;}
+static void f251(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-16385);*r1p=r1;*r0p=r0;}
+static void f252(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,32768);*r1p=r1;*r0p=r0;}
+static void f253(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-32768);*r1p=r1;*r0p=r0;}
+static void f254(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,32767);*r1p=r1;*r0p=r0;}
+static void f255(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-32769);*r1p=r1;*r0p=r0;}
+/* f256..f307 (group continues beyond this chunk): same pattern with
+   minuend (0,2) and subtrahend (0, ops[j]) for successive j.  */
+static void f256(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,1);*r1p=r1;*r0p=r0;}
+static void f257(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-1);*r1p=r1;*r0p=r0;}
+static void f258(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,0);*r1p=r1;*r0p=r0;}
+static void f259(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-2);*r1p=r1;*r0p=r0;}
+static void f260(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,2);*r1p=r1;*r0p=r0;}
+static void f261(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-2);*r1p=r1;*r0p=r0;}
+static void f262(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,1);*r1p=r1;*r0p=r0;}
+static void f263(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-3);*r1p=r1;*r0p=r0;}
+static void f264(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,4);*r1p=r1;*r0p=r0;}
+static void f265(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-4);*r1p=r1;*r0p=r0;}
+static void f266(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,3);*r1p=r1;*r0p=r0;}
+static void f267(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-5);*r1p=r1;*r0p=r0;}
+static void f268(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,8);*r1p=r1;*r0p=r0;}
+static void f269(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-8);*r1p=r1;*r0p=r0;}
+static void f270(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,7);*r1p=r1;*r0p=r0;}
+static void f271(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-9);*r1p=r1;*r0p=r0;}
+static void f272(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,16);*r1p=r1;*r0p=r0;}
+static void f273(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-16);*r1p=r1;*r0p=r0;}
+static void f274(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,15);*r1p=r1;*r0p=r0;}
+static void f275(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-17);*r1p=r1;*r0p=r0;}
+static void f276(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,32);*r1p=r1;*r0p=r0;}
+static void f277(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-32);*r1p=r1;*r0p=r0;}
+static void f278(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,31);*r1p=r1;*r0p=r0;}
+static void f279(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-33);*r1p=r1;*r0p=r0;}
+static void f280(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,64);*r1p=r1;*r0p=r0;}
+static void f281(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-64);*r1p=r1;*r0p=r0;}
+static void f282(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,63);*r1p=r1;*r0p=r0;}
+static void f283(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-65);*r1p=r1;*r0p=r0;}
+static void f284(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,128);*r1p=r1;*r0p=r0;}
+static void f285(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-128);*r1p=r1;*r0p=r0;}
+static void f286(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,127);*r1p=r1;*r0p=r0;}
+static void f287(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-129);*r1p=r1;*r0p=r0;}
+static void f288(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,256);*r1p=r1;*r0p=r0;}
+static void f289(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-256);*r1p=r1;*r0p=r0;}
+static void f290(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,255);*r1p=r1;*r0p=r0;}
+static void f291(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-257);*r1p=r1;*r0p=r0;}
+static void f292(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,512);*r1p=r1;*r0p=r0;}
+static void f293(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-512);*r1p=r1;*r0p=r0;}
+static void f294(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,511);*r1p=r1;*r0p=r0;}
+static void f295(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-513);*r1p=r1;*r0p=r0;}
+static void f296(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,1024);*r1p=r1;*r0p=r0;}
+static void f297(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-1024);*r1p=r1;*r0p=r0;}
+static void f298(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,1023);*r1p=r1;*r0p=r0;}
+static void f299(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-1025);*r1p=r1;*r0p=r0;}
+static void f300(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,2048);*r1p=r1;*r0p=r0;}
+static void f301(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-2048);*r1p=r1;*r0p=r0;}
+static void f302(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,2047);*r1p=r1;*r0p=r0;}
+static void f303(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-2049);*r1p=r1;*r0p=r0;}
+static void f304(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,4096);*r1p=r1;*r0p=r0;}
+static void f305(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-4096);*r1p=r1;*r0p=r0;}
+static void f306(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,4095);*r1p=r1;*r0p=r0;}
+static void f307(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-4097);*r1p=r1;*r0p=r0;}
+static void f308(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,8192);*r1p=r1;*r0p=r0;}
+static void f309(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-8192);*r1p=r1;*r0p=r0;}
+static void f310(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,8191);*r1p=r1;*r0p=r0;}
+static void f311(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-8193);*r1p=r1;*r0p=r0;}
+static void f312(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,16384);*r1p=r1;*r0p=r0;}
+static void f313(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-16384);*r1p=r1;*r0p=r0;}
+static void f314(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,16383);*r1p=r1;*r0p=r0;}
+static void f315(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-16385);*r1p=r1;*r0p=r0;}
+static void f316(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,32768);*r1p=r1;*r0p=r0;}
+static void f317(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-32768);*r1p=r1;*r0p=r0;}
+static void f318(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,32767);*r1p=r1;*r0p=r0;}
+static void f319(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2,0,-32769);*r1p=r1;*r0p=r0;}
+static void f320(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,1);*r1p=r1;*r0p=r0;}
+static void f321(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-1);*r1p=r1;*r0p=r0;}
+static void f322(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,0);*r1p=r1;*r0p=r0;}
+static void f323(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-2);*r1p=r1;*r0p=r0;}
+static void f324(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,2);*r1p=r1;*r0p=r0;}
+static void f325(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-2);*r1p=r1;*r0p=r0;}
+static void f326(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,1);*r1p=r1;*r0p=r0;}
+static void f327(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-3);*r1p=r1;*r0p=r0;}
+static void f328(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,4);*r1p=r1;*r0p=r0;}
+static void f329(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-4);*r1p=r1;*r0p=r0;}
+static void f330(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,3);*r1p=r1;*r0p=r0;}
+static void f331(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-5);*r1p=r1;*r0p=r0;}
+static void f332(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,8);*r1p=r1;*r0p=r0;}
+static void f333(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-8);*r1p=r1;*r0p=r0;}
+static void f334(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,7);*r1p=r1;*r0p=r0;}
+static void f335(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-9);*r1p=r1;*r0p=r0;}
+static void f336(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,16);*r1p=r1;*r0p=r0;}
+static void f337(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-16);*r1p=r1;*r0p=r0;}
+static void f338(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,15);*r1p=r1;*r0p=r0;}
+static void f339(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-17);*r1p=r1;*r0p=r0;}
+static void f340(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,32);*r1p=r1;*r0p=r0;}
+static void f341(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-32);*r1p=r1;*r0p=r0;}
+static void f342(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,31);*r1p=r1;*r0p=r0;}
+static void f343(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-33);*r1p=r1;*r0p=r0;}
+static void f344(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,64);*r1p=r1;*r0p=r0;}
+static void f345(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-64);*r1p=r1;*r0p=r0;}
+static void f346(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,63);*r1p=r1;*r0p=r0;}
+static void f347(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-65);*r1p=r1;*r0p=r0;}
+static void f348(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,128);*r1p=r1;*r0p=r0;}
+static void f349(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-128);*r1p=r1;*r0p=r0;}
+static void f350(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,127);*r1p=r1;*r0p=r0;}
+static void f351(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-129);*r1p=r1;*r0p=r0;}
+static void f352(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,256);*r1p=r1;*r0p=r0;}
+static void f353(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-256);*r1p=r1;*r0p=r0;}
+static void f354(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,255);*r1p=r1;*r0p=r0;}
+static void f355(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-257);*r1p=r1;*r0p=r0;}
+static void f356(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,512);*r1p=r1;*r0p=r0;}
+static void f357(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-512);*r1p=r1;*r0p=r0;}
+static void f358(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,511);*r1p=r1;*r0p=r0;}
+static void f359(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-513);*r1p=r1;*r0p=r0;}
+static void f360(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,1024);*r1p=r1;*r0p=r0;}
+static void f361(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-1024);*r1p=r1;*r0p=r0;}
+static void f362(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,1023);*r1p=r1;*r0p=r0;}
+static void f363(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-1025);*r1p=r1;*r0p=r0;}
+static void f364(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,2048);*r1p=r1;*r0p=r0;}
+static void f365(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-2048);*r1p=r1;*r0p=r0;}
+static void f366(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,2047);*r1p=r1;*r0p=r0;}
+static void f367(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-2049);*r1p=r1;*r0p=r0;}
+static void f368(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,4096);*r1p=r1;*r0p=r0;}
+static void f369(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-4096);*r1p=r1;*r0p=r0;}
+static void f370(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,4095);*r1p=r1;*r0p=r0;}
+static void f371(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-4097);*r1p=r1;*r0p=r0;}
+static void f372(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,8192);*r1p=r1;*r0p=r0;}
+static void f373(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-8192);*r1p=r1;*r0p=r0;}
+static void f374(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,8191);*r1p=r1;*r0p=r0;}
+static void f375(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-8193);*r1p=r1;*r0p=r0;}
+static void f376(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,16384);*r1p=r1;*r0p=r0;}
+static void f377(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-16384);*r1p=r1;*r0p=r0;}
+static void f378(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,16383);*r1p=r1;*r0p=r0;}
+static void f379(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-16385);*r1p=r1;*r0p=r0;}
+static void f380(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,32768);*r1p=r1;*r0p=r0;}
+static void f381(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-32768);*r1p=r1;*r0p=r0;}
+static void f382(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,32767);*r1p=r1;*r0p=r0;}
+static void f383(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2,0,-32769);*r1p=r1;*r0p=r0;}
+static void f384(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,1);*r1p=r1;*r0p=r0;}
+static void f385(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-1);*r1p=r1;*r0p=r0;}
+static void f386(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,0);*r1p=r1;*r0p=r0;}
+static void f387(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-2);*r1p=r1;*r0p=r0;}
+static void f388(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,2);*r1p=r1;*r0p=r0;}
+static void f389(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-2);*r1p=r1;*r0p=r0;}
+static void f390(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,1);*r1p=r1;*r0p=r0;}
+static void f391(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-3);*r1p=r1;*r0p=r0;}
+static void f392(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,4);*r1p=r1;*r0p=r0;}
+static void f393(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-4);*r1p=r1;*r0p=r0;}
+static void f394(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,3);*r1p=r1;*r0p=r0;}
+static void f395(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-5);*r1p=r1;*r0p=r0;}
+static void f396(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,8);*r1p=r1;*r0p=r0;}
+static void f397(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-8);*r1p=r1;*r0p=r0;}
+static void f398(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,7);*r1p=r1;*r0p=r0;}
+static void f399(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-9);*r1p=r1;*r0p=r0;}
+static void f400(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,16);*r1p=r1;*r0p=r0;}
+static void f401(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-16);*r1p=r1;*r0p=r0;}
+static void f402(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,15);*r1p=r1;*r0p=r0;}
+static void f403(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-17);*r1p=r1;*r0p=r0;}
+static void f404(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,32);*r1p=r1;*r0p=r0;}
+static void f405(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-32);*r1p=r1;*r0p=r0;}
+static void f406(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,31);*r1p=r1;*r0p=r0;}
+static void f407(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-33);*r1p=r1;*r0p=r0;}
+static void f408(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,64);*r1p=r1;*r0p=r0;}
+static void f409(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-64);*r1p=r1;*r0p=r0;}
+static void f410(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,63);*r1p=r1;*r0p=r0;}
+static void f411(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-65);*r1p=r1;*r0p=r0;}
+static void f412(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,128);*r1p=r1;*r0p=r0;}
+static void f413(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-128);*r1p=r1;*r0p=r0;}
+static void f414(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,127);*r1p=r1;*r0p=r0;}
+static void f415(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-129);*r1p=r1;*r0p=r0;}
+static void f416(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,256);*r1p=r1;*r0p=r0;}
+static void f417(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-256);*r1p=r1;*r0p=r0;}
+static void f418(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,255);*r1p=r1;*r0p=r0;}
+static void f419(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-257);*r1p=r1;*r0p=r0;}
+static void f420(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,512);*r1p=r1;*r0p=r0;}
+static void f421(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-512);*r1p=r1;*r0p=r0;}
+static void f422(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,511);*r1p=r1;*r0p=r0;}
+static void f423(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-513);*r1p=r1;*r0p=r0;}
+static void f424(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,1024);*r1p=r1;*r0p=r0;}
+static void f425(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-1024);*r1p=r1;*r0p=r0;}
+static void f426(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,1023);*r1p=r1;*r0p=r0;}
+static void f427(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-1025);*r1p=r1;*r0p=r0;}
+static void f428(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,2048);*r1p=r1;*r0p=r0;}
+static void f429(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-2048);*r1p=r1;*r0p=r0;}
+static void f430(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,2047);*r1p=r1;*r0p=r0;}
+static void f431(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-2049);*r1p=r1;*r0p=r0;}
+static void f432(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,4096);*r1p=r1;*r0p=r0;}
+static void f433(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-4096);*r1p=r1;*r0p=r0;}
+static void f434(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,4095);*r1p=r1;*r0p=r0;}
+static void f435(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-4097);*r1p=r1;*r0p=r0;}
+static void f436(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,8192);*r1p=r1;*r0p=r0;}
+static void f437(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-8192);*r1p=r1;*r0p=r0;}
+static void f438(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,8191);*r1p=r1;*r0p=r0;}
+static void f439(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-8193);*r1p=r1;*r0p=r0;}
+static void f440(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,16384);*r1p=r1;*r0p=r0;}
+static void f441(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-16384);*r1p=r1;*r0p=r0;}
+static void f442(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,16383);*r1p=r1;*r0p=r0;}
+static void f443(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-16385);*r1p=r1;*r0p=r0;}
+static void f444(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,32768);*r1p=r1;*r0p=r0;}
+static void f445(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-32768);*r1p=r1;*r0p=r0;}
+static void f446(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,32767);*r1p=r1;*r0p=r0;}
+static void f447(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1,0,-32769);*r1p=r1;*r0p=r0;}
+static void f448(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,1);*r1p=r1;*r0p=r0;}
+static void f449(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-1);*r1p=r1;*r0p=r0;}
+static void f450(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,0);*r1p=r1;*r0p=r0;}
+static void f451(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-2);*r1p=r1;*r0p=r0;}
+static void f452(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,2);*r1p=r1;*r0p=r0;}
+static void f453(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-2);*r1p=r1;*r0p=r0;}
+static void f454(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,1);*r1p=r1;*r0p=r0;}
+static void f455(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-3);*r1p=r1;*r0p=r0;}
+static void f456(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,4);*r1p=r1;*r0p=r0;}
+static void f457(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-4);*r1p=r1;*r0p=r0;}
+static void f458(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,3);*r1p=r1;*r0p=r0;}
+static void f459(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-5);*r1p=r1;*r0p=r0;}
+static void f460(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,8);*r1p=r1;*r0p=r0;}
+static void f461(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-8);*r1p=r1;*r0p=r0;}
+static void f462(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,7);*r1p=r1;*r0p=r0;}
+static void f463(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-9);*r1p=r1;*r0p=r0;}
+static void f464(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,16);*r1p=r1;*r0p=r0;}
+static void f465(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-16);*r1p=r1;*r0p=r0;}
+static void f466(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,15);*r1p=r1;*r0p=r0;}
+static void f467(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-17);*r1p=r1;*r0p=r0;}
+static void f468(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,32);*r1p=r1;*r0p=r0;}
+static void f469(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-32);*r1p=r1;*r0p=r0;}
+static void f470(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,31);*r1p=r1;*r0p=r0;}
+static void f471(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-33);*r1p=r1;*r0p=r0;}
+static void f472(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,64);*r1p=r1;*r0p=r0;}
+static void f473(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-64);*r1p=r1;*r0p=r0;}
+static void f474(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,63);*r1p=r1;*r0p=r0;}
+static void f475(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-65);*r1p=r1;*r0p=r0;}
+static void f476(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,128);*r1p=r1;*r0p=r0;}
+static void f477(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-128);*r1p=r1;*r0p=r0;}
+static void f478(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,127);*r1p=r1;*r0p=r0;}
+static void f479(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-129);*r1p=r1;*r0p=r0;}
+static void f480(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,256);*r1p=r1;*r0p=r0;}
+static void f481(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-256);*r1p=r1;*r0p=r0;}
+static void f482(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,255);*r1p=r1;*r0p=r0;}
+static void f483(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-257);*r1p=r1;*r0p=r0;}
+static void f484(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,512);*r1p=r1;*r0p=r0;}
+static void f485(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-512);*r1p=r1;*r0p=r0;}
+static void f486(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,511);*r1p=r1;*r0p=r0;}
+static void f487(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-513);*r1p=r1;*r0p=r0;}
+static void f488(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,1024);*r1p=r1;*r0p=r0;}
+static void f489(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-1024);*r1p=r1;*r0p=r0;}
+static void f490(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,1023);*r1p=r1;*r0p=r0;}
+static void f491(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-1025);*r1p=r1;*r0p=r0;}
+static void f492(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,2048);*r1p=r1;*r0p=r0;}
+static void f493(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-2048);*r1p=r1;*r0p=r0;}
+static void f494(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,2047);*r1p=r1;*r0p=r0;}
+static void f495(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-2049);*r1p=r1;*r0p=r0;}
+static void f496(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,4096);*r1p=r1;*r0p=r0;}
+static void f497(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-4096);*r1p=r1;*r0p=r0;}
+static void f498(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,4095);*r1p=r1;*r0p=r0;}
+static void f499(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-4097);*r1p=r1;*r0p=r0;}
+static void f500(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,8192);*r1p=r1;*r0p=r0;}
+static void f501(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-8192);*r1p=r1;*r0p=r0;}
+static void f502(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,8191);*r1p=r1;*r0p=r0;}
+static void f503(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-8193);*r1p=r1;*r0p=r0;}
+static void f504(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,16384);*r1p=r1;*r0p=r0;}
+static void f505(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-16384);*r1p=r1;*r0p=r0;}
+static void f506(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,16383);*r1p=r1;*r0p=r0;}
+static void f507(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-16385);*r1p=r1;*r0p=r0;}
+static void f508(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,32768);*r1p=r1;*r0p=r0;}
+static void f509(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-32768);*r1p=r1;*r0p=r0;}
+static void f510(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,32767);*r1p=r1;*r0p=r0;}
+static void f511(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-3,0,-32769);*r1p=r1;*r0p=r0;}
+static void f512(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,1);*r1p=r1;*r0p=r0;}
+static void f513(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-1);*r1p=r1;*r0p=r0;}
+static void f514(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,0);*r1p=r1;*r0p=r0;}
+static void f515(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-2);*r1p=r1;*r0p=r0;}
+static void f516(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,2);*r1p=r1;*r0p=r0;}
+static void f517(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-2);*r1p=r1;*r0p=r0;}
+static void f518(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,1);*r1p=r1;*r0p=r0;}
+static void f519(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-3);*r1p=r1;*r0p=r0;}
+static void f520(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,4);*r1p=r1;*r0p=r0;}
+static void f521(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-4);*r1p=r1;*r0p=r0;}
+static void f522(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,3);*r1p=r1;*r0p=r0;}
+static void f523(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-5);*r1p=r1;*r0p=r0;}
+static void f524(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,8);*r1p=r1;*r0p=r0;}
+static void f525(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-8);*r1p=r1;*r0p=r0;}
+static void f526(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,7);*r1p=r1;*r0p=r0;}
+static void f527(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-9);*r1p=r1;*r0p=r0;}
+static void f528(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,16);*r1p=r1;*r0p=r0;}
+static void f529(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-16);*r1p=r1;*r0p=r0;}
+static void f530(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,15);*r1p=r1;*r0p=r0;}
+static void f531(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-17);*r1p=r1;*r0p=r0;}
+static void f532(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,32);*r1p=r1;*r0p=r0;}
+static void f533(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-32);*r1p=r1;*r0p=r0;}
+static void f534(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,31);*r1p=r1;*r0p=r0;}
+static void f535(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-33);*r1p=r1;*r0p=r0;}
+static void f536(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,64);*r1p=r1;*r0p=r0;}
+static void f537(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-64);*r1p=r1;*r0p=r0;}
+static void f538(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,63);*r1p=r1;*r0p=r0;}
+static void f539(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-65);*r1p=r1;*r0p=r0;}
+static void f540(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,128);*r1p=r1;*r0p=r0;}
+static void f541(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-128);*r1p=r1;*r0p=r0;}
+static void f542(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,127);*r1p=r1;*r0p=r0;}
+static void f543(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-129);*r1p=r1;*r0p=r0;}
+static void f544(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,256);*r1p=r1;*r0p=r0;}
+static void f545(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-256);*r1p=r1;*r0p=r0;}
+static void f546(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,255);*r1p=r1;*r0p=r0;}
+static void f547(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-257);*r1p=r1;*r0p=r0;}
+static void f548(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,512);*r1p=r1;*r0p=r0;}
+static void f549(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-512);*r1p=r1;*r0p=r0;}
+static void f550(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,511);*r1p=r1;*r0p=r0;}
+static void f551(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-513);*r1p=r1;*r0p=r0;}
+static void f552(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,1024);*r1p=r1;*r0p=r0;}
+static void f553(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-1024);*r1p=r1;*r0p=r0;}
+static void f554(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,1023);*r1p=r1;*r0p=r0;}
+static void f555(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-1025);*r1p=r1;*r0p=r0;}
+static void f556(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,2048);*r1p=r1;*r0p=r0;}
+static void f557(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-2048);*r1p=r1;*r0p=r0;}
+static void f558(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,2047);*r1p=r1;*r0p=r0;}
+static void f559(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-2049);*r1p=r1;*r0p=r0;}
+static void f560(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,4096);*r1p=r1;*r0p=r0;}
+static void f561(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-4096);*r1p=r1;*r0p=r0;}
+static void f562(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,4095);*r1p=r1;*r0p=r0;}
+static void f563(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-4097);*r1p=r1;*r0p=r0;}
+static void f564(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,8192);*r1p=r1;*r0p=r0;}
+static void f565(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-8192);*r1p=r1;*r0p=r0;}
+static void f566(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,8191);*r1p=r1;*r0p=r0;}
+static void f567(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-8193);*r1p=r1;*r0p=r0;}
+static void f568(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,16384);*r1p=r1;*r0p=r0;}
+static void f569(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-16384);*r1p=r1;*r0p=r0;}
+static void f570(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,16383);*r1p=r1;*r0p=r0;}
+static void f571(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-16385);*r1p=r1;*r0p=r0;}
+static void f572(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,32768);*r1p=r1;*r0p=r0;}
+static void f573(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-32768);*r1p=r1;*r0p=r0;}
+static void f574(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,32767);*r1p=r1;*r0p=r0;}
+static void f575(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4,0,-32769);*r1p=r1;*r0p=r0;}
+static void f576(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,1);*r1p=r1;*r0p=r0;}
+static void f577(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-1);*r1p=r1;*r0p=r0;}
+static void f578(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,0);*r1p=r1;*r0p=r0;}
+static void f579(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-2);*r1p=r1;*r0p=r0;}
+static void f580(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,2);*r1p=r1;*r0p=r0;}
+static void f581(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-2);*r1p=r1;*r0p=r0;}
+static void f582(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,1);*r1p=r1;*r0p=r0;}
+static void f583(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-3);*r1p=r1;*r0p=r0;}
+static void f584(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,4);*r1p=r1;*r0p=r0;}
+static void f585(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-4);*r1p=r1;*r0p=r0;}
+static void f586(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,3);*r1p=r1;*r0p=r0;}
+static void f587(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-5);*r1p=r1;*r0p=r0;}
+static void f588(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,8);*r1p=r1;*r0p=r0;}
+static void f589(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-8);*r1p=r1;*r0p=r0;}
+static void f590(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,7);*r1p=r1;*r0p=r0;}
+static void f591(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-9);*r1p=r1;*r0p=r0;}
+static void f592(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,16);*r1p=r1;*r0p=r0;}
+static void f593(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-16);*r1p=r1;*r0p=r0;}
+static void f594(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,15);*r1p=r1;*r0p=r0;}
+static void f595(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-17);*r1p=r1;*r0p=r0;}
+static void f596(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,32);*r1p=r1;*r0p=r0;}
+static void f597(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-32);*r1p=r1;*r0p=r0;}
+static void f598(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,31);*r1p=r1;*r0p=r0;}
+static void f599(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-33);*r1p=r1;*r0p=r0;}
+static void f600(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,64);*r1p=r1;*r0p=r0;}
+static void f601(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-64);*r1p=r1;*r0p=r0;}
+static void f602(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,63);*r1p=r1;*r0p=r0;}
+static void f603(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-65);*r1p=r1;*r0p=r0;}
+static void f604(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,128);*r1p=r1;*r0p=r0;}
+static void f605(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-128);*r1p=r1;*r0p=r0;}
+static void f606(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,127);*r1p=r1;*r0p=r0;}
+static void f607(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-129);*r1p=r1;*r0p=r0;}
+static void f608(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,256);*r1p=r1;*r0p=r0;}
+static void f609(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-256);*r1p=r1;*r0p=r0;}
+static void f610(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,255);*r1p=r1;*r0p=r0;}
+static void f611(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-257);*r1p=r1;*r0p=r0;}
+static void f612(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,512);*r1p=r1;*r0p=r0;}
+static void f613(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-512);*r1p=r1;*r0p=r0;}
+static void f614(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,511);*r1p=r1;*r0p=r0;}
+static void f615(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-513);*r1p=r1;*r0p=r0;}
+static void f616(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,1024);*r1p=r1;*r0p=r0;}
+static void f617(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-1024);*r1p=r1;*r0p=r0;}
+static void f618(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,1023);*r1p=r1;*r0p=r0;}
+static void f619(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-1025);*r1p=r1;*r0p=r0;}
+static void f620(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,2048);*r1p=r1;*r0p=r0;}
+static void f621(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-2048);*r1p=r1;*r0p=r0;}
+static void f622(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,2047);*r1p=r1;*r0p=r0;}
+static void f623(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-2049);*r1p=r1;*r0p=r0;}
+static void f624(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,4096);*r1p=r1;*r0p=r0;}
+static void f625(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-4096);*r1p=r1;*r0p=r0;}
+static void f626(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,4095);*r1p=r1;*r0p=r0;}
+static void f627(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-4097);*r1p=r1;*r0p=r0;}
+static void f628(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,8192);*r1p=r1;*r0p=r0;}
+static void f629(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-8192);*r1p=r1;*r0p=r0;}
+static void f630(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,8191);*r1p=r1;*r0p=r0;}
+static void f631(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-8193);*r1p=r1;*r0p=r0;}
+static void f632(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,16384);*r1p=r1;*r0p=r0;}
+static void f633(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-16384);*r1p=r1;*r0p=r0;}
+static void f634(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,16383);*r1p=r1;*r0p=r0;}
+static void f635(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-16385);*r1p=r1;*r0p=r0;}
+static void f636(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,32768);*r1p=r1;*r0p=r0;}
+static void f637(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-32768);*r1p=r1;*r0p=r0;}
+static void f638(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,32767);*r1p=r1;*r0p=r0;}
+static void f639(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4,0,-32769);*r1p=r1;*r0p=r0;}
+static void f640(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,1);*r1p=r1;*r0p=r0;}
+static void f641(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-1);*r1p=r1;*r0p=r0;}
+static void f642(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,0);*r1p=r1;*r0p=r0;}
+static void f643(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-2);*r1p=r1;*r0p=r0;}
+static void f644(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,2);*r1p=r1;*r0p=r0;}
+static void f645(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-2);*r1p=r1;*r0p=r0;}
+static void f646(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,1);*r1p=r1;*r0p=r0;}
+static void f647(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-3);*r1p=r1;*r0p=r0;}
+static void f648(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,4);*r1p=r1;*r0p=r0;}
+static void f649(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-4);*r1p=r1;*r0p=r0;}
+static void f650(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,3);*r1p=r1;*r0p=r0;}
+static void f651(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-5);*r1p=r1;*r0p=r0;}
+static void f652(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,8);*r1p=r1;*r0p=r0;}
+static void f653(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-8);*r1p=r1;*r0p=r0;}
+static void f654(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,7);*r1p=r1;*r0p=r0;}
+static void f655(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-9);*r1p=r1;*r0p=r0;}
+static void f656(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,16);*r1p=r1;*r0p=r0;}
+static void f657(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-16);*r1p=r1;*r0p=r0;}
+static void f658(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,15);*r1p=r1;*r0p=r0;}
+static void f659(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-17);*r1p=r1;*r0p=r0;}
+static void f660(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,32);*r1p=r1;*r0p=r0;}
+static void f661(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-32);*r1p=r1;*r0p=r0;}
+static void f662(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,31);*r1p=r1;*r0p=r0;}
+static void f663(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-33);*r1p=r1;*r0p=r0;}
+static void f664(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,64);*r1p=r1;*r0p=r0;}
+static void f665(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-64);*r1p=r1;*r0p=r0;}
+static void f666(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,63);*r1p=r1;*r0p=r0;}
+static void f667(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-65);*r1p=r1;*r0p=r0;}
+static void f668(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,128);*r1p=r1;*r0p=r0;}
+static void f669(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-128);*r1p=r1;*r0p=r0;}
+static void f670(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,127);*r1p=r1;*r0p=r0;}
+static void f671(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-129);*r1p=r1;*r0p=r0;}
+static void f672(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,256);*r1p=r1;*r0p=r0;}
+static void f673(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-256);*r1p=r1;*r0p=r0;}
+static void f674(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,255);*r1p=r1;*r0p=r0;}
+static void f675(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-257);*r1p=r1;*r0p=r0;}
+static void f676(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,512);*r1p=r1;*r0p=r0;}
+static void f677(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-512);*r1p=r1;*r0p=r0;}
+static void f678(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,511);*r1p=r1;*r0p=r0;}
+static void f679(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-513);*r1p=r1;*r0p=r0;}
+static void f680(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,1024);*r1p=r1;*r0p=r0;}
+static void f681(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-1024);*r1p=r1;*r0p=r0;}
+static void f682(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,1023);*r1p=r1;*r0p=r0;}
+static void f683(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-1025);*r1p=r1;*r0p=r0;}
+static void f684(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,2048);*r1p=r1;*r0p=r0;}
+static void f685(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-2048);*r1p=r1;*r0p=r0;}
+static void f686(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,2047);*r1p=r1;*r0p=r0;}
+static void f687(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-2049);*r1p=r1;*r0p=r0;}
+static void f688(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,4096);*r1p=r1;*r0p=r0;}
+static void f689(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-4096);*r1p=r1;*r0p=r0;}
+static void f690(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,4095);*r1p=r1;*r0p=r0;}
+static void f691(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-4097);*r1p=r1;*r0p=r0;}
+static void f692(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,8192);*r1p=r1;*r0p=r0;}
+static void f693(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-8192);*r1p=r1;*r0p=r0;}
+static void f694(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,8191);*r1p=r1;*r0p=r0;}
+static void f695(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-8193);*r1p=r1;*r0p=r0;}
+static void f696(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,16384);*r1p=r1;*r0p=r0;}
+static void f697(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-16384);*r1p=r1;*r0p=r0;}
+static void f698(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,16383);*r1p=r1;*r0p=r0;}
+static void f699(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-16385);*r1p=r1;*r0p=r0;}
+static void f700(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,32768);*r1p=r1;*r0p=r0;}
+static void f701(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-32768);*r1p=r1;*r0p=r0;}
+static void f702(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,32767);*r1p=r1;*r0p=r0;}
+static void f703(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,3,0,-32769);*r1p=r1;*r0p=r0;}
+static void f704(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,1);*r1p=r1;*r0p=r0;}
+static void f705(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-1);*r1p=r1;*r0p=r0;}
+static void f706(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,0);*r1p=r1;*r0p=r0;}
+static void f707(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-2);*r1p=r1;*r0p=r0;}
+static void f708(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,2);*r1p=r1;*r0p=r0;}
+static void f709(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-2);*r1p=r1;*r0p=r0;}
+static void f710(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,1);*r1p=r1;*r0p=r0;}
+static void f711(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-3);*r1p=r1;*r0p=r0;}
+static void f712(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,4);*r1p=r1;*r0p=r0;}
+static void f713(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-4);*r1p=r1;*r0p=r0;}
+static void f714(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,3);*r1p=r1;*r0p=r0;}
+static void f715(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-5);*r1p=r1;*r0p=r0;}
+static void f716(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,8);*r1p=r1;*r0p=r0;}
+static void f717(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-8);*r1p=r1;*r0p=r0;}
+static void f718(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,7);*r1p=r1;*r0p=r0;}
+static void f719(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-9);*r1p=r1;*r0p=r0;}
+static void f720(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,16);*r1p=r1;*r0p=r0;}
+static void f721(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-16);*r1p=r1;*r0p=r0;}
+static void f722(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,15);*r1p=r1;*r0p=r0;}
+static void f723(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-17);*r1p=r1;*r0p=r0;}
+static void f724(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,32);*r1p=r1;*r0p=r0;}
+static void f725(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-32);*r1p=r1;*r0p=r0;}
+static void f726(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,31);*r1p=r1;*r0p=r0;}
+static void f727(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-33);*r1p=r1;*r0p=r0;}
+static void f728(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,64);*r1p=r1;*r0p=r0;}
+static void f729(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-64);*r1p=r1;*r0p=r0;}
+static void f730(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,63);*r1p=r1;*r0p=r0;}
+static void f731(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-65);*r1p=r1;*r0p=r0;}
+static void f732(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,128);*r1p=r1;*r0p=r0;}
+static void f733(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-128);*r1p=r1;*r0p=r0;}
+static void f734(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,127);*r1p=r1;*r0p=r0;}
+static void f735(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-129);*r1p=r1;*r0p=r0;}
+static void f736(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,256);*r1p=r1;*r0p=r0;}
+static void f737(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-256);*r1p=r1;*r0p=r0;}
+static void f738(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,255);*r1p=r1;*r0p=r0;}
+static void f739(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-257);*r1p=r1;*r0p=r0;}
+static void f740(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,512);*r1p=r1;*r0p=r0;}
+static void f741(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-512);*r1p=r1;*r0p=r0;}
+static void f742(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,511);*r1p=r1;*r0p=r0;}
+static void f743(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-513);*r1p=r1;*r0p=r0;}
+static void f744(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,1024);*r1p=r1;*r0p=r0;}
+static void f745(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-1024);*r1p=r1;*r0p=r0;}
+static void f746(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,1023);*r1p=r1;*r0p=r0;}
+static void f747(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-1025);*r1p=r1;*r0p=r0;}
+static void f748(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,2048);*r1p=r1;*r0p=r0;}
+static void f749(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-2048);*r1p=r1;*r0p=r0;}
+static void f750(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,2047);*r1p=r1;*r0p=r0;}
+static void f751(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-2049);*r1p=r1;*r0p=r0;}
+static void f752(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,4096);*r1p=r1;*r0p=r0;}
+static void f753(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-4096);*r1p=r1;*r0p=r0;}
+static void f754(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,4095);*r1p=r1;*r0p=r0;}
+static void f755(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-4097);*r1p=r1;*r0p=r0;}
+static void f756(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,8192);*r1p=r1;*r0p=r0;}
+static void f757(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-8192);*r1p=r1;*r0p=r0;}
+static void f758(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,8191);*r1p=r1;*r0p=r0;}
+static void f759(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-8193);*r1p=r1;*r0p=r0;}
+static void f760(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,16384);*r1p=r1;*r0p=r0;}
+static void f761(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-16384);*r1p=r1;*r0p=r0;}
+static void f762(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,16383);*r1p=r1;*r0p=r0;}
+static void f763(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-16385);*r1p=r1;*r0p=r0;}
+static void f764(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,32768);*r1p=r1;*r0p=r0;}
+static void f765(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-32768);*r1p=r1;*r0p=r0;}
+static void f766(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,32767);*r1p=r1;*r0p=r0;}
+static void f767(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-5,0,-32769);*r1p=r1;*r0p=r0;}
+static void f768(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,1);*r1p=r1;*r0p=r0;}
+static void f769(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-1);*r1p=r1;*r0p=r0;}
+static void f770(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,0);*r1p=r1;*r0p=r0;}
+static void f771(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-2);*r1p=r1;*r0p=r0;}
+static void f772(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,2);*r1p=r1;*r0p=r0;}
+static void f773(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-2);*r1p=r1;*r0p=r0;}
+static void f774(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,1);*r1p=r1;*r0p=r0;}
+static void f775(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-3);*r1p=r1;*r0p=r0;}
+static void f776(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,4);*r1p=r1;*r0p=r0;}
+static void f777(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-4);*r1p=r1;*r0p=r0;}
+static void f778(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,3);*r1p=r1;*r0p=r0;}
+static void f779(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-5);*r1p=r1;*r0p=r0;}
+static void f780(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,8);*r1p=r1;*r0p=r0;}
+static void f781(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-8);*r1p=r1;*r0p=r0;}
+static void f782(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,7);*r1p=r1;*r0p=r0;}
+static void f783(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-9);*r1p=r1;*r0p=r0;}
+static void f784(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,16);*r1p=r1;*r0p=r0;}
+static void f785(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-16);*r1p=r1;*r0p=r0;}
+static void f786(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,15);*r1p=r1;*r0p=r0;}
+static void f787(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-17);*r1p=r1;*r0p=r0;}
+static void f788(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,32);*r1p=r1;*r0p=r0;}
+static void f789(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-32);*r1p=r1;*r0p=r0;}
+static void f790(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,31);*r1p=r1;*r0p=r0;}
+static void f791(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-33);*r1p=r1;*r0p=r0;}
+static void f792(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,64);*r1p=r1;*r0p=r0;}
+static void f793(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-64);*r1p=r1;*r0p=r0;}
+static void f794(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,63);*r1p=r1;*r0p=r0;}
+static void f795(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-65);*r1p=r1;*r0p=r0;}
+static void f796(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,128);*r1p=r1;*r0p=r0;}
+static void f797(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-128);*r1p=r1;*r0p=r0;}
+static void f798(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,127);*r1p=r1;*r0p=r0;}
+static void f799(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-129);*r1p=r1;*r0p=r0;}
+static void f800(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,256);*r1p=r1;*r0p=r0;}
+static void f801(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-256);*r1p=r1;*r0p=r0;}
+static void f802(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,255);*r1p=r1;*r0p=r0;}
+static void f803(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-257);*r1p=r1;*r0p=r0;}
+static void f804(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,512);*r1p=r1;*r0p=r0;}
+static void f805(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-512);*r1p=r1;*r0p=r0;}
+static void f806(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,511);*r1p=r1;*r0p=r0;}
+static void f807(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-513);*r1p=r1;*r0p=r0;}
+static void f808(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,1024);*r1p=r1;*r0p=r0;}
+static void f809(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-1024);*r1p=r1;*r0p=r0;}
+static void f810(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,1023);*r1p=r1;*r0p=r0;}
+static void f811(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-1025);*r1p=r1;*r0p=r0;}
+static void f812(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,2048);*r1p=r1;*r0p=r0;}
+static void f813(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-2048);*r1p=r1;*r0p=r0;}
+static void f814(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,2047);*r1p=r1;*r0p=r0;}
+static void f815(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-2049);*r1p=r1;*r0p=r0;}
+static void f816(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,4096);*r1p=r1;*r0p=r0;}
+static void f817(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-4096);*r1p=r1;*r0p=r0;}
+static void f818(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,4095);*r1p=r1;*r0p=r0;}
+static void f819(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-4097);*r1p=r1;*r0p=r0;}
+static void f820(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,8192);*r1p=r1;*r0p=r0;}
+static void f821(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-8192);*r1p=r1;*r0p=r0;}
+static void f822(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,8191);*r1p=r1;*r0p=r0;}
+static void f823(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-8193);*r1p=r1;*r0p=r0;}
+static void f824(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,16384);*r1p=r1;*r0p=r0;}
+static void f825(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-16384);*r1p=r1;*r0p=r0;}
+static void f826(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,16383);*r1p=r1;*r0p=r0;}
+static void f827(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-16385);*r1p=r1;*r0p=r0;}
+static void f828(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,32768);*r1p=r1;*r0p=r0;}
+static void f829(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-32768);*r1p=r1;*r0p=r0;}
+static void f830(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,32767);*r1p=r1;*r0p=r0;}
+static void f831(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8,0,-32769);*r1p=r1;*r0p=r0;}
+static void f832(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,1);*r1p=r1;*r0p=r0;}
+static void f833(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-1);*r1p=r1;*r0p=r0;}
+static void f834(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,0);*r1p=r1;*r0p=r0;}
+static void f835(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-2);*r1p=r1;*r0p=r0;}
+static void f836(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,2);*r1p=r1;*r0p=r0;}
+static void f837(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-2);*r1p=r1;*r0p=r0;}
+static void f838(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,1);*r1p=r1;*r0p=r0;}
+static void f839(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-3);*r1p=r1;*r0p=r0;}
+static void f840(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,4);*r1p=r1;*r0p=r0;}
+static void f841(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-4);*r1p=r1;*r0p=r0;}
+static void f842(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,3);*r1p=r1;*r0p=r0;}
+static void f843(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-5);*r1p=r1;*r0p=r0;}
+static void f844(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,8);*r1p=r1;*r0p=r0;}
+static void f845(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-8);*r1p=r1;*r0p=r0;}
+static void f846(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,7);*r1p=r1;*r0p=r0;}
+static void f847(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-9);*r1p=r1;*r0p=r0;}
+static void f848(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,16);*r1p=r1;*r0p=r0;}
+static void f849(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-16);*r1p=r1;*r0p=r0;}
+static void f850(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,15);*r1p=r1;*r0p=r0;}
+static void f851(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-17);*r1p=r1;*r0p=r0;}
+static void f852(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,32);*r1p=r1;*r0p=r0;}
+static void f853(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-32);*r1p=r1;*r0p=r0;}
+static void f854(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,31);*r1p=r1;*r0p=r0;}
+static void f855(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-33);*r1p=r1;*r0p=r0;}
+static void f856(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,64);*r1p=r1;*r0p=r0;}
+static void f857(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-64);*r1p=r1;*r0p=r0;}
+static void f858(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,63);*r1p=r1;*r0p=r0;}
+static void f859(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-65);*r1p=r1;*r0p=r0;}
+static void f860(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,128);*r1p=r1;*r0p=r0;}
+static void f861(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-128);*r1p=r1;*r0p=r0;}
+static void f862(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,127);*r1p=r1;*r0p=r0;}
+static void f863(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-129);*r1p=r1;*r0p=r0;}
+static void f864(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,256);*r1p=r1;*r0p=r0;}
+static void f865(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-256);*r1p=r1;*r0p=r0;}
+static void f866(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,255);*r1p=r1;*r0p=r0;}
+static void f867(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-257);*r1p=r1;*r0p=r0;}
+static void f868(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,512);*r1p=r1;*r0p=r0;}
+static void f869(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-512);*r1p=r1;*r0p=r0;}
+static void f870(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,511);*r1p=r1;*r0p=r0;}
+static void f871(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-513);*r1p=r1;*r0p=r0;}
+static void f872(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,1024);*r1p=r1;*r0p=r0;}
+static void f873(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-1024);*r1p=r1;*r0p=r0;}
+static void f874(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,1023);*r1p=r1;*r0p=r0;}
+static void f875(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-1025);*r1p=r1;*r0p=r0;}
+static void f876(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,2048);*r1p=r1;*r0p=r0;}
+static void f877(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-2048);*r1p=r1;*r0p=r0;}
+static void f878(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,2047);*r1p=r1;*r0p=r0;}
+static void f879(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-2049);*r1p=r1;*r0p=r0;}
+static void f880(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,4096);*r1p=r1;*r0p=r0;}
+static void f881(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-4096);*r1p=r1;*r0p=r0;}
+static void f882(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,4095);*r1p=r1;*r0p=r0;}
+static void f883(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-4097);*r1p=r1;*r0p=r0;}
+static void f884(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,8192);*r1p=r1;*r0p=r0;}
+static void f885(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-8192);*r1p=r1;*r0p=r0;}
+static void f886(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,8191);*r1p=r1;*r0p=r0;}
+static void f887(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-8193);*r1p=r1;*r0p=r0;}
+static void f888(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,16384);*r1p=r1;*r0p=r0;}
+static void f889(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-16384);*r1p=r1;*r0p=r0;}
+static void f890(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,16383);*r1p=r1;*r0p=r0;}
+static void f891(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-16385);*r1p=r1;*r0p=r0;}
+static void f892(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,32768);*r1p=r1;*r0p=r0;}
+static void f893(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-32768);*r1p=r1;*r0p=r0;}
+static void f894(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,32767);*r1p=r1;*r0p=r0;}
+static void f895(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8,0,-32769);*r1p=r1;*r0p=r0;}
+static void f896(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,1);*r1p=r1;*r0p=r0;}
+static void f897(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-1);*r1p=r1;*r0p=r0;}
+static void f898(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,0);*r1p=r1;*r0p=r0;}
+static void f899(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-2);*r1p=r1;*r0p=r0;}
+static void f900(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,2);*r1p=r1;*r0p=r0;}
+static void f901(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-2);*r1p=r1;*r0p=r0;}
+static void f902(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,1);*r1p=r1;*r0p=r0;}
+static void f903(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-3);*r1p=r1;*r0p=r0;}
+static void f904(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,4);*r1p=r1;*r0p=r0;}
+static void f905(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-4);*r1p=r1;*r0p=r0;}
+static void f906(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,3);*r1p=r1;*r0p=r0;}
+static void f907(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-5);*r1p=r1;*r0p=r0;}
+static void f908(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,8);*r1p=r1;*r0p=r0;}
+static void f909(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-8);*r1p=r1;*r0p=r0;}
+static void f910(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,7);*r1p=r1;*r0p=r0;}
+static void f911(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-9);*r1p=r1;*r0p=r0;}
+static void f912(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,16);*r1p=r1;*r0p=r0;}
+static void f913(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-16);*r1p=r1;*r0p=r0;}
+static void f914(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,15);*r1p=r1;*r0p=r0;}
+static void f915(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-17);*r1p=r1;*r0p=r0;}
+static void f916(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,32);*r1p=r1;*r0p=r0;}
+static void f917(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-32);*r1p=r1;*r0p=r0;}
+static void f918(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,31);*r1p=r1;*r0p=r0;}
+static void f919(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-33);*r1p=r1;*r0p=r0;}
+static void f920(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,64);*r1p=r1;*r0p=r0;}
+static void f921(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-64);*r1p=r1;*r0p=r0;}
+static void f922(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,63);*r1p=r1;*r0p=r0;}
+static void f923(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-65);*r1p=r1;*r0p=r0;}
+static void f924(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,128);*r1p=r1;*r0p=r0;}
+static void f925(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-128);*r1p=r1;*r0p=r0;}
+static void f926(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,127);*r1p=r1;*r0p=r0;}
+static void f927(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-129);*r1p=r1;*r0p=r0;}
+static void f928(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,256);*r1p=r1;*r0p=r0;}
+static void f929(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-256);*r1p=r1;*r0p=r0;}
+static void f930(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,255);*r1p=r1;*r0p=r0;}
+static void f931(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-257);*r1p=r1;*r0p=r0;}
+static void f932(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,512);*r1p=r1;*r0p=r0;}
+static void f933(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-512);*r1p=r1;*r0p=r0;}
+static void f934(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,511);*r1p=r1;*r0p=r0;}
+static void f935(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-513);*r1p=r1;*r0p=r0;}
+static void f936(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,1024);*r1p=r1;*r0p=r0;}
+static void f937(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-1024);*r1p=r1;*r0p=r0;}
+static void f938(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,1023);*r1p=r1;*r0p=r0;}
+static void f939(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-1025);*r1p=r1;*r0p=r0;}
+static void f940(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,2048);*r1p=r1;*r0p=r0;}
+static void f941(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-2048);*r1p=r1;*r0p=r0;}
+static void f942(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,2047);*r1p=r1;*r0p=r0;}
+static void f943(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-2049);*r1p=r1;*r0p=r0;}
+static void f944(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,4096);*r1p=r1;*r0p=r0;}
+static void f945(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-4096);*r1p=r1;*r0p=r0;}
+static void f946(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,4095);*r1p=r1;*r0p=r0;}
+static void f947(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-4097);*r1p=r1;*r0p=r0;}
+static void f948(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,8192);*r1p=r1;*r0p=r0;}
+static void f949(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-8192);*r1p=r1;*r0p=r0;}
+static void f950(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,8191);*r1p=r1;*r0p=r0;}
+static void f951(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-8193);*r1p=r1;*r0p=r0;}
+static void f952(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,16384);*r1p=r1;*r0p=r0;}
+static void f953(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-16384);*r1p=r1;*r0p=r0;}
+static void f954(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,16383);*r1p=r1;*r0p=r0;}
+static void f955(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-16385);*r1p=r1;*r0p=r0;}
+static void f956(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,32768);*r1p=r1;*r0p=r0;}
+static void f957(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-32768);*r1p=r1;*r0p=r0;}
+static void f958(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,32767);*r1p=r1;*r0p=r0;}
+static void f959(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,7,0,-32769);*r1p=r1;*r0p=r0;}
+static void f960(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,1);*r1p=r1;*r0p=r0;}
+static void f961(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-1);*r1p=r1;*r0p=r0;}
+static void f962(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,0);*r1p=r1;*r0p=r0;}
+static void f963(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-2);*r1p=r1;*r0p=r0;}
+static void f964(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,2);*r1p=r1;*r0p=r0;}
+static void f965(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-2);*r1p=r1;*r0p=r0;}
+static void f966(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,1);*r1p=r1;*r0p=r0;}
+static void f967(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-3);*r1p=r1;*r0p=r0;}
+static void f968(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,4);*r1p=r1;*r0p=r0;}
+static void f969(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-4);*r1p=r1;*r0p=r0;}
+static void f970(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,3);*r1p=r1;*r0p=r0;}
+static void f971(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-5);*r1p=r1;*r0p=r0;}
+static void f972(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,8);*r1p=r1;*r0p=r0;}
+static void f973(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-8);*r1p=r1;*r0p=r0;}
+static void f974(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,7);*r1p=r1;*r0p=r0;}
+static void f975(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-9);*r1p=r1;*r0p=r0;}
+static void f976(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,16);*r1p=r1;*r0p=r0;}
+static void f977(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-16);*r1p=r1;*r0p=r0;}
+static void f978(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,15);*r1p=r1;*r0p=r0;}
+static void f979(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-17);*r1p=r1;*r0p=r0;}
+static void f980(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,32);*r1p=r1;*r0p=r0;}
+static void f981(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-32);*r1p=r1;*r0p=r0;}
+static void f982(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,31);*r1p=r1;*r0p=r0;}
+static void f983(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-33);*r1p=r1;*r0p=r0;}
+static void f984(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,64);*r1p=r1;*r0p=r0;}
+static void f985(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-64);*r1p=r1;*r0p=r0;}
+static void f986(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,63);*r1p=r1;*r0p=r0;}
+static void f987(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-65);*r1p=r1;*r0p=r0;}
+static void f988(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,128);*r1p=r1;*r0p=r0;}
+static void f989(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-128);*r1p=r1;*r0p=r0;}
+static void f990(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,127);*r1p=r1;*r0p=r0;}
+static void f991(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-129);*r1p=r1;*r0p=r0;}
+static void f992(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,256);*r1p=r1;*r0p=r0;}
+static void f993(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-256);*r1p=r1;*r0p=r0;}
+static void f994(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,255);*r1p=r1;*r0p=r0;}
+static void f995(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-257);*r1p=r1;*r0p=r0;}
+static void f996(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,512);*r1p=r1;*r0p=r0;}
+static void f997(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-512);*r1p=r1;*r0p=r0;}
+static void f998(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,511);*r1p=r1;*r0p=r0;}
+static void f999(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-513);*r1p=r1;*r0p=r0;}
+static void f1000(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,1024);*r1p=r1;*r0p=r0;}
+static void f1001(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1002(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,1023);*r1p=r1;*r0p=r0;}
+static void f1003(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1004(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,2048);*r1p=r1;*r0p=r0;}
+static void f1005(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1006(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,2047);*r1p=r1;*r0p=r0;}
+static void f1007(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1008(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,4096);*r1p=r1;*r0p=r0;}
+static void f1009(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1010(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,4095);*r1p=r1;*r0p=r0;}
+static void f1011(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1012(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,8192);*r1p=r1;*r0p=r0;}
+static void f1013(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1014(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,8191);*r1p=r1;*r0p=r0;}
+static void f1015(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1016(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,16384);*r1p=r1;*r0p=r0;}
+static void f1017(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1018(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,16383);*r1p=r1;*r0p=r0;}
+static void f1019(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1020(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,32768);*r1p=r1;*r0p=r0;}
+static void f1021(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1022(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,32767);*r1p=r1;*r0p=r0;}
+static void f1023(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-9,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1024(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,1);*r1p=r1;*r0p=r0;}
+static void f1025(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-1);*r1p=r1;*r0p=r0;}
+static void f1026(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,0);*r1p=r1;*r0p=r0;}
+static void f1027(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-2);*r1p=r1;*r0p=r0;}
+static void f1028(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,2);*r1p=r1;*r0p=r0;}
+static void f1029(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-2);*r1p=r1;*r0p=r0;}
+static void f1030(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,1);*r1p=r1;*r0p=r0;}
+static void f1031(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-3);*r1p=r1;*r0p=r0;}
+static void f1032(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,4);*r1p=r1;*r0p=r0;}
+static void f1033(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-4);*r1p=r1;*r0p=r0;}
+static void f1034(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,3);*r1p=r1;*r0p=r0;}
+static void f1035(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-5);*r1p=r1;*r0p=r0;}
+static void f1036(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,8);*r1p=r1;*r0p=r0;}
+static void f1037(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-8);*r1p=r1;*r0p=r0;}
+static void f1038(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,7);*r1p=r1;*r0p=r0;}
+static void f1039(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-9);*r1p=r1;*r0p=r0;}
+static void f1040(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,16);*r1p=r1;*r0p=r0;}
+static void f1041(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-16);*r1p=r1;*r0p=r0;}
+static void f1042(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,15);*r1p=r1;*r0p=r0;}
+static void f1043(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-17);*r1p=r1;*r0p=r0;}
+static void f1044(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,32);*r1p=r1;*r0p=r0;}
+static void f1045(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-32);*r1p=r1;*r0p=r0;}
+static void f1046(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,31);*r1p=r1;*r0p=r0;}
+static void f1047(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-33);*r1p=r1;*r0p=r0;}
+static void f1048(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,64);*r1p=r1;*r0p=r0;}
+static void f1049(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-64);*r1p=r1;*r0p=r0;}
+static void f1050(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,63);*r1p=r1;*r0p=r0;}
+static void f1051(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-65);*r1p=r1;*r0p=r0;}
+static void f1052(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,128);*r1p=r1;*r0p=r0;}
+static void f1053(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-128);*r1p=r1;*r0p=r0;}
+static void f1054(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,127);*r1p=r1;*r0p=r0;}
+static void f1055(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-129);*r1p=r1;*r0p=r0;}
+static void f1056(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,256);*r1p=r1;*r0p=r0;}
+static void f1057(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-256);*r1p=r1;*r0p=r0;}
+static void f1058(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,255);*r1p=r1;*r0p=r0;}
+static void f1059(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-257);*r1p=r1;*r0p=r0;}
+static void f1060(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,512);*r1p=r1;*r0p=r0;}
+static void f1061(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-512);*r1p=r1;*r0p=r0;}
+static void f1062(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,511);*r1p=r1;*r0p=r0;}
+static void f1063(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-513);*r1p=r1;*r0p=r0;}
+static void f1064(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,1024);*r1p=r1;*r0p=r0;}
+static void f1065(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1066(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,1023);*r1p=r1;*r0p=r0;}
+static void f1067(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1068(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,2048);*r1p=r1;*r0p=r0;}
+static void f1069(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1070(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,2047);*r1p=r1;*r0p=r0;}
+static void f1071(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1072(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,4096);*r1p=r1;*r0p=r0;}
+static void f1073(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1074(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,4095);*r1p=r1;*r0p=r0;}
+static void f1075(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1076(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,8192);*r1p=r1;*r0p=r0;}
+static void f1077(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1078(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,8191);*r1p=r1;*r0p=r0;}
+static void f1079(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1080(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,16384);*r1p=r1;*r0p=r0;}
+static void f1081(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1082(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,16383);*r1p=r1;*r0p=r0;}
+static void f1083(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1084(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,32768);*r1p=r1;*r0p=r0;}
+static void f1085(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1086(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,32767);*r1p=r1;*r0p=r0;}
+static void f1087(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1088(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,1);*r1p=r1;*r0p=r0;}
+static void f1089(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-1);*r1p=r1;*r0p=r0;}
+static void f1090(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,0);*r1p=r1;*r0p=r0;}
+static void f1091(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-2);*r1p=r1;*r0p=r0;}
+static void f1092(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,2);*r1p=r1;*r0p=r0;}
+static void f1093(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-2);*r1p=r1;*r0p=r0;}
+static void f1094(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,1);*r1p=r1;*r0p=r0;}
+static void f1095(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-3);*r1p=r1;*r0p=r0;}
+static void f1096(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,4);*r1p=r1;*r0p=r0;}
+static void f1097(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-4);*r1p=r1;*r0p=r0;}
+static void f1098(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,3);*r1p=r1;*r0p=r0;}
+static void f1099(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-5);*r1p=r1;*r0p=r0;}
+static void f1100(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,8);*r1p=r1;*r0p=r0;}
+static void f1101(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-8);*r1p=r1;*r0p=r0;}
+static void f1102(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,7);*r1p=r1;*r0p=r0;}
+static void f1103(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-9);*r1p=r1;*r0p=r0;}
+static void f1104(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,16);*r1p=r1;*r0p=r0;}
+static void f1105(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-16);*r1p=r1;*r0p=r0;}
+static void f1106(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,15);*r1p=r1;*r0p=r0;}
+static void f1107(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-17);*r1p=r1;*r0p=r0;}
+static void f1108(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,32);*r1p=r1;*r0p=r0;}
+static void f1109(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-32);*r1p=r1;*r0p=r0;}
+static void f1110(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,31);*r1p=r1;*r0p=r0;}
+static void f1111(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-33);*r1p=r1;*r0p=r0;}
+static void f1112(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,64);*r1p=r1;*r0p=r0;}
+static void f1113(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-64);*r1p=r1;*r0p=r0;}
+static void f1114(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,63);*r1p=r1;*r0p=r0;}
+static void f1115(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-65);*r1p=r1;*r0p=r0;}
+static void f1116(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,128);*r1p=r1;*r0p=r0;}
+static void f1117(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-128);*r1p=r1;*r0p=r0;}
+static void f1118(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,127);*r1p=r1;*r0p=r0;}
+static void f1119(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-129);*r1p=r1;*r0p=r0;}
+static void f1120(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,256);*r1p=r1;*r0p=r0;}
+static void f1121(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-256);*r1p=r1;*r0p=r0;}
+static void f1122(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,255);*r1p=r1;*r0p=r0;}
+static void f1123(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-257);*r1p=r1;*r0p=r0;}
+static void f1124(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,512);*r1p=r1;*r0p=r0;}
+static void f1125(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-512);*r1p=r1;*r0p=r0;}
+static void f1126(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,511);*r1p=r1;*r0p=r0;}
+static void f1127(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-513);*r1p=r1;*r0p=r0;}
+static void f1128(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,1024);*r1p=r1;*r0p=r0;}
+static void f1129(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1130(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,1023);*r1p=r1;*r0p=r0;}
+static void f1131(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1132(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,2048);*r1p=r1;*r0p=r0;}
+static void f1133(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1134(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,2047);*r1p=r1;*r0p=r0;}
+static void f1135(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1136(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,4096);*r1p=r1;*r0p=r0;}
+static void f1137(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1138(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,4095);*r1p=r1;*r0p=r0;}
+static void f1139(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1140(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,8192);*r1p=r1;*r0p=r0;}
+static void f1141(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1142(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,8191);*r1p=r1;*r0p=r0;}
+static void f1143(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-8193);*r1p=r1;*r0p=r0;}
+/* Machine-generated exhaustive tests for the sub_ddmmss() double-limb
+   subtraction primitive (from GMP's longlong.h).  Each fNNNN computes
+   (r1:r0) = (0:MH) - (0:SL) for one fixed operand pair and stores the
+   two result limbs through r1p/r0p.  For every minuend low limb MH,
+   the generator emits one function per subtrahend SL drawn from the
+   pattern {p, -p, p-1, -(p+1)} for p = 1, 2, 4, ..., 32768; the
+   occasional repeated SL value (e.g. -2 appearing twice per group) is
+   a deliberate overlap of consecutive pattern quadruples, not a bug.
+   Negative constants exercise borrow propagation into the high limb.
+   Do not edit by hand -- regenerate instead.  */
+static void f1144(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,16384);*r1p=r1;*r0p=r0;}
+static void f1145(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1146(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,16383);*r1p=r1;*r0p=r0;}
+static void f1147(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1148(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,32768);*r1p=r1;*r0p=r0;}
+static void f1149(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1150(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,32767);*r1p=r1;*r0p=r0;}
+static void f1151(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1152(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,1);*r1p=r1;*r0p=r0;}
+static void f1153(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-1);*r1p=r1;*r0p=r0;}
+static void f1154(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,0);*r1p=r1;*r0p=r0;}
+static void f1155(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-2);*r1p=r1;*r0p=r0;}
+static void f1156(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,2);*r1p=r1;*r0p=r0;}
+static void f1157(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-2);*r1p=r1;*r0p=r0;}
+static void f1158(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,1);*r1p=r1;*r0p=r0;}
+static void f1159(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-3);*r1p=r1;*r0p=r0;}
+static void f1160(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,4);*r1p=r1;*r0p=r0;}
+static void f1161(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-4);*r1p=r1;*r0p=r0;}
+static void f1162(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,3);*r1p=r1;*r0p=r0;}
+static void f1163(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-5);*r1p=r1;*r0p=r0;}
+static void f1164(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,8);*r1p=r1;*r0p=r0;}
+static void f1165(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-8);*r1p=r1;*r0p=r0;}
+static void f1166(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,7);*r1p=r1;*r0p=r0;}
+static void f1167(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-9);*r1p=r1;*r0p=r0;}
+static void f1168(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,16);*r1p=r1;*r0p=r0;}
+static void f1169(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-16);*r1p=r1;*r0p=r0;}
+static void f1170(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,15);*r1p=r1;*r0p=r0;}
+static void f1171(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-17);*r1p=r1;*r0p=r0;}
+static void f1172(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,32);*r1p=r1;*r0p=r0;}
+static void f1173(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-32);*r1p=r1;*r0p=r0;}
+static void f1174(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,31);*r1p=r1;*r0p=r0;}
+static void f1175(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-33);*r1p=r1;*r0p=r0;}
+static void f1176(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,64);*r1p=r1;*r0p=r0;}
+static void f1177(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-64);*r1p=r1;*r0p=r0;}
+static void f1178(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,63);*r1p=r1;*r0p=r0;}
+static void f1179(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-65);*r1p=r1;*r0p=r0;}
+static void f1180(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,128);*r1p=r1;*r0p=r0;}
+static void f1181(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-128);*r1p=r1;*r0p=r0;}
+static void f1182(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,127);*r1p=r1;*r0p=r0;}
+static void f1183(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-129);*r1p=r1;*r0p=r0;}
+static void f1184(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,256);*r1p=r1;*r0p=r0;}
+static void f1185(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-256);*r1p=r1;*r0p=r0;}
+static void f1186(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,255);*r1p=r1;*r0p=r0;}
+static void f1187(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-257);*r1p=r1;*r0p=r0;}
+static void f1188(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,512);*r1p=r1;*r0p=r0;}
+static void f1189(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-512);*r1p=r1;*r0p=r0;}
+static void f1190(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,511);*r1p=r1;*r0p=r0;}
+static void f1191(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-513);*r1p=r1;*r0p=r0;}
+static void f1192(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,1024);*r1p=r1;*r0p=r0;}
+static void f1193(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1194(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,1023);*r1p=r1;*r0p=r0;}
+static void f1195(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1196(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,2048);*r1p=r1;*r0p=r0;}
+static void f1197(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1198(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,2047);*r1p=r1;*r0p=r0;}
+static void f1199(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1200(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,4096);*r1p=r1;*r0p=r0;}
+static void f1201(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1202(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,4095);*r1p=r1;*r0p=r0;}
+static void f1203(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1204(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,8192);*r1p=r1;*r0p=r0;}
+static void f1205(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1206(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,8191);*r1p=r1;*r0p=r0;}
+static void f1207(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1208(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,16384);*r1p=r1;*r0p=r0;}
+static void f1209(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1210(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,16383);*r1p=r1;*r0p=r0;}
+static void f1211(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1212(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,32768);*r1p=r1;*r0p=r0;}
+static void f1213(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1214(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,32767);*r1p=r1;*r0p=r0;}
+static void f1215(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,15,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1216(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,1);*r1p=r1;*r0p=r0;}
+static void f1217(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-1);*r1p=r1;*r0p=r0;}
+static void f1218(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,0);*r1p=r1;*r0p=r0;}
+static void f1219(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-2);*r1p=r1;*r0p=r0;}
+static void f1220(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,2);*r1p=r1;*r0p=r0;}
+static void f1221(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-2);*r1p=r1;*r0p=r0;}
+static void f1222(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,1);*r1p=r1;*r0p=r0;}
+static void f1223(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-3);*r1p=r1;*r0p=r0;}
+static void f1224(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,4);*r1p=r1;*r0p=r0;}
+static void f1225(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-4);*r1p=r1;*r0p=r0;}
+static void f1226(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,3);*r1p=r1;*r0p=r0;}
+static void f1227(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-5);*r1p=r1;*r0p=r0;}
+static void f1228(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,8);*r1p=r1;*r0p=r0;}
+static void f1229(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-8);*r1p=r1;*r0p=r0;}
+static void f1230(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,7);*r1p=r1;*r0p=r0;}
+static void f1231(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-9);*r1p=r1;*r0p=r0;}
+static void f1232(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,16);*r1p=r1;*r0p=r0;}
+static void f1233(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-16);*r1p=r1;*r0p=r0;}
+static void f1234(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,15);*r1p=r1;*r0p=r0;}
+static void f1235(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-17);*r1p=r1;*r0p=r0;}
+static void f1236(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,32);*r1p=r1;*r0p=r0;}
+static void f1237(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-32);*r1p=r1;*r0p=r0;}
+static void f1238(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,31);*r1p=r1;*r0p=r0;}
+static void f1239(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-33);*r1p=r1;*r0p=r0;}
+static void f1240(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,64);*r1p=r1;*r0p=r0;}
+static void f1241(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-64);*r1p=r1;*r0p=r0;}
+static void f1242(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,63);*r1p=r1;*r0p=r0;}
+static void f1243(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-65);*r1p=r1;*r0p=r0;}
+static void f1244(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,128);*r1p=r1;*r0p=r0;}
+static void f1245(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-128);*r1p=r1;*r0p=r0;}
+static void f1246(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,127);*r1p=r1;*r0p=r0;}
+static void f1247(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-129);*r1p=r1;*r0p=r0;}
+static void f1248(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,256);*r1p=r1;*r0p=r0;}
+static void f1249(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-256);*r1p=r1;*r0p=r0;}
+static void f1250(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,255);*r1p=r1;*r0p=r0;}
+static void f1251(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-257);*r1p=r1;*r0p=r0;}
+static void f1252(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,512);*r1p=r1;*r0p=r0;}
+static void f1253(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-512);*r1p=r1;*r0p=r0;}
+static void f1254(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,511);*r1p=r1;*r0p=r0;}
+static void f1255(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-513);*r1p=r1;*r0p=r0;}
+static void f1256(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,1024);*r1p=r1;*r0p=r0;}
+static void f1257(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1258(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,1023);*r1p=r1;*r0p=r0;}
+static void f1259(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1260(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,2048);*r1p=r1;*r0p=r0;}
+static void f1261(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1262(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,2047);*r1p=r1;*r0p=r0;}
+static void f1263(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1264(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,4096);*r1p=r1;*r0p=r0;}
+static void f1265(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1266(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,4095);*r1p=r1;*r0p=r0;}
+static void f1267(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1268(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,8192);*r1p=r1;*r0p=r0;}
+static void f1269(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1270(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,8191);*r1p=r1;*r0p=r0;}
+static void f1271(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1272(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,16384);*r1p=r1;*r0p=r0;}
+static void f1273(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1274(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,16383);*r1p=r1;*r0p=r0;}
+static void f1275(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1276(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,32768);*r1p=r1;*r0p=r0;}
+static void f1277(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1278(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,32767);*r1p=r1;*r0p=r0;}
+static void f1279(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-17,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1280(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,1);*r1p=r1;*r0p=r0;}
+static void f1281(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-1);*r1p=r1;*r0p=r0;}
+static void f1282(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,0);*r1p=r1;*r0p=r0;}
+static void f1283(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-2);*r1p=r1;*r0p=r0;}
+static void f1284(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,2);*r1p=r1;*r0p=r0;}
+static void f1285(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-2);*r1p=r1;*r0p=r0;}
+static void f1286(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,1);*r1p=r1;*r0p=r0;}
+static void f1287(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-3);*r1p=r1;*r0p=r0;}
+static void f1288(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,4);*r1p=r1;*r0p=r0;}
+static void f1289(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-4);*r1p=r1;*r0p=r0;}
+static void f1290(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,3);*r1p=r1;*r0p=r0;}
+static void f1291(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-5);*r1p=r1;*r0p=r0;}
+static void f1292(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,8);*r1p=r1;*r0p=r0;}
+static void f1293(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-8);*r1p=r1;*r0p=r0;}
+static void f1294(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,7);*r1p=r1;*r0p=r0;}
+static void f1295(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-9);*r1p=r1;*r0p=r0;}
+static void f1296(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,16);*r1p=r1;*r0p=r0;}
+static void f1297(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-16);*r1p=r1;*r0p=r0;}
+static void f1298(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,15);*r1p=r1;*r0p=r0;}
+static void f1299(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-17);*r1p=r1;*r0p=r0;}
+static void f1300(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,32);*r1p=r1;*r0p=r0;}
+static void f1301(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-32);*r1p=r1;*r0p=r0;}
+static void f1302(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,31);*r1p=r1;*r0p=r0;}
+static void f1303(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-33);*r1p=r1;*r0p=r0;}
+static void f1304(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,64);*r1p=r1;*r0p=r0;}
+static void f1305(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-64);*r1p=r1;*r0p=r0;}
+static void f1306(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,63);*r1p=r1;*r0p=r0;}
+static void f1307(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-65);*r1p=r1;*r0p=r0;}
+static void f1308(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,128);*r1p=r1;*r0p=r0;}
+static void f1309(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-128);*r1p=r1;*r0p=r0;}
+static void f1310(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,127);*r1p=r1;*r0p=r0;}
+static void f1311(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-129);*r1p=r1;*r0p=r0;}
+static void f1312(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,256);*r1p=r1;*r0p=r0;}
+static void f1313(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-256);*r1p=r1;*r0p=r0;}
+static void f1314(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,255);*r1p=r1;*r0p=r0;}
+static void f1315(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-257);*r1p=r1;*r0p=r0;}
+static void f1316(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,512);*r1p=r1;*r0p=r0;}
+static void f1317(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-512);*r1p=r1;*r0p=r0;}
+static void f1318(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,511);*r1p=r1;*r0p=r0;}
+static void f1319(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-513);*r1p=r1;*r0p=r0;}
+static void f1320(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,1024);*r1p=r1;*r0p=r0;}
+static void f1321(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1322(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,1023);*r1p=r1;*r0p=r0;}
+static void f1323(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1324(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,2048);*r1p=r1;*r0p=r0;}
+static void f1325(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1326(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,2047);*r1p=r1;*r0p=r0;}
+static void f1327(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1328(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,4096);*r1p=r1;*r0p=r0;}
+static void f1329(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1330(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,4095);*r1p=r1;*r0p=r0;}
+static void f1331(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1332(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,8192);*r1p=r1;*r0p=r0;}
+static void f1333(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1334(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,8191);*r1p=r1;*r0p=r0;}
+static void f1335(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1336(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,16384);*r1p=r1;*r0p=r0;}
+static void f1337(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1338(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,16383);*r1p=r1;*r0p=r0;}
+static void f1339(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1340(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,32768);*r1p=r1;*r0p=r0;}
+static void f1341(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1342(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,32767);*r1p=r1;*r0p=r0;}
+static void f1343(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1344(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,1);*r1p=r1;*r0p=r0;}
+static void f1345(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-1);*r1p=r1;*r0p=r0;}
+static void f1346(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,0);*r1p=r1;*r0p=r0;}
+static void f1347(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-2);*r1p=r1;*r0p=r0;}
+static void f1348(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,2);*r1p=r1;*r0p=r0;}
+static void f1349(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-2);*r1p=r1;*r0p=r0;}
+static void f1350(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,1);*r1p=r1;*r0p=r0;}
+static void f1351(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-3);*r1p=r1;*r0p=r0;}
+static void f1352(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,4);*r1p=r1;*r0p=r0;}
+static void f1353(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-4);*r1p=r1;*r0p=r0;}
+static void f1354(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,3);*r1p=r1;*r0p=r0;}
+static void f1355(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-5);*r1p=r1;*r0p=r0;}
+static void f1356(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,8);*r1p=r1;*r0p=r0;}
+static void f1357(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-8);*r1p=r1;*r0p=r0;}
+static void f1358(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,7);*r1p=r1;*r0p=r0;}
+static void f1359(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-9);*r1p=r1;*r0p=r0;}
+static void f1360(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,16);*r1p=r1;*r0p=r0;}
+static void f1361(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-16);*r1p=r1;*r0p=r0;}
+static void f1362(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,15);*r1p=r1;*r0p=r0;}
+static void f1363(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-17);*r1p=r1;*r0p=r0;}
+static void f1364(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,32);*r1p=r1;*r0p=r0;}
+static void f1365(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-32);*r1p=r1;*r0p=r0;}
+static void f1366(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,31);*r1p=r1;*r0p=r0;}
+static void f1367(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-33);*r1p=r1;*r0p=r0;}
+static void f1368(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,64);*r1p=r1;*r0p=r0;}
+static void f1369(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-64);*r1p=r1;*r0p=r0;}
+static void f1370(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,63);*r1p=r1;*r0p=r0;}
+static void f1371(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-65);*r1p=r1;*r0p=r0;}
+static void f1372(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,128);*r1p=r1;*r0p=r0;}
+static void f1373(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-128);*r1p=r1;*r0p=r0;}
+static void f1374(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,127);*r1p=r1;*r0p=r0;}
+static void f1375(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-129);*r1p=r1;*r0p=r0;}
+static void f1376(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,256);*r1p=r1;*r0p=r0;}
+static void f1377(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-256);*r1p=r1;*r0p=r0;}
+static void f1378(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,255);*r1p=r1;*r0p=r0;}
+static void f1379(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-257);*r1p=r1;*r0p=r0;}
+static void f1380(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,512);*r1p=r1;*r0p=r0;}
+static void f1381(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-512);*r1p=r1;*r0p=r0;}
+static void f1382(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,511);*r1p=r1;*r0p=r0;}
+static void f1383(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-513);*r1p=r1;*r0p=r0;}
+static void f1384(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,1024);*r1p=r1;*r0p=r0;}
+static void f1385(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1386(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,1023);*r1p=r1;*r0p=r0;}
+static void f1387(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1388(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,2048);*r1p=r1;*r0p=r0;}
+static void f1389(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1390(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,2047);*r1p=r1;*r0p=r0;}
+static void f1391(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1392(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,4096);*r1p=r1;*r0p=r0;}
+static void f1393(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1394(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,4095);*r1p=r1;*r0p=r0;}
+static void f1395(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1396(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,8192);*r1p=r1;*r0p=r0;}
+static void f1397(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1398(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,8191);*r1p=r1;*r0p=r0;}
+static void f1399(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1400(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,16384);*r1p=r1;*r0p=r0;}
+static void f1401(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1402(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,16383);*r1p=r1;*r0p=r0;}
+static void f1403(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1404(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,32768);*r1p=r1;*r0p=r0;}
+static void f1405(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1406(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,32767);*r1p=r1;*r0p=r0;}
+static void f1407(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1408(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,1);*r1p=r1;*r0p=r0;}
+static void f1409(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-1);*r1p=r1;*r0p=r0;}
+static void f1410(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,0);*r1p=r1;*r0p=r0;}
+static void f1411(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-2);*r1p=r1;*r0p=r0;}
+static void f1412(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,2);*r1p=r1;*r0p=r0;}
+static void f1413(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-2);*r1p=r1;*r0p=r0;}
+static void f1414(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,1);*r1p=r1;*r0p=r0;}
+static void f1415(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-3);*r1p=r1;*r0p=r0;}
+static void f1416(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,4);*r1p=r1;*r0p=r0;}
+static void f1417(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-4);*r1p=r1;*r0p=r0;}
+static void f1418(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,3);*r1p=r1;*r0p=r0;}
+static void f1419(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-5);*r1p=r1;*r0p=r0;}
+static void f1420(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,8);*r1p=r1;*r0p=r0;}
+static void f1421(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-8);*r1p=r1;*r0p=r0;}
+static void f1422(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,7);*r1p=r1;*r0p=r0;}
+static void f1423(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-9);*r1p=r1;*r0p=r0;}
+static void f1424(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,16);*r1p=r1;*r0p=r0;}
+static void f1425(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-16);*r1p=r1;*r0p=r0;}
+static void f1426(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,15);*r1p=r1;*r0p=r0;}
+static void f1427(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-17);*r1p=r1;*r0p=r0;}
+static void f1428(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,32);*r1p=r1;*r0p=r0;}
+static void f1429(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-32);*r1p=r1;*r0p=r0;}
+static void f1430(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,31);*r1p=r1;*r0p=r0;}
+static void f1431(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-33);*r1p=r1;*r0p=r0;}
+static void f1432(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,64);*r1p=r1;*r0p=r0;}
+static void f1433(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-64);*r1p=r1;*r0p=r0;}
+static void f1434(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,63);*r1p=r1;*r0p=r0;}
+static void f1435(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-65);*r1p=r1;*r0p=r0;}
+static void f1436(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,128);*r1p=r1;*r0p=r0;}
+static void f1437(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-128);*r1p=r1;*r0p=r0;}
+static void f1438(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,127);*r1p=r1;*r0p=r0;}
+static void f1439(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-129);*r1p=r1;*r0p=r0;}
+static void f1440(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,256);*r1p=r1;*r0p=r0;}
+static void f1441(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-256);*r1p=r1;*r0p=r0;}
+static void f1442(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,255);*r1p=r1;*r0p=r0;}
+static void f1443(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-257);*r1p=r1;*r0p=r0;}
+static void f1444(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,512);*r1p=r1;*r0p=r0;}
+static void f1445(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-512);*r1p=r1;*r0p=r0;}
+static void f1446(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,511);*r1p=r1;*r0p=r0;}
+static void f1447(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-513);*r1p=r1;*r0p=r0;}
+static void f1448(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,1024);*r1p=r1;*r0p=r0;}
+static void f1449(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1450(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,1023);*r1p=r1;*r0p=r0;}
+static void f1451(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1452(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,2048);*r1p=r1;*r0p=r0;}
+static void f1453(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1454(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,2047);*r1p=r1;*r0p=r0;}
+static void f1455(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1456(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,4096);*r1p=r1;*r0p=r0;}
+static void f1457(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1458(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,4095);*r1p=r1;*r0p=r0;}
+static void f1459(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1460(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,8192);*r1p=r1;*r0p=r0;}
+static void f1461(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1462(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,8191);*r1p=r1;*r0p=r0;}
+static void f1463(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1464(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,16384);*r1p=r1;*r0p=r0;}
+static void f1465(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1466(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,16383);*r1p=r1;*r0p=r0;}
+static void f1467(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1468(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,32768);*r1p=r1;*r0p=r0;}
+static void f1469(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1470(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,32767);*r1p=r1;*r0p=r0;}
+static void f1471(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,31,0,-32769);*r1p=r1;*r0p=r0;}
+/* Generated tests, minuend (0,-33) group: each fN writes the two limbs of
+   sub_ddmmss((0,-33) - (0,S)) to *r1p/*r0p.  S runs the pattern 2^k, -2^k,
+   2^k-1, -(2^k+1) for k = 0..15.  NOTE(review): the k=1 step collides with
+   k=0, so f1477 repeats f1475's subtrahend (-2) and f1478 repeats f1472's
+   (1) -- a harmless redundancy from the test generator.  */
+static void f1472(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,1);*r1p=r1;*r0p=r0;}
+static void f1473(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-1);*r1p=r1;*r0p=r0;}
+static void f1474(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,0);*r1p=r1;*r0p=r0;}
+static void f1475(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-2);*r1p=r1;*r0p=r0;}
+static void f1476(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,2);*r1p=r1;*r0p=r0;}
+static void f1477(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-2);*r1p=r1;*r0p=r0;}
+static void f1478(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,1);*r1p=r1;*r0p=r0;}
+static void f1479(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-3);*r1p=r1;*r0p=r0;}
+static void f1480(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,4);*r1p=r1;*r0p=r0;}
+static void f1481(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-4);*r1p=r1;*r0p=r0;}
+static void f1482(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,3);*r1p=r1;*r0p=r0;}
+static void f1483(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-5);*r1p=r1;*r0p=r0;}
+static void f1484(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,8);*r1p=r1;*r0p=r0;}
+static void f1485(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-8);*r1p=r1;*r0p=r0;}
+static void f1486(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,7);*r1p=r1;*r0p=r0;}
+static void f1487(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-9);*r1p=r1;*r0p=r0;}
+static void f1488(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,16);*r1p=r1;*r0p=r0;}
+static void f1489(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-16);*r1p=r1;*r0p=r0;}
+static void f1490(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,15);*r1p=r1;*r0p=r0;}
+static void f1491(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-17);*r1p=r1;*r0p=r0;}
+static void f1492(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,32);*r1p=r1;*r0p=r0;}
+static void f1493(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-32);*r1p=r1;*r0p=r0;}
+static void f1494(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,31);*r1p=r1;*r0p=r0;}
+static void f1495(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-33);*r1p=r1;*r0p=r0;}
+static void f1496(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,64);*r1p=r1;*r0p=r0;}
+static void f1497(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-64);*r1p=r1;*r0p=r0;}
+static void f1498(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,63);*r1p=r1;*r0p=r0;}
+static void f1499(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-65);*r1p=r1;*r0p=r0;}
+static void f1500(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,128);*r1p=r1;*r0p=r0;}
+static void f1501(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-128);*r1p=r1;*r0p=r0;}
+static void f1502(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,127);*r1p=r1;*r0p=r0;}
+static void f1503(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-129);*r1p=r1;*r0p=r0;}
+static void f1504(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,256);*r1p=r1;*r0p=r0;}
+static void f1505(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-256);*r1p=r1;*r0p=r0;}
+static void f1506(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,255);*r1p=r1;*r0p=r0;}
+static void f1507(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-257);*r1p=r1;*r0p=r0;}
+static void f1508(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,512);*r1p=r1;*r0p=r0;}
+static void f1509(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-512);*r1p=r1;*r0p=r0;}
+static void f1510(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,511);*r1p=r1;*r0p=r0;}
+static void f1511(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-513);*r1p=r1;*r0p=r0;}
+static void f1512(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,1024);*r1p=r1;*r0p=r0;}
+static void f1513(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1514(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,1023);*r1p=r1;*r0p=r0;}
+static void f1515(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1516(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,2048);*r1p=r1;*r0p=r0;}
+static void f1517(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1518(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,2047);*r1p=r1;*r0p=r0;}
+static void f1519(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1520(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,4096);*r1p=r1;*r0p=r0;}
+static void f1521(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1522(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,4095);*r1p=r1;*r0p=r0;}
+static void f1523(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1524(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,8192);*r1p=r1;*r0p=r0;}
+static void f1525(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1526(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,8191);*r1p=r1;*r0p=r0;}
+static void f1527(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1528(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,16384);*r1p=r1;*r0p=r0;}
+static void f1529(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1530(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,16383);*r1p=r1;*r0p=r0;}
+static void f1531(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1532(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,32768);*r1p=r1;*r0p=r0;}
+static void f1533(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1534(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,32767);*r1p=r1;*r0p=r0;}
+static void f1535(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-33,0,-32769);*r1p=r1;*r0p=r0;}
+/* Generated tests, minuend (0,64) group: each fN writes the two limbs of
+   sub_ddmmss((0,64) - (0,S)) to *r1p/*r0p, S following the same
+   2^k, -2^k, 2^k-1, -(2^k+1) sweep for k = 0..15 (with the k=1 collision
+   duplicating entries, e.g. f1541/f1539 and f1542/f1536).  */
+static void f1536(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,1);*r1p=r1;*r0p=r0;}
+static void f1537(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-1);*r1p=r1;*r0p=r0;}
+static void f1538(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,0);*r1p=r1;*r0p=r0;}
+static void f1539(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-2);*r1p=r1;*r0p=r0;}
+static void f1540(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,2);*r1p=r1;*r0p=r0;}
+static void f1541(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-2);*r1p=r1;*r0p=r0;}
+static void f1542(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,1);*r1p=r1;*r0p=r0;}
+static void f1543(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-3);*r1p=r1;*r0p=r0;}
+static void f1544(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,4);*r1p=r1;*r0p=r0;}
+static void f1545(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-4);*r1p=r1;*r0p=r0;}
+static void f1546(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,3);*r1p=r1;*r0p=r0;}
+static void f1547(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-5);*r1p=r1;*r0p=r0;}
+static void f1548(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,8);*r1p=r1;*r0p=r0;}
+static void f1549(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-8);*r1p=r1;*r0p=r0;}
+static void f1550(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,7);*r1p=r1;*r0p=r0;}
+static void f1551(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-9);*r1p=r1;*r0p=r0;}
+static void f1552(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,16);*r1p=r1;*r0p=r0;}
+static void f1553(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-16);*r1p=r1;*r0p=r0;}
+static void f1554(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,15);*r1p=r1;*r0p=r0;}
+static void f1555(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-17);*r1p=r1;*r0p=r0;}
+static void f1556(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,32);*r1p=r1;*r0p=r0;}
+static void f1557(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-32);*r1p=r1;*r0p=r0;}
+static void f1558(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,31);*r1p=r1;*r0p=r0;}
+static void f1559(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-33);*r1p=r1;*r0p=r0;}
+static void f1560(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,64);*r1p=r1;*r0p=r0;}
+static void f1561(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-64);*r1p=r1;*r0p=r0;}
+static void f1562(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,63);*r1p=r1;*r0p=r0;}
+static void f1563(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-65);*r1p=r1;*r0p=r0;}
+static void f1564(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,128);*r1p=r1;*r0p=r0;}
+static void f1565(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-128);*r1p=r1;*r0p=r0;}
+static void f1566(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,127);*r1p=r1;*r0p=r0;}
+static void f1567(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-129);*r1p=r1;*r0p=r0;}
+static void f1568(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,256);*r1p=r1;*r0p=r0;}
+static void f1569(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-256);*r1p=r1;*r0p=r0;}
+static void f1570(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,255);*r1p=r1;*r0p=r0;}
+static void f1571(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-257);*r1p=r1;*r0p=r0;}
+static void f1572(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,512);*r1p=r1;*r0p=r0;}
+static void f1573(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-512);*r1p=r1;*r0p=r0;}
+static void f1574(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,511);*r1p=r1;*r0p=r0;}
+static void f1575(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-513);*r1p=r1;*r0p=r0;}
+static void f1576(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,1024);*r1p=r1;*r0p=r0;}
+static void f1577(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1578(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,1023);*r1p=r1;*r0p=r0;}
+static void f1579(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1580(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,2048);*r1p=r1;*r0p=r0;}
+static void f1581(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1582(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,2047);*r1p=r1;*r0p=r0;}
+static void f1583(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1584(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,4096);*r1p=r1;*r0p=r0;}
+static void f1585(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1586(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,4095);*r1p=r1;*r0p=r0;}
+static void f1587(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1588(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,8192);*r1p=r1;*r0p=r0;}
+static void f1589(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1590(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,8191);*r1p=r1;*r0p=r0;}
+static void f1591(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1592(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,16384);*r1p=r1;*r0p=r0;}
+static void f1593(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1594(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,16383);*r1p=r1;*r0p=r0;}
+static void f1595(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1596(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,32768);*r1p=r1;*r0p=r0;}
+static void f1597(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1598(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,32767);*r1p=r1;*r0p=r0;}
+static void f1599(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,64,0,-32769);*r1p=r1;*r0p=r0;}
+/* Generated tests, minuend (0,-64) group: each fN writes the two limbs of
+   sub_ddmmss((0,-64) - (0,S)) to *r1p/*r0p, S sweeping 2^k, -2^k, 2^k-1,
+   -(2^k+1) for k = 0..15 (k=1 duplicates k=0 entries, e.g. f1605/f1603).  */
+static void f1600(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,1);*r1p=r1;*r0p=r0;}
+static void f1601(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-1);*r1p=r1;*r0p=r0;}
+static void f1602(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,0);*r1p=r1;*r0p=r0;}
+static void f1603(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-2);*r1p=r1;*r0p=r0;}
+static void f1604(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,2);*r1p=r1;*r0p=r0;}
+static void f1605(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-2);*r1p=r1;*r0p=r0;}
+static void f1606(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,1);*r1p=r1;*r0p=r0;}
+static void f1607(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-3);*r1p=r1;*r0p=r0;}
+static void f1608(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,4);*r1p=r1;*r0p=r0;}
+static void f1609(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-4);*r1p=r1;*r0p=r0;}
+static void f1610(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,3);*r1p=r1;*r0p=r0;}
+static void f1611(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-5);*r1p=r1;*r0p=r0;}
+static void f1612(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,8);*r1p=r1;*r0p=r0;}
+static void f1613(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-8);*r1p=r1;*r0p=r0;}
+static void f1614(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,7);*r1p=r1;*r0p=r0;}
+static void f1615(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-9);*r1p=r1;*r0p=r0;}
+static void f1616(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,16);*r1p=r1;*r0p=r0;}
+static void f1617(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-16);*r1p=r1;*r0p=r0;}
+static void f1618(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,15);*r1p=r1;*r0p=r0;}
+static void f1619(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-17);*r1p=r1;*r0p=r0;}
+static void f1620(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,32);*r1p=r1;*r0p=r0;}
+static void f1621(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-32);*r1p=r1;*r0p=r0;}
+static void f1622(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,31);*r1p=r1;*r0p=r0;}
+static void f1623(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-33);*r1p=r1;*r0p=r0;}
+static void f1624(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,64);*r1p=r1;*r0p=r0;}
+static void f1625(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-64);*r1p=r1;*r0p=r0;}
+static void f1626(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,63);*r1p=r1;*r0p=r0;}
+static void f1627(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-65);*r1p=r1;*r0p=r0;}
+static void f1628(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,128);*r1p=r1;*r0p=r0;}
+static void f1629(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-128);*r1p=r1;*r0p=r0;}
+static void f1630(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,127);*r1p=r1;*r0p=r0;}
+static void f1631(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-129);*r1p=r1;*r0p=r0;}
+static void f1632(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,256);*r1p=r1;*r0p=r0;}
+static void f1633(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-256);*r1p=r1;*r0p=r0;}
+static void f1634(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,255);*r1p=r1;*r0p=r0;}
+static void f1635(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-257);*r1p=r1;*r0p=r0;}
+static void f1636(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,512);*r1p=r1;*r0p=r0;}
+static void f1637(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-512);*r1p=r1;*r0p=r0;}
+static void f1638(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,511);*r1p=r1;*r0p=r0;}
+static void f1639(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-513);*r1p=r1;*r0p=r0;}
+static void f1640(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,1024);*r1p=r1;*r0p=r0;}
+static void f1641(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1642(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,1023);*r1p=r1;*r0p=r0;}
+static void f1643(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1644(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,2048);*r1p=r1;*r0p=r0;}
+static void f1645(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1646(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,2047);*r1p=r1;*r0p=r0;}
+static void f1647(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1648(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,4096);*r1p=r1;*r0p=r0;}
+static void f1649(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1650(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,4095);*r1p=r1;*r0p=r0;}
+static void f1651(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1652(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,8192);*r1p=r1;*r0p=r0;}
+static void f1653(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1654(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,8191);*r1p=r1;*r0p=r0;}
+static void f1655(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1656(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,16384);*r1p=r1;*r0p=r0;}
+static void f1657(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1658(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,16383);*r1p=r1;*r0p=r0;}
+static void f1659(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1660(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,32768);*r1p=r1;*r0p=r0;}
+static void f1661(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1662(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,32767);*r1p=r1;*r0p=r0;}
+static void f1663(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-64,0,-32769);*r1p=r1;*r0p=r0;}
+/* Generated tests, minuend (0,63) group: each fN writes the two limbs of
+   sub_ddmmss((0,63) - (0,S)) to *r1p/*r0p, S sweeping 2^k, -2^k, 2^k-1,
+   -(2^k+1) for k = 0..15 (k=1 duplicates k=0 entries, e.g. f1669/f1667).  */
+static void f1664(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,1);*r1p=r1;*r0p=r0;}
+static void f1665(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-1);*r1p=r1;*r0p=r0;}
+static void f1666(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,0);*r1p=r1;*r0p=r0;}
+static void f1667(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-2);*r1p=r1;*r0p=r0;}
+static void f1668(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,2);*r1p=r1;*r0p=r0;}
+static void f1669(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-2);*r1p=r1;*r0p=r0;}
+static void f1670(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,1);*r1p=r1;*r0p=r0;}
+static void f1671(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-3);*r1p=r1;*r0p=r0;}
+static void f1672(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,4);*r1p=r1;*r0p=r0;}
+static void f1673(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-4);*r1p=r1;*r0p=r0;}
+static void f1674(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,3);*r1p=r1;*r0p=r0;}
+static void f1675(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-5);*r1p=r1;*r0p=r0;}
+static void f1676(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,8);*r1p=r1;*r0p=r0;}
+static void f1677(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-8);*r1p=r1;*r0p=r0;}
+static void f1678(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,7);*r1p=r1;*r0p=r0;}
+static void f1679(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-9);*r1p=r1;*r0p=r0;}
+static void f1680(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,16);*r1p=r1;*r0p=r0;}
+static void f1681(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-16);*r1p=r1;*r0p=r0;}
+static void f1682(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,15);*r1p=r1;*r0p=r0;}
+static void f1683(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-17);*r1p=r1;*r0p=r0;}
+static void f1684(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,32);*r1p=r1;*r0p=r0;}
+static void f1685(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-32);*r1p=r1;*r0p=r0;}
+static void f1686(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,31);*r1p=r1;*r0p=r0;}
+static void f1687(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-33);*r1p=r1;*r0p=r0;}
+static void f1688(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,64);*r1p=r1;*r0p=r0;}
+static void f1689(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-64);*r1p=r1;*r0p=r0;}
+static void f1690(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,63);*r1p=r1;*r0p=r0;}
+static void f1691(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-65);*r1p=r1;*r0p=r0;}
+static void f1692(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,128);*r1p=r1;*r0p=r0;}
+static void f1693(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-128);*r1p=r1;*r0p=r0;}
+static void f1694(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,127);*r1p=r1;*r0p=r0;}
+static void f1695(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-129);*r1p=r1;*r0p=r0;}
+static void f1696(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,256);*r1p=r1;*r0p=r0;}
+static void f1697(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-256);*r1p=r1;*r0p=r0;}
+static void f1698(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,255);*r1p=r1;*r0p=r0;}
+static void f1699(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-257);*r1p=r1;*r0p=r0;}
+static void f1700(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,512);*r1p=r1;*r0p=r0;}
+static void f1701(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-512);*r1p=r1;*r0p=r0;}
+static void f1702(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,511);*r1p=r1;*r0p=r0;}
+static void f1703(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-513);*r1p=r1;*r0p=r0;}
+static void f1704(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,1024);*r1p=r1;*r0p=r0;}
+static void f1705(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1706(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,1023);*r1p=r1;*r0p=r0;}
+static void f1707(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1708(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,2048);*r1p=r1;*r0p=r0;}
+static void f1709(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1710(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,2047);*r1p=r1;*r0p=r0;}
+static void f1711(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1712(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,4096);*r1p=r1;*r0p=r0;}
+static void f1713(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1714(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,4095);*r1p=r1;*r0p=r0;}
+static void f1715(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1716(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,8192);*r1p=r1;*r0p=r0;}
+static void f1717(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1718(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,8191);*r1p=r1;*r0p=r0;}
+static void f1719(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1720(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,16384);*r1p=r1;*r0p=r0;}
+static void f1721(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1722(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,16383);*r1p=r1;*r0p=r0;}
+static void f1723(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1724(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,32768);*r1p=r1;*r0p=r0;}
+static void f1725(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1726(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,32767);*r1p=r1;*r0p=r0;}
+static void f1727(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,63,0,-32769);*r1p=r1;*r0p=r0;}
+/* Generated tests, minuend (0,-65) group: each fN writes the two limbs of
+   sub_ddmmss((0,-65) - (0,S)) to *r1p/*r0p, S sweeping 2^k, -2^k, 2^k-1,
+   -(2^k+1) from k=0.  This group continues past the end of this hunk.  */
+static void f1728(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,1);*r1p=r1;*r0p=r0;}
+static void f1729(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-1);*r1p=r1;*r0p=r0;}
+static void f1730(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,0);*r1p=r1;*r0p=r0;}
+static void f1731(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-2);*r1p=r1;*r0p=r0;}
+static void f1732(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,2);*r1p=r1;*r0p=r0;}
+static void f1733(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-2);*r1p=r1;*r0p=r0;}
+static void f1734(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,1);*r1p=r1;*r0p=r0;}
+static void f1735(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-3);*r1p=r1;*r0p=r0;}
+static void f1736(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,4);*r1p=r1;*r0p=r0;}
+static void f1737(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-4);*r1p=r1;*r0p=r0;}
+static void f1738(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,3);*r1p=r1;*r0p=r0;}
+static void f1739(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-5);*r1p=r1;*r0p=r0;}
+static void f1740(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,8);*r1p=r1;*r0p=r0;}
+static void f1741(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-8);*r1p=r1;*r0p=r0;}
+static void f1742(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,7);*r1p=r1;*r0p=r0;}
+static void f1743(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-9);*r1p=r1;*r0p=r0;}
+static void f1744(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,16);*r1p=r1;*r0p=r0;}
+static void f1745(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-16);*r1p=r1;*r0p=r0;}
+static void f1746(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,15);*r1p=r1;*r0p=r0;}
+static void f1747(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-17);*r1p=r1;*r0p=r0;}
+static void f1748(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,32);*r1p=r1;*r0p=r0;}
+static void f1749(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-32);*r1p=r1;*r0p=r0;}
+static void f1750(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,31);*r1p=r1;*r0p=r0;}
+static void f1751(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-33);*r1p=r1;*r0p=r0;}
+static void f1752(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,64);*r1p=r1;*r0p=r0;}
+static void f1753(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-64);*r1p=r1;*r0p=r0;}
+static void f1754(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,63);*r1p=r1;*r0p=r0;}
+static void f1755(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-65);*r1p=r1;*r0p=r0;}
+static void f1756(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,128);*r1p=r1;*r0p=r0;}
+static void f1757(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-128);*r1p=r1;*r0p=r0;}
+static void f1758(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,127);*r1p=r1;*r0p=r0;}
+static void f1759(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-129);*r1p=r1;*r0p=r0;}
+static void f1760(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,256);*r1p=r1;*r0p=r0;}
+static void f1761(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-256);*r1p=r1;*r0p=r0;}
+static void f1762(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,255);*r1p=r1;*r0p=r0;}
+static void f1763(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-257);*r1p=r1;*r0p=r0;}
+static void f1764(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,512);*r1p=r1;*r0p=r0;}
+static void f1765(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-512);*r1p=r1;*r0p=r0;}
+static void f1766(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,511);*r1p=r1;*r0p=r0;}
+static void f1767(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-513);*r1p=r1;*r0p=r0;}
+static void f1768(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,1024);*r1p=r1;*r0p=r0;}
+static void f1769(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1770(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,1023);*r1p=r1;*r0p=r0;}
+static void f1771(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1772(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,2048);*r1p=r1;*r0p=r0;}
+static void f1773(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1774(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,2047);*r1p=r1;*r0p=r0;}
+static void f1775(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1776(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,4096);*r1p=r1;*r0p=r0;}
+static void f1777(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1778(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,4095);*r1p=r1;*r0p=r0;}
+static void f1779(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1780(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,8192);*r1p=r1;*r0p=r0;}
+static void f1781(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1782(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,8191);*r1p=r1;*r0p=r0;}
+static void f1783(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1784(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,16384);*r1p=r1;*r0p=r0;}
+static void f1785(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1786(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,16383);*r1p=r1;*r0p=r0;}
+static void f1787(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1788(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,32768);*r1p=r1;*r0p=r0;}
+static void f1789(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1790(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,32767);*r1p=r1;*r0p=r0;}
+static void f1791(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-65,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1792(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,1);*r1p=r1;*r0p=r0;}
+static void f1793(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-1);*r1p=r1;*r0p=r0;}
+static void f1794(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,0);*r1p=r1;*r0p=r0;}
+static void f1795(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-2);*r1p=r1;*r0p=r0;}
+static void f1796(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,2);*r1p=r1;*r0p=r0;}
+static void f1797(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-2);*r1p=r1;*r0p=r0;}
+static void f1798(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,1);*r1p=r1;*r0p=r0;}
+static void f1799(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-3);*r1p=r1;*r0p=r0;}
+static void f1800(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,4);*r1p=r1;*r0p=r0;}
+static void f1801(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-4);*r1p=r1;*r0p=r0;}
+static void f1802(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,3);*r1p=r1;*r0p=r0;}
+static void f1803(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-5);*r1p=r1;*r0p=r0;}
+static void f1804(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,8);*r1p=r1;*r0p=r0;}
+static void f1805(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-8);*r1p=r1;*r0p=r0;}
+static void f1806(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,7);*r1p=r1;*r0p=r0;}
+static void f1807(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-9);*r1p=r1;*r0p=r0;}
+static void f1808(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,16);*r1p=r1;*r0p=r0;}
+static void f1809(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-16);*r1p=r1;*r0p=r0;}
+static void f1810(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,15);*r1p=r1;*r0p=r0;}
+static void f1811(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-17);*r1p=r1;*r0p=r0;}
+static void f1812(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,32);*r1p=r1;*r0p=r0;}
+static void f1813(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-32);*r1p=r1;*r0p=r0;}
+static void f1814(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,31);*r1p=r1;*r0p=r0;}
+static void f1815(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-33);*r1p=r1;*r0p=r0;}
+static void f1816(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,64);*r1p=r1;*r0p=r0;}
+static void f1817(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-64);*r1p=r1;*r0p=r0;}
+static void f1818(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,63);*r1p=r1;*r0p=r0;}
+static void f1819(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-65);*r1p=r1;*r0p=r0;}
+static void f1820(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,128);*r1p=r1;*r0p=r0;}
+static void f1821(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-128);*r1p=r1;*r0p=r0;}
+static void f1822(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,127);*r1p=r1;*r0p=r0;}
+static void f1823(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-129);*r1p=r1;*r0p=r0;}
+static void f1824(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,256);*r1p=r1;*r0p=r0;}
+static void f1825(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-256);*r1p=r1;*r0p=r0;}
+static void f1826(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,255);*r1p=r1;*r0p=r0;}
+static void f1827(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-257);*r1p=r1;*r0p=r0;}
+static void f1828(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,512);*r1p=r1;*r0p=r0;}
+static void f1829(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-512);*r1p=r1;*r0p=r0;}
+static void f1830(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,511);*r1p=r1;*r0p=r0;}
+static void f1831(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-513);*r1p=r1;*r0p=r0;}
+static void f1832(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,1024);*r1p=r1;*r0p=r0;}
+static void f1833(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1834(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,1023);*r1p=r1;*r0p=r0;}
+static void f1835(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1836(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,2048);*r1p=r1;*r0p=r0;}
+static void f1837(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1838(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,2047);*r1p=r1;*r0p=r0;}
+static void f1839(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1840(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,4096);*r1p=r1;*r0p=r0;}
+static void f1841(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1842(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,4095);*r1p=r1;*r0p=r0;}
+static void f1843(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1844(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,8192);*r1p=r1;*r0p=r0;}
+static void f1845(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1846(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,8191);*r1p=r1;*r0p=r0;}
+static void f1847(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1848(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,16384);*r1p=r1;*r0p=r0;}
+static void f1849(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1850(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,16383);*r1p=r1;*r0p=r0;}
+static void f1851(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1852(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,32768);*r1p=r1;*r0p=r0;}
+static void f1853(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1854(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,32767);*r1p=r1;*r0p=r0;}
+static void f1855(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,128,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1856(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,1);*r1p=r1;*r0p=r0;}
+static void f1857(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-1);*r1p=r1;*r0p=r0;}
+static void f1858(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,0);*r1p=r1;*r0p=r0;}
+static void f1859(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-2);*r1p=r1;*r0p=r0;}
+static void f1860(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,2);*r1p=r1;*r0p=r0;}
+static void f1861(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-2);*r1p=r1;*r0p=r0;}
+static void f1862(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,1);*r1p=r1;*r0p=r0;}
+static void f1863(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-3);*r1p=r1;*r0p=r0;}
+static void f1864(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,4);*r1p=r1;*r0p=r0;}
+static void f1865(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-4);*r1p=r1;*r0p=r0;}
+static void f1866(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,3);*r1p=r1;*r0p=r0;}
+static void f1867(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-5);*r1p=r1;*r0p=r0;}
+static void f1868(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,8);*r1p=r1;*r0p=r0;}
+static void f1869(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-8);*r1p=r1;*r0p=r0;}
+static void f1870(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,7);*r1p=r1;*r0p=r0;}
+static void f1871(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-9);*r1p=r1;*r0p=r0;}
+static void f1872(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,16);*r1p=r1;*r0p=r0;}
+static void f1873(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-16);*r1p=r1;*r0p=r0;}
+static void f1874(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,15);*r1p=r1;*r0p=r0;}
+static void f1875(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-17);*r1p=r1;*r0p=r0;}
+static void f1876(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,32);*r1p=r1;*r0p=r0;}
+static void f1877(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-32);*r1p=r1;*r0p=r0;}
+static void f1878(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,31);*r1p=r1;*r0p=r0;}
+static void f1879(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-33);*r1p=r1;*r0p=r0;}
+static void f1880(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,64);*r1p=r1;*r0p=r0;}
+static void f1881(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-64);*r1p=r1;*r0p=r0;}
+static void f1882(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,63);*r1p=r1;*r0p=r0;}
+static void f1883(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-65);*r1p=r1;*r0p=r0;}
+static void f1884(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,128);*r1p=r1;*r0p=r0;}
+static void f1885(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-128);*r1p=r1;*r0p=r0;}
+static void f1886(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,127);*r1p=r1;*r0p=r0;}
+static void f1887(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-129);*r1p=r1;*r0p=r0;}
+static void f1888(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,256);*r1p=r1;*r0p=r0;}
+static void f1889(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-256);*r1p=r1;*r0p=r0;}
+static void f1890(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,255);*r1p=r1;*r0p=r0;}
+static void f1891(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-257);*r1p=r1;*r0p=r0;}
+static void f1892(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,512);*r1p=r1;*r0p=r0;}
+static void f1893(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-512);*r1p=r1;*r0p=r0;}
+static void f1894(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,511);*r1p=r1;*r0p=r0;}
+static void f1895(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-513);*r1p=r1;*r0p=r0;}
+static void f1896(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,1024);*r1p=r1;*r0p=r0;}
+static void f1897(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1898(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,1023);*r1p=r1;*r0p=r0;}
+static void f1899(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1900(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,2048);*r1p=r1;*r0p=r0;}
+static void f1901(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1902(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,2047);*r1p=r1;*r0p=r0;}
+static void f1903(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1904(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,4096);*r1p=r1;*r0p=r0;}
+static void f1905(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1906(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,4095);*r1p=r1;*r0p=r0;}
+static void f1907(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1908(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,8192);*r1p=r1;*r0p=r0;}
+static void f1909(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1910(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,8191);*r1p=r1;*r0p=r0;}
+static void f1911(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1912(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,16384);*r1p=r1;*r0p=r0;}
+static void f1913(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1914(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,16383);*r1p=r1;*r0p=r0;}
+static void f1915(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1916(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,32768);*r1p=r1;*r0p=r0;}
+static void f1917(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1918(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,32767);*r1p=r1;*r0p=r0;}
+static void f1919(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-128,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1920(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,1);*r1p=r1;*r0p=r0;}
+static void f1921(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-1);*r1p=r1;*r0p=r0;}
+static void f1922(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,0);*r1p=r1;*r0p=r0;}
+static void f1923(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-2);*r1p=r1;*r0p=r0;}
+static void f1924(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,2);*r1p=r1;*r0p=r0;}
+static void f1925(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-2);*r1p=r1;*r0p=r0;}
+static void f1926(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,1);*r1p=r1;*r0p=r0;}
+static void f1927(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-3);*r1p=r1;*r0p=r0;}
+static void f1928(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,4);*r1p=r1;*r0p=r0;}
+static void f1929(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-4);*r1p=r1;*r0p=r0;}
+static void f1930(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,3);*r1p=r1;*r0p=r0;}
+static void f1931(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-5);*r1p=r1;*r0p=r0;}
+static void f1932(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,8);*r1p=r1;*r0p=r0;}
+static void f1933(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-8);*r1p=r1;*r0p=r0;}
+static void f1934(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,7);*r1p=r1;*r0p=r0;}
+static void f1935(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-9);*r1p=r1;*r0p=r0;}
+static void f1936(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,16);*r1p=r1;*r0p=r0;}
+static void f1937(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-16);*r1p=r1;*r0p=r0;}
+static void f1938(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,15);*r1p=r1;*r0p=r0;}
+static void f1939(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-17);*r1p=r1;*r0p=r0;}
+static void f1940(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,32);*r1p=r1;*r0p=r0;}
+static void f1941(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-32);*r1p=r1;*r0p=r0;}
+static void f1942(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,31);*r1p=r1;*r0p=r0;}
+static void f1943(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-33);*r1p=r1;*r0p=r0;}
+static void f1944(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,64);*r1p=r1;*r0p=r0;}
+static void f1945(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-64);*r1p=r1;*r0p=r0;}
+static void f1946(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,63);*r1p=r1;*r0p=r0;}
+static void f1947(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-65);*r1p=r1;*r0p=r0;}
+static void f1948(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,128);*r1p=r1;*r0p=r0;}
+static void f1949(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-128);*r1p=r1;*r0p=r0;}
+static void f1950(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,127);*r1p=r1;*r0p=r0;}
+static void f1951(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-129);*r1p=r1;*r0p=r0;}
+static void f1952(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,256);*r1p=r1;*r0p=r0;}
+static void f1953(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-256);*r1p=r1;*r0p=r0;}
+static void f1954(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,255);*r1p=r1;*r0p=r0;}
+static void f1955(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-257);*r1p=r1;*r0p=r0;}
+static void f1956(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,512);*r1p=r1;*r0p=r0;}
+static void f1957(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-512);*r1p=r1;*r0p=r0;}
+static void f1958(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,511);*r1p=r1;*r0p=r0;}
+static void f1959(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-513);*r1p=r1;*r0p=r0;}
+static void f1960(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,1024);*r1p=r1;*r0p=r0;}
+static void f1961(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-1024);*r1p=r1;*r0p=r0;}
+static void f1962(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,1023);*r1p=r1;*r0p=r0;}
+static void f1963(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-1025);*r1p=r1;*r0p=r0;}
+static void f1964(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,2048);*r1p=r1;*r0p=r0;}
+static void f1965(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-2048);*r1p=r1;*r0p=r0;}
+static void f1966(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,2047);*r1p=r1;*r0p=r0;}
+static void f1967(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-2049);*r1p=r1;*r0p=r0;}
+static void f1968(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,4096);*r1p=r1;*r0p=r0;}
+static void f1969(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-4096);*r1p=r1;*r0p=r0;}
+static void f1970(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,4095);*r1p=r1;*r0p=r0;}
+static void f1971(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-4097);*r1p=r1;*r0p=r0;}
+static void f1972(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,8192);*r1p=r1;*r0p=r0;}
+static void f1973(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-8192);*r1p=r1;*r0p=r0;}
+static void f1974(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,8191);*r1p=r1;*r0p=r0;}
+static void f1975(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-8193);*r1p=r1;*r0p=r0;}
+static void f1976(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,16384);*r1p=r1;*r0p=r0;}
+static void f1977(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-16384);*r1p=r1;*r0p=r0;}
+static void f1978(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,16383);*r1p=r1;*r0p=r0;}
+static void f1979(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-16385);*r1p=r1;*r0p=r0;}
+static void f1980(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,32768);*r1p=r1;*r0p=r0;}
+static void f1981(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-32768);*r1p=r1;*r0p=r0;}
+static void f1982(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,32767);*r1p=r1;*r0p=r0;}
+static void f1983(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,127,0,-32769);*r1p=r1;*r0p=r0;}
+static void f1984(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,1);*r1p=r1;*r0p=r0;}
+static void f1985(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-1);*r1p=r1;*r0p=r0;}
+static void f1986(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,0);*r1p=r1;*r0p=r0;}
+static void f1987(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-2);*r1p=r1;*r0p=r0;}
+static void f1988(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,2);*r1p=r1;*r0p=r0;}
+static void f1989(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-2);*r1p=r1;*r0p=r0;}
+static void f1990(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,1);*r1p=r1;*r0p=r0;}
+static void f1991(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-3);*r1p=r1;*r0p=r0;}
+static void f1992(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,4);*r1p=r1;*r0p=r0;}
+static void f1993(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-4);*r1p=r1;*r0p=r0;}
+static void f1994(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,3);*r1p=r1;*r0p=r0;}
+static void f1995(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-5);*r1p=r1;*r0p=r0;}
+static void f1996(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,8);*r1p=r1;*r0p=r0;}
+static void f1997(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-8);*r1p=r1;*r0p=r0;}
+static void f1998(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,7);*r1p=r1;*r0p=r0;}
+static void f1999(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-9);*r1p=r1;*r0p=r0;}
+static void f2000(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,16);*r1p=r1;*r0p=r0;}
+static void f2001(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-16);*r1p=r1;*r0p=r0;}
+static void f2002(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,15);*r1p=r1;*r0p=r0;}
+static void f2003(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-17);*r1p=r1;*r0p=r0;}
+static void f2004(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,32);*r1p=r1;*r0p=r0;}
+static void f2005(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-32);*r1p=r1;*r0p=r0;}
+static void f2006(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,31);*r1p=r1;*r0p=r0;}
+static void f2007(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-33);*r1p=r1;*r0p=r0;}
+static void f2008(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,64);*r1p=r1;*r0p=r0;}
+static void f2009(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-64);*r1p=r1;*r0p=r0;}
+static void f2010(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,63);*r1p=r1;*r0p=r0;}
+static void f2011(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-65);*r1p=r1;*r0p=r0;}
+static void f2012(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,128);*r1p=r1;*r0p=r0;}
+static void f2013(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-128);*r1p=r1;*r0p=r0;}
+static void f2014(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,127);*r1p=r1;*r0p=r0;}
+static void f2015(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-129);*r1p=r1;*r0p=r0;}
+static void f2016(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,256);*r1p=r1;*r0p=r0;}
+static void f2017(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-256);*r1p=r1;*r0p=r0;}
+static void f2018(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,255);*r1p=r1;*r0p=r0;}
+static void f2019(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-257);*r1p=r1;*r0p=r0;}
+static void f2020(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,512);*r1p=r1;*r0p=r0;}
+static void f2021(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-512);*r1p=r1;*r0p=r0;}
+static void f2022(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,511);*r1p=r1;*r0p=r0;}
+static void f2023(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-513);*r1p=r1;*r0p=r0;}
+static void f2024(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,1024);*r1p=r1;*r0p=r0;}
+static void f2025(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2026(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,1023);*r1p=r1;*r0p=r0;}
+static void f2027(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2028(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,2048);*r1p=r1;*r0p=r0;}
+static void f2029(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2030(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,2047);*r1p=r1;*r0p=r0;}
+static void f2031(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2032(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,4096);*r1p=r1;*r0p=r0;}
+static void f2033(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2034(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,4095);*r1p=r1;*r0p=r0;}
+static void f2035(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2036(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,8192);*r1p=r1;*r0p=r0;}
+static void f2037(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2038(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,8191);*r1p=r1;*r0p=r0;}
+static void f2039(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2040(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,16384);*r1p=r1;*r0p=r0;}
+static void f2041(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2042(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,16383);*r1p=r1;*r0p=r0;}
+static void f2043(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2044(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,32768);*r1p=r1;*r0p=r0;}
+static void f2045(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2046(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,32767);*r1p=r1;*r0p=r0;}
+static void f2047(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-129,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2048(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,1);*r1p=r1;*r0p=r0;}
+static void f2049(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-1);*r1p=r1;*r0p=r0;}
+static void f2050(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,0);*r1p=r1;*r0p=r0;}
+static void f2051(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-2);*r1p=r1;*r0p=r0;}
+static void f2052(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,2);*r1p=r1;*r0p=r0;}
+static void f2053(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-2);*r1p=r1;*r0p=r0;}
+static void f2054(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,1);*r1p=r1;*r0p=r0;}
+static void f2055(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-3);*r1p=r1;*r0p=r0;}
+static void f2056(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,4);*r1p=r1;*r0p=r0;}
+static void f2057(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-4);*r1p=r1;*r0p=r0;}
+static void f2058(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,3);*r1p=r1;*r0p=r0;}
+static void f2059(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-5);*r1p=r1;*r0p=r0;}
+static void f2060(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,8);*r1p=r1;*r0p=r0;}
+static void f2061(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-8);*r1p=r1;*r0p=r0;}
+static void f2062(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,7);*r1p=r1;*r0p=r0;}
+static void f2063(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-9);*r1p=r1;*r0p=r0;}
+static void f2064(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,16);*r1p=r1;*r0p=r0;}
+static void f2065(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-16);*r1p=r1;*r0p=r0;}
+static void f2066(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,15);*r1p=r1;*r0p=r0;}
+static void f2067(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-17);*r1p=r1;*r0p=r0;}
+static void f2068(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,32);*r1p=r1;*r0p=r0;}
+static void f2069(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-32);*r1p=r1;*r0p=r0;}
+static void f2070(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,31);*r1p=r1;*r0p=r0;}
+static void f2071(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-33);*r1p=r1;*r0p=r0;}
+static void f2072(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,64);*r1p=r1;*r0p=r0;}
+static void f2073(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-64);*r1p=r1;*r0p=r0;}
+static void f2074(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,63);*r1p=r1;*r0p=r0;}
+static void f2075(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-65);*r1p=r1;*r0p=r0;}
+static void f2076(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,128);*r1p=r1;*r0p=r0;}
+static void f2077(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-128);*r1p=r1;*r0p=r0;}
+static void f2078(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,127);*r1p=r1;*r0p=r0;}
+static void f2079(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-129);*r1p=r1;*r0p=r0;}
+static void f2080(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,256);*r1p=r1;*r0p=r0;}
+static void f2081(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-256);*r1p=r1;*r0p=r0;}
+static void f2082(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,255);*r1p=r1;*r0p=r0;}
+static void f2083(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-257);*r1p=r1;*r0p=r0;}
+static void f2084(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,512);*r1p=r1;*r0p=r0;}
+static void f2085(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-512);*r1p=r1;*r0p=r0;}
+static void f2086(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,511);*r1p=r1;*r0p=r0;}
+static void f2087(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-513);*r1p=r1;*r0p=r0;}
+static void f2088(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,1024);*r1p=r1;*r0p=r0;}
+static void f2089(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2090(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,1023);*r1p=r1;*r0p=r0;}
+static void f2091(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2092(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,2048);*r1p=r1;*r0p=r0;}
+static void f2093(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2094(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,2047);*r1p=r1;*r0p=r0;}
+static void f2095(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2096(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,4096);*r1p=r1;*r0p=r0;}
+static void f2097(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2098(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,4095);*r1p=r1;*r0p=r0;}
+static void f2099(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2100(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,8192);*r1p=r1;*r0p=r0;}
+static void f2101(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2102(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,8191);*r1p=r1;*r0p=r0;}
+static void f2103(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2104(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,16384);*r1p=r1;*r0p=r0;}
+static void f2105(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2106(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,16383);*r1p=r1;*r0p=r0;}
+static void f2107(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2108(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,32768);*r1p=r1;*r0p=r0;}
+static void f2109(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2110(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,32767);*r1p=r1;*r0p=r0;}
+static void f2111(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,256,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2112(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,1);*r1p=r1;*r0p=r0;}
+static void f2113(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-1);*r1p=r1;*r0p=r0;}
+static void f2114(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,0);*r1p=r1;*r0p=r0;}
+static void f2115(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-2);*r1p=r1;*r0p=r0;}
+static void f2116(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,2);*r1p=r1;*r0p=r0;}
+static void f2117(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-2);*r1p=r1;*r0p=r0;}
+static void f2118(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,1);*r1p=r1;*r0p=r0;}
+static void f2119(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-3);*r1p=r1;*r0p=r0;}
+static void f2120(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,4);*r1p=r1;*r0p=r0;}
+static void f2121(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-4);*r1p=r1;*r0p=r0;}
+static void f2122(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,3);*r1p=r1;*r0p=r0;}
+static void f2123(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-5);*r1p=r1;*r0p=r0;}
+static void f2124(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,8);*r1p=r1;*r0p=r0;}
+static void f2125(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-8);*r1p=r1;*r0p=r0;}
+static void f2126(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,7);*r1p=r1;*r0p=r0;}
+static void f2127(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-9);*r1p=r1;*r0p=r0;}
+static void f2128(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,16);*r1p=r1;*r0p=r0;}
+static void f2129(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-16);*r1p=r1;*r0p=r0;}
+static void f2130(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,15);*r1p=r1;*r0p=r0;}
+static void f2131(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-17);*r1p=r1;*r0p=r0;}
+static void f2132(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,32);*r1p=r1;*r0p=r0;}
+static void f2133(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-32);*r1p=r1;*r0p=r0;}
+static void f2134(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,31);*r1p=r1;*r0p=r0;}
+static void f2135(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-33);*r1p=r1;*r0p=r0;}
+static void f2136(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,64);*r1p=r1;*r0p=r0;}
+static void f2137(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-64);*r1p=r1;*r0p=r0;}
+static void f2138(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,63);*r1p=r1;*r0p=r0;}
+static void f2139(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-65);*r1p=r1;*r0p=r0;}
+static void f2140(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,128);*r1p=r1;*r0p=r0;}
+static void f2141(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-128);*r1p=r1;*r0p=r0;}
+static void f2142(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,127);*r1p=r1;*r0p=r0;}
+static void f2143(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-129);*r1p=r1;*r0p=r0;}
+static void f2144(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,256);*r1p=r1;*r0p=r0;}
+static void f2145(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-256);*r1p=r1;*r0p=r0;}
+static void f2146(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,255);*r1p=r1;*r0p=r0;}
+static void f2147(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-257);*r1p=r1;*r0p=r0;}
+static void f2148(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,512);*r1p=r1;*r0p=r0;}
+static void f2149(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-512);*r1p=r1;*r0p=r0;}
+static void f2150(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,511);*r1p=r1;*r0p=r0;}
+static void f2151(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-513);*r1p=r1;*r0p=r0;}
+static void f2152(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,1024);*r1p=r1;*r0p=r0;}
+static void f2153(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2154(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,1023);*r1p=r1;*r0p=r0;}
+static void f2155(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2156(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,2048);*r1p=r1;*r0p=r0;}
+static void f2157(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2158(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,2047);*r1p=r1;*r0p=r0;}
+static void f2159(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2160(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,4096);*r1p=r1;*r0p=r0;}
+static void f2161(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2162(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,4095);*r1p=r1;*r0p=r0;}
+static void f2163(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2164(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,8192);*r1p=r1;*r0p=r0;}
+static void f2165(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2166(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,8191);*r1p=r1;*r0p=r0;}
+static void f2167(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2168(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,16384);*r1p=r1;*r0p=r0;}
+static void f2169(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2170(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,16383);*r1p=r1;*r0p=r0;}
+static void f2171(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2172(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,32768);*r1p=r1;*r0p=r0;}
+static void f2173(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2174(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,32767);*r1p=r1;*r0p=r0;}
+static void f2175(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-256,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2176(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,1);*r1p=r1;*r0p=r0;}
+static void f2177(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-1);*r1p=r1;*r0p=r0;}
+static void f2178(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,0);*r1p=r1;*r0p=r0;}
+static void f2179(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-2);*r1p=r1;*r0p=r0;}
+static void f2180(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,2);*r1p=r1;*r0p=r0;}
+static void f2181(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-2);*r1p=r1;*r0p=r0;}
+static void f2182(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,1);*r1p=r1;*r0p=r0;}
+static void f2183(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-3);*r1p=r1;*r0p=r0;}
+static void f2184(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,4);*r1p=r1;*r0p=r0;}
+static void f2185(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-4);*r1p=r1;*r0p=r0;}
+static void f2186(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,3);*r1p=r1;*r0p=r0;}
+static void f2187(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-5);*r1p=r1;*r0p=r0;}
+static void f2188(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,8);*r1p=r1;*r0p=r0;}
+static void f2189(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-8);*r1p=r1;*r0p=r0;}
+static void f2190(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,7);*r1p=r1;*r0p=r0;}
+static void f2191(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-9);*r1p=r1;*r0p=r0;}
+static void f2192(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,16);*r1p=r1;*r0p=r0;}
+static void f2193(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-16);*r1p=r1;*r0p=r0;}
+static void f2194(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,15);*r1p=r1;*r0p=r0;}
+static void f2195(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-17);*r1p=r1;*r0p=r0;}
+static void f2196(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,32);*r1p=r1;*r0p=r0;}
+static void f2197(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-32);*r1p=r1;*r0p=r0;}
+static void f2198(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,31);*r1p=r1;*r0p=r0;}
+static void f2199(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-33);*r1p=r1;*r0p=r0;}
+static void f2200(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,64);*r1p=r1;*r0p=r0;}
+static void f2201(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-64);*r1p=r1;*r0p=r0;}
+static void f2202(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,63);*r1p=r1;*r0p=r0;}
+static void f2203(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-65);*r1p=r1;*r0p=r0;}
+static void f2204(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,128);*r1p=r1;*r0p=r0;}
+static void f2205(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-128);*r1p=r1;*r0p=r0;}
+static void f2206(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,127);*r1p=r1;*r0p=r0;}
+static void f2207(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-129);*r1p=r1;*r0p=r0;}
+static void f2208(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,256);*r1p=r1;*r0p=r0;}
+static void f2209(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-256);*r1p=r1;*r0p=r0;}
+static void f2210(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,255);*r1p=r1;*r0p=r0;}
+static void f2211(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-257);*r1p=r1;*r0p=r0;}
+static void f2212(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,512);*r1p=r1;*r0p=r0;}
+static void f2213(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-512);*r1p=r1;*r0p=r0;}
+static void f2214(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,511);*r1p=r1;*r0p=r0;}
+static void f2215(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-513);*r1p=r1;*r0p=r0;}
+static void f2216(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,1024);*r1p=r1;*r0p=r0;}
+static void f2217(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2218(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,1023);*r1p=r1;*r0p=r0;}
+static void f2219(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2220(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,2048);*r1p=r1;*r0p=r0;}
+static void f2221(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2222(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,2047);*r1p=r1;*r0p=r0;}
+static void f2223(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2224(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,4096);*r1p=r1;*r0p=r0;}
+static void f2225(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2226(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,4095);*r1p=r1;*r0p=r0;}
+static void f2227(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2228(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,8192);*r1p=r1;*r0p=r0;}
+static void f2229(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2230(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,8191);*r1p=r1;*r0p=r0;}
+static void f2231(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2232(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,16384);*r1p=r1;*r0p=r0;}
+static void f2233(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2234(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,16383);*r1p=r1;*r0p=r0;}
+static void f2235(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2236(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,32768);*r1p=r1;*r0p=r0;}
+static void f2237(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2238(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,32767);*r1p=r1;*r0p=r0;}
+static void f2239(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,255,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2240(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,1);*r1p=r1;*r0p=r0;}
+static void f2241(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-1);*r1p=r1;*r0p=r0;}
+static void f2242(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,0);*r1p=r1;*r0p=r0;}
+static void f2243(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-2);*r1p=r1;*r0p=r0;}
+static void f2244(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,2);*r1p=r1;*r0p=r0;}
+static void f2245(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-2);*r1p=r1;*r0p=r0;}
+static void f2246(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,1);*r1p=r1;*r0p=r0;}
+static void f2247(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-3);*r1p=r1;*r0p=r0;}
+static void f2248(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,4);*r1p=r1;*r0p=r0;}
+static void f2249(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-4);*r1p=r1;*r0p=r0;}
+static void f2250(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,3);*r1p=r1;*r0p=r0;}
+static void f2251(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-5);*r1p=r1;*r0p=r0;}
+static void f2252(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,8);*r1p=r1;*r0p=r0;}
+static void f2253(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-8);*r1p=r1;*r0p=r0;}
+static void f2254(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,7);*r1p=r1;*r0p=r0;}
+static void f2255(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-9);*r1p=r1;*r0p=r0;}
+static void f2256(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,16);*r1p=r1;*r0p=r0;}
+static void f2257(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-16);*r1p=r1;*r0p=r0;}
+static void f2258(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,15);*r1p=r1;*r0p=r0;}
+static void f2259(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-17);*r1p=r1;*r0p=r0;}
+static void f2260(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,32);*r1p=r1;*r0p=r0;}
+static void f2261(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-32);*r1p=r1;*r0p=r0;}
+static void f2262(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,31);*r1p=r1;*r0p=r0;}
+static void f2263(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-33);*r1p=r1;*r0p=r0;}
+static void f2264(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,64);*r1p=r1;*r0p=r0;}
+static void f2265(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-64);*r1p=r1;*r0p=r0;}
+static void f2266(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,63);*r1p=r1;*r0p=r0;}
+static void f2267(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-65);*r1p=r1;*r0p=r0;}
+static void f2268(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,128);*r1p=r1;*r0p=r0;}
+static void f2269(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-128);*r1p=r1;*r0p=r0;}
+static void f2270(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,127);*r1p=r1;*r0p=r0;}
+static void f2271(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-129);*r1p=r1;*r0p=r0;}
+static void f2272(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,256);*r1p=r1;*r0p=r0;}
+static void f2273(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-256);*r1p=r1;*r0p=r0;}
+static void f2274(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,255);*r1p=r1;*r0p=r0;}
+static void f2275(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-257);*r1p=r1;*r0p=r0;}
+static void f2276(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,512);*r1p=r1;*r0p=r0;}
+static void f2277(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-512);*r1p=r1;*r0p=r0;}
+static void f2278(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,511);*r1p=r1;*r0p=r0;}
+static void f2279(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-513);*r1p=r1;*r0p=r0;}
+static void f2280(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,1024);*r1p=r1;*r0p=r0;}
+static void f2281(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2282(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,1023);*r1p=r1;*r0p=r0;}
+static void f2283(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2284(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,2048);*r1p=r1;*r0p=r0;}
+static void f2285(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2286(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,2047);*r1p=r1;*r0p=r0;}
+static void f2287(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2288(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,4096);*r1p=r1;*r0p=r0;}
+static void f2289(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2290(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,4095);*r1p=r1;*r0p=r0;}
+static void f2291(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2292(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,8192);*r1p=r1;*r0p=r0;}
+static void f2293(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2294(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,8191);*r1p=r1;*r0p=r0;}
+static void f2295(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2296(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,16384);*r1p=r1;*r0p=r0;}
+static void f2297(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2298(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,16383);*r1p=r1;*r0p=r0;}
+static void f2299(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2300(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,32768);*r1p=r1;*r0p=r0;}
+static void f2301(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2302(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,32767);*r1p=r1;*r0p=r0;}
+static void f2303(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-257,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2304(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,1);*r1p=r1;*r0p=r0;}
+static void f2305(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-1);*r1p=r1;*r0p=r0;}
+static void f2306(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,0);*r1p=r1;*r0p=r0;}
+static void f2307(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-2);*r1p=r1;*r0p=r0;}
+static void f2308(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,2);*r1p=r1;*r0p=r0;}
+static void f2309(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-2);*r1p=r1;*r0p=r0;}
+static void f2310(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,1);*r1p=r1;*r0p=r0;}
+static void f2311(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-3);*r1p=r1;*r0p=r0;}
+static void f2312(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,4);*r1p=r1;*r0p=r0;}
+static void f2313(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-4);*r1p=r1;*r0p=r0;}
+static void f2314(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,3);*r1p=r1;*r0p=r0;}
+static void f2315(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-5);*r1p=r1;*r0p=r0;}
+static void f2316(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,8);*r1p=r1;*r0p=r0;}
+static void f2317(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-8);*r1p=r1;*r0p=r0;}
+static void f2318(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,7);*r1p=r1;*r0p=r0;}
+static void f2319(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-9);*r1p=r1;*r0p=r0;}
+static void f2320(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,16);*r1p=r1;*r0p=r0;}
+static void f2321(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-16);*r1p=r1;*r0p=r0;}
+static void f2322(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,15);*r1p=r1;*r0p=r0;}
+static void f2323(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-17);*r1p=r1;*r0p=r0;}
+static void f2324(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,32);*r1p=r1;*r0p=r0;}
+static void f2325(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-32);*r1p=r1;*r0p=r0;}
+static void f2326(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,31);*r1p=r1;*r0p=r0;}
+static void f2327(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-33);*r1p=r1;*r0p=r0;}
+static void f2328(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,64);*r1p=r1;*r0p=r0;}
+static void f2329(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-64);*r1p=r1;*r0p=r0;}
+static void f2330(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,63);*r1p=r1;*r0p=r0;}
+static void f2331(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-65);*r1p=r1;*r0p=r0;}
+static void f2332(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,128);*r1p=r1;*r0p=r0;}
+static void f2333(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-128);*r1p=r1;*r0p=r0;}
+static void f2334(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,127);*r1p=r1;*r0p=r0;}
+static void f2335(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-129);*r1p=r1;*r0p=r0;}
+static void f2336(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,256);*r1p=r1;*r0p=r0;}
+static void f2337(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-256);*r1p=r1;*r0p=r0;}
+static void f2338(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,255);*r1p=r1;*r0p=r0;}
+static void f2339(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-257);*r1p=r1;*r0p=r0;}
+static void f2340(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,512);*r1p=r1;*r0p=r0;}
+static void f2341(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-512);*r1p=r1;*r0p=r0;}
+static void f2342(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,511);*r1p=r1;*r0p=r0;}
+static void f2343(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-513);*r1p=r1;*r0p=r0;}
+static void f2344(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,1024);*r1p=r1;*r0p=r0;}
+static void f2345(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2346(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,1023);*r1p=r1;*r0p=r0;}
+static void f2347(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2348(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,2048);*r1p=r1;*r0p=r0;}
+static void f2349(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2350(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,2047);*r1p=r1;*r0p=r0;}
+static void f2351(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2352(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,4096);*r1p=r1;*r0p=r0;}
+static void f2353(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2354(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,4095);*r1p=r1;*r0p=r0;}
+static void f2355(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2356(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,8192);*r1p=r1;*r0p=r0;}
+static void f2357(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2358(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,8191);*r1p=r1;*r0p=r0;}
+static void f2359(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2360(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,16384);*r1p=r1;*r0p=r0;}
+static void f2361(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2362(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,16383);*r1p=r1;*r0p=r0;}
+static void f2363(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2364(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,32768);*r1p=r1;*r0p=r0;}
+static void f2365(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2366(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,32767);*r1p=r1;*r0p=r0;}
+static void f2367(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,512,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2368(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,1);*r1p=r1;*r0p=r0;}
+static void f2369(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-1);*r1p=r1;*r0p=r0;}
+static void f2370(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,0);*r1p=r1;*r0p=r0;}
+static void f2371(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-2);*r1p=r1;*r0p=r0;}
+static void f2372(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,2);*r1p=r1;*r0p=r0;}
+static void f2373(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-2);*r1p=r1;*r0p=r0;}
+static void f2374(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,1);*r1p=r1;*r0p=r0;}
+static void f2375(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-3);*r1p=r1;*r0p=r0;}
+static void f2376(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,4);*r1p=r1;*r0p=r0;}
+static void f2377(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-4);*r1p=r1;*r0p=r0;}
+static void f2378(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,3);*r1p=r1;*r0p=r0;}
+static void f2379(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-5);*r1p=r1;*r0p=r0;}
+static void f2380(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,8);*r1p=r1;*r0p=r0;}
+static void f2381(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-8);*r1p=r1;*r0p=r0;}
+static void f2382(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,7);*r1p=r1;*r0p=r0;}
+static void f2383(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-9);*r1p=r1;*r0p=r0;}
+static void f2384(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,16);*r1p=r1;*r0p=r0;}
+static void f2385(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-16);*r1p=r1;*r0p=r0;}
+static void f2386(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,15);*r1p=r1;*r0p=r0;}
+static void f2387(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-17);*r1p=r1;*r0p=r0;}
+static void f2388(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,32);*r1p=r1;*r0p=r0;}
+static void f2389(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-32);*r1p=r1;*r0p=r0;}
+static void f2390(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,31);*r1p=r1;*r0p=r0;}
+static void f2391(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-33);*r1p=r1;*r0p=r0;}
+static void f2392(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,64);*r1p=r1;*r0p=r0;}
+static void f2393(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-64);*r1p=r1;*r0p=r0;}
+static void f2394(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,63);*r1p=r1;*r0p=r0;}
+static void f2395(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-65);*r1p=r1;*r0p=r0;}
+static void f2396(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,128);*r1p=r1;*r0p=r0;}
+static void f2397(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-128);*r1p=r1;*r0p=r0;}
+static void f2398(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,127);*r1p=r1;*r0p=r0;}
+static void f2399(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-129);*r1p=r1;*r0p=r0;}
+static void f2400(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,256);*r1p=r1;*r0p=r0;}
+static void f2401(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-256);*r1p=r1;*r0p=r0;}
+static void f2402(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,255);*r1p=r1;*r0p=r0;}
+static void f2403(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-257);*r1p=r1;*r0p=r0;}
+static void f2404(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,512);*r1p=r1;*r0p=r0;}
+static void f2405(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-512);*r1p=r1;*r0p=r0;}
+static void f2406(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,511);*r1p=r1;*r0p=r0;}
+static void f2407(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-513);*r1p=r1;*r0p=r0;}
+static void f2408(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,1024);*r1p=r1;*r0p=r0;}
+static void f2409(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2410(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,1023);*r1p=r1;*r0p=r0;}
+static void f2411(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2412(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,2048);*r1p=r1;*r0p=r0;}
+static void f2413(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2414(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,2047);*r1p=r1;*r0p=r0;}
+static void f2415(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2416(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,4096);*r1p=r1;*r0p=r0;}
+static void f2417(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2418(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,4095);*r1p=r1;*r0p=r0;}
+static void f2419(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2420(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,8192);*r1p=r1;*r0p=r0;}
+static void f2421(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2422(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,8191);*r1p=r1;*r0p=r0;}
+static void f2423(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2424(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,16384);*r1p=r1;*r0p=r0;}
+static void f2425(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2426(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,16383);*r1p=r1;*r0p=r0;}
+static void f2427(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2428(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,32768);*r1p=r1;*r0p=r0;}
+static void f2429(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2430(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,32767);*r1p=r1;*r0p=r0;}
+static void f2431(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-512,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2432(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,1);*r1p=r1;*r0p=r0;}
+static void f2433(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-1);*r1p=r1;*r0p=r0;}
+static void f2434(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,0);*r1p=r1;*r0p=r0;}
+static void f2435(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-2);*r1p=r1;*r0p=r0;}
+static void f2436(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,2);*r1p=r1;*r0p=r0;}
+static void f2437(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-2);*r1p=r1;*r0p=r0;}
+static void f2438(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,1);*r1p=r1;*r0p=r0;}
+static void f2439(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-3);*r1p=r1;*r0p=r0;}
+static void f2440(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,4);*r1p=r1;*r0p=r0;}
+static void f2441(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-4);*r1p=r1;*r0p=r0;}
+static void f2442(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,3);*r1p=r1;*r0p=r0;}
+static void f2443(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-5);*r1p=r1;*r0p=r0;}
+static void f2444(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,8);*r1p=r1;*r0p=r0;}
+static void f2445(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-8);*r1p=r1;*r0p=r0;}
+static void f2446(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,7);*r1p=r1;*r0p=r0;}
+static void f2447(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-9);*r1p=r1;*r0p=r0;}
+static void f2448(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,16);*r1p=r1;*r0p=r0;}
+static void f2449(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-16);*r1p=r1;*r0p=r0;}
+static void f2450(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,15);*r1p=r1;*r0p=r0;}
+static void f2451(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-17);*r1p=r1;*r0p=r0;}
+static void f2452(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,32);*r1p=r1;*r0p=r0;}
+static void f2453(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-32);*r1p=r1;*r0p=r0;}
+static void f2454(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,31);*r1p=r1;*r0p=r0;}
+static void f2455(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-33);*r1p=r1;*r0p=r0;}
+static void f2456(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,64);*r1p=r1;*r0p=r0;}
+static void f2457(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-64);*r1p=r1;*r0p=r0;}
+static void f2458(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,63);*r1p=r1;*r0p=r0;}
+static void f2459(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-65);*r1p=r1;*r0p=r0;}
+static void f2460(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,128);*r1p=r1;*r0p=r0;}
+static void f2461(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-128);*r1p=r1;*r0p=r0;}
+static void f2462(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,127);*r1p=r1;*r0p=r0;}
+static void f2463(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-129);*r1p=r1;*r0p=r0;}
+static void f2464(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,256);*r1p=r1;*r0p=r0;}
+static void f2465(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-256);*r1p=r1;*r0p=r0;}
+static void f2466(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,255);*r1p=r1;*r0p=r0;}
+static void f2467(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-257);*r1p=r1;*r0p=r0;}
+static void f2468(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,512);*r1p=r1;*r0p=r0;}
+static void f2469(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-512);*r1p=r1;*r0p=r0;}
+static void f2470(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,511);*r1p=r1;*r0p=r0;}
+static void f2471(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-513);*r1p=r1;*r0p=r0;}
+static void f2472(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,1024);*r1p=r1;*r0p=r0;}
+static void f2473(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2474(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,1023);*r1p=r1;*r0p=r0;}
+static void f2475(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2476(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,2048);*r1p=r1;*r0p=r0;}
+static void f2477(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2478(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,2047);*r1p=r1;*r0p=r0;}
+static void f2479(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2480(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,4096);*r1p=r1;*r0p=r0;}
+static void f2481(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2482(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,4095);*r1p=r1;*r0p=r0;}
+static void f2483(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2484(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,8192);*r1p=r1;*r0p=r0;}
+static void f2485(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2486(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,8191);*r1p=r1;*r0p=r0;}
+static void f2487(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2488(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,16384);*r1p=r1;*r0p=r0;}
+static void f2489(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2490(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,16383);*r1p=r1;*r0p=r0;}
+static void f2491(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2492(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,32768);*r1p=r1;*r0p=r0;}
+static void f2493(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2494(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,32767);*r1p=r1;*r0p=r0;}
+static void f2495(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,511,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2496(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,1);*r1p=r1;*r0p=r0;}
+static void f2497(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-1);*r1p=r1;*r0p=r0;}
+static void f2498(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,0);*r1p=r1;*r0p=r0;}
+static void f2499(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-2);*r1p=r1;*r0p=r0;}
+static void f2500(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,2);*r1p=r1;*r0p=r0;}
+static void f2501(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-2);*r1p=r1;*r0p=r0;}
+static void f2502(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,1);*r1p=r1;*r0p=r0;}
+static void f2503(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-3);*r1p=r1;*r0p=r0;}
+static void f2504(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,4);*r1p=r1;*r0p=r0;}
+static void f2505(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-4);*r1p=r1;*r0p=r0;}
+static void f2506(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,3);*r1p=r1;*r0p=r0;}
+static void f2507(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-5);*r1p=r1;*r0p=r0;}
+static void f2508(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,8);*r1p=r1;*r0p=r0;}
+static void f2509(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-8);*r1p=r1;*r0p=r0;}
+static void f2510(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,7);*r1p=r1;*r0p=r0;}
+static void f2511(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-9);*r1p=r1;*r0p=r0;}
+static void f2512(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,16);*r1p=r1;*r0p=r0;}
+static void f2513(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-16);*r1p=r1;*r0p=r0;}
+static void f2514(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,15);*r1p=r1;*r0p=r0;}
+static void f2515(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-17);*r1p=r1;*r0p=r0;}
+static void f2516(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,32);*r1p=r1;*r0p=r0;}
+static void f2517(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-32);*r1p=r1;*r0p=r0;}
+static void f2518(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,31);*r1p=r1;*r0p=r0;}
+static void f2519(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-33);*r1p=r1;*r0p=r0;}
+static void f2520(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,64);*r1p=r1;*r0p=r0;}
+static void f2521(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-64);*r1p=r1;*r0p=r0;}
+static void f2522(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,63);*r1p=r1;*r0p=r0;}
+static void f2523(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-65);*r1p=r1;*r0p=r0;}
+static void f2524(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,128);*r1p=r1;*r0p=r0;}
+static void f2525(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-128);*r1p=r1;*r0p=r0;}
+static void f2526(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,127);*r1p=r1;*r0p=r0;}
+static void f2527(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-129);*r1p=r1;*r0p=r0;}
+static void f2528(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,256);*r1p=r1;*r0p=r0;}
+static void f2529(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-256);*r1p=r1;*r0p=r0;}
+static void f2530(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,255);*r1p=r1;*r0p=r0;}
+static void f2531(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-257);*r1p=r1;*r0p=r0;}
+static void f2532(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,512);*r1p=r1;*r0p=r0;}
+static void f2533(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-512);*r1p=r1;*r0p=r0;}
+static void f2534(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,511);*r1p=r1;*r0p=r0;}
+static void f2535(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-513);*r1p=r1;*r0p=r0;}
+static void f2536(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,1024);*r1p=r1;*r0p=r0;}
+static void f2537(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2538(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,1023);*r1p=r1;*r0p=r0;}
+static void f2539(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2540(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,2048);*r1p=r1;*r0p=r0;}
+static void f2541(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2542(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,2047);*r1p=r1;*r0p=r0;}
+static void f2543(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2544(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,4096);*r1p=r1;*r0p=r0;}
+static void f2545(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2546(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,4095);*r1p=r1;*r0p=r0;}
+static void f2547(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2548(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,8192);*r1p=r1;*r0p=r0;}
+static void f2549(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2550(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,8191);*r1p=r1;*r0p=r0;}
+static void f2551(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2552(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,16384);*r1p=r1;*r0p=r0;}
+static void f2553(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2554(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,16383);*r1p=r1;*r0p=r0;}
+static void f2555(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2556(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,32768);*r1p=r1;*r0p=r0;}
+static void f2557(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2558(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,32767);*r1p=r1;*r0p=r0;}
+static void f2559(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-513,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2560(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,1);*r1p=r1;*r0p=r0;}
+static void f2561(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-1);*r1p=r1;*r0p=r0;}
+static void f2562(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,0);*r1p=r1;*r0p=r0;}
+static void f2563(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-2);*r1p=r1;*r0p=r0;}
+static void f2564(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,2);*r1p=r1;*r0p=r0;}
+static void f2565(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-2);*r1p=r1;*r0p=r0;}
+static void f2566(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,1);*r1p=r1;*r0p=r0;}
+static void f2567(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-3);*r1p=r1;*r0p=r0;}
+static void f2568(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,4);*r1p=r1;*r0p=r0;}
+static void f2569(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-4);*r1p=r1;*r0p=r0;}
+static void f2570(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,3);*r1p=r1;*r0p=r0;}
+static void f2571(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-5);*r1p=r1;*r0p=r0;}
+static void f2572(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,8);*r1p=r1;*r0p=r0;}
+static void f2573(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-8);*r1p=r1;*r0p=r0;}
+static void f2574(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,7);*r1p=r1;*r0p=r0;}
+static void f2575(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-9);*r1p=r1;*r0p=r0;}
+static void f2576(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,16);*r1p=r1;*r0p=r0;}
+static void f2577(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-16);*r1p=r1;*r0p=r0;}
+static void f2578(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,15);*r1p=r1;*r0p=r0;}
+static void f2579(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-17);*r1p=r1;*r0p=r0;}
+static void f2580(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,32);*r1p=r1;*r0p=r0;}
+static void f2581(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-32);*r1p=r1;*r0p=r0;}
+static void f2582(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,31);*r1p=r1;*r0p=r0;}
+static void f2583(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-33);*r1p=r1;*r0p=r0;}
+static void f2584(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,64);*r1p=r1;*r0p=r0;}
+static void f2585(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-64);*r1p=r1;*r0p=r0;}
+static void f2586(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,63);*r1p=r1;*r0p=r0;}
+static void f2587(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-65);*r1p=r1;*r0p=r0;}
+static void f2588(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,128);*r1p=r1;*r0p=r0;}
+static void f2589(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-128);*r1p=r1;*r0p=r0;}
+static void f2590(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,127);*r1p=r1;*r0p=r0;}
+static void f2591(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-129);*r1p=r1;*r0p=r0;}
+static void f2592(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,256);*r1p=r1;*r0p=r0;}
+static void f2593(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-256);*r1p=r1;*r0p=r0;}
+static void f2594(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,255);*r1p=r1;*r0p=r0;}
+static void f2595(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-257);*r1p=r1;*r0p=r0;}
+static void f2596(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,512);*r1p=r1;*r0p=r0;}
+static void f2597(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-512);*r1p=r1;*r0p=r0;}
+static void f2598(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,511);*r1p=r1;*r0p=r0;}
+static void f2599(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-513);*r1p=r1;*r0p=r0;}
+static void f2600(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,1024);*r1p=r1;*r0p=r0;}
+static void f2601(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2602(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,1023);*r1p=r1;*r0p=r0;}
+static void f2603(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2604(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,2048);*r1p=r1;*r0p=r0;}
+static void f2605(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2606(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,2047);*r1p=r1;*r0p=r0;}
+static void f2607(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2608(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,4096);*r1p=r1;*r0p=r0;}
+static void f2609(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2610(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,4095);*r1p=r1;*r0p=r0;}
+static void f2611(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2612(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,8192);*r1p=r1;*r0p=r0;}
+static void f2613(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2614(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,8191);*r1p=r1;*r0p=r0;}
+static void f2615(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2616(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,16384);*r1p=r1;*r0p=r0;}
+static void f2617(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2618(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,16383);*r1p=r1;*r0p=r0;}
+static void f2619(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2620(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,32768);*r1p=r1;*r0p=r0;}
+static void f2621(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2622(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,32767);*r1p=r1;*r0p=r0;}
+static void f2623(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1024,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2624(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,1);*r1p=r1;*r0p=r0;}
+static void f2625(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-1);*r1p=r1;*r0p=r0;}
+static void f2626(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,0);*r1p=r1;*r0p=r0;}
+static void f2627(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-2);*r1p=r1;*r0p=r0;}
+static void f2628(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,2);*r1p=r1;*r0p=r0;}
+static void f2629(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-2);*r1p=r1;*r0p=r0;}
+static void f2630(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,1);*r1p=r1;*r0p=r0;}
+static void f2631(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-3);*r1p=r1;*r0p=r0;}
+static void f2632(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,4);*r1p=r1;*r0p=r0;}
+static void f2633(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-4);*r1p=r1;*r0p=r0;}
+static void f2634(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,3);*r1p=r1;*r0p=r0;}
+static void f2635(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-5);*r1p=r1;*r0p=r0;}
+static void f2636(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,8);*r1p=r1;*r0p=r0;}
+static void f2637(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-8);*r1p=r1;*r0p=r0;}
+static void f2638(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,7);*r1p=r1;*r0p=r0;}
+static void f2639(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-9);*r1p=r1;*r0p=r0;}
+static void f2640(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,16);*r1p=r1;*r0p=r0;}
+static void f2641(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-16);*r1p=r1;*r0p=r0;}
+static void f2642(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,15);*r1p=r1;*r0p=r0;}
+static void f2643(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-17);*r1p=r1;*r0p=r0;}
+static void f2644(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,32);*r1p=r1;*r0p=r0;}
+static void f2645(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-32);*r1p=r1;*r0p=r0;}
+static void f2646(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,31);*r1p=r1;*r0p=r0;}
+static void f2647(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-33);*r1p=r1;*r0p=r0;}
+static void f2648(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,64);*r1p=r1;*r0p=r0;}
+static void f2649(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-64);*r1p=r1;*r0p=r0;}
+static void f2650(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,63);*r1p=r1;*r0p=r0;}
+static void f2651(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-65);*r1p=r1;*r0p=r0;}
+static void f2652(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,128);*r1p=r1;*r0p=r0;}
+static void f2653(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-128);*r1p=r1;*r0p=r0;}
+static void f2654(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,127);*r1p=r1;*r0p=r0;}
+static void f2655(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-129);*r1p=r1;*r0p=r0;}
+static void f2656(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,256);*r1p=r1;*r0p=r0;}
+static void f2657(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-256);*r1p=r1;*r0p=r0;}
+static void f2658(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,255);*r1p=r1;*r0p=r0;}
+static void f2659(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-257);*r1p=r1;*r0p=r0;}
+static void f2660(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,512);*r1p=r1;*r0p=r0;}
+static void f2661(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-512);*r1p=r1;*r0p=r0;}
+static void f2662(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,511);*r1p=r1;*r0p=r0;}
+static void f2663(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-513);*r1p=r1;*r0p=r0;}
+static void f2664(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,1024);*r1p=r1;*r0p=r0;}
+static void f2665(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2666(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,1023);*r1p=r1;*r0p=r0;}
+static void f2667(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2668(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,2048);*r1p=r1;*r0p=r0;}
+static void f2669(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2670(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,2047);*r1p=r1;*r0p=r0;}
+static void f2671(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2672(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,4096);*r1p=r1;*r0p=r0;}
+static void f2673(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2674(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,4095);*r1p=r1;*r0p=r0;}
+static void f2675(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2676(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,8192);*r1p=r1;*r0p=r0;}
+static void f2677(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2678(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,8191);*r1p=r1;*r0p=r0;}
+static void f2679(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2680(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,16384);*r1p=r1;*r0p=r0;}
+static void f2681(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2682(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,16383);*r1p=r1;*r0p=r0;}
+static void f2683(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2684(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,32768);*r1p=r1;*r0p=r0;}
+static void f2685(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2686(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,32767);*r1p=r1;*r0p=r0;}
+static void f2687(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1024,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2688(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,1);*r1p=r1;*r0p=r0;}
+static void f2689(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-1);*r1p=r1;*r0p=r0;}
+static void f2690(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,0);*r1p=r1;*r0p=r0;}
+static void f2691(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-2);*r1p=r1;*r0p=r0;}
+static void f2692(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,2);*r1p=r1;*r0p=r0;}
+static void f2693(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-2);*r1p=r1;*r0p=r0;}
+static void f2694(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,1);*r1p=r1;*r0p=r0;}
+static void f2695(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-3);*r1p=r1;*r0p=r0;}
+static void f2696(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,4);*r1p=r1;*r0p=r0;}
+static void f2697(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-4);*r1p=r1;*r0p=r0;}
+static void f2698(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,3);*r1p=r1;*r0p=r0;}
+static void f2699(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-5);*r1p=r1;*r0p=r0;}
+static void f2700(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,8);*r1p=r1;*r0p=r0;}
+static void f2701(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-8);*r1p=r1;*r0p=r0;}
+static void f2702(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,7);*r1p=r1;*r0p=r0;}
+static void f2703(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-9);*r1p=r1;*r0p=r0;}
+static void f2704(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,16);*r1p=r1;*r0p=r0;}
+static void f2705(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-16);*r1p=r1;*r0p=r0;}
+static void f2706(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,15);*r1p=r1;*r0p=r0;}
+static void f2707(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-17);*r1p=r1;*r0p=r0;}
+static void f2708(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,32);*r1p=r1;*r0p=r0;}
+static void f2709(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-32);*r1p=r1;*r0p=r0;}
+static void f2710(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,31);*r1p=r1;*r0p=r0;}
+static void f2711(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-33);*r1p=r1;*r0p=r0;}
+static void f2712(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,64);*r1p=r1;*r0p=r0;}
+static void f2713(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-64);*r1p=r1;*r0p=r0;}
+static void f2714(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,63);*r1p=r1;*r0p=r0;}
+static void f2715(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-65);*r1p=r1;*r0p=r0;}
+static void f2716(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,128);*r1p=r1;*r0p=r0;}
+static void f2717(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-128);*r1p=r1;*r0p=r0;}
+static void f2718(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,127);*r1p=r1;*r0p=r0;}
+static void f2719(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-129);*r1p=r1;*r0p=r0;}
+static void f2720(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,256);*r1p=r1;*r0p=r0;}
+static void f2721(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-256);*r1p=r1;*r0p=r0;}
+static void f2722(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,255);*r1p=r1;*r0p=r0;}
+static void f2723(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-257);*r1p=r1;*r0p=r0;}
+static void f2724(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,512);*r1p=r1;*r0p=r0;}
+static void f2725(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-512);*r1p=r1;*r0p=r0;}
+static void f2726(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,511);*r1p=r1;*r0p=r0;}
+static void f2727(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-513);*r1p=r1;*r0p=r0;}
+static void f2728(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,1024);*r1p=r1;*r0p=r0;}
+static void f2729(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2730(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,1023);*r1p=r1;*r0p=r0;}
+static void f2731(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2732(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,2048);*r1p=r1;*r0p=r0;}
+static void f2733(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2734(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,2047);*r1p=r1;*r0p=r0;}
+static void f2735(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2736(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,4096);*r1p=r1;*r0p=r0;}
+static void f2737(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2738(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,4095);*r1p=r1;*r0p=r0;}
+static void f2739(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2740(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,8192);*r1p=r1;*r0p=r0;}
+static void f2741(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2742(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,8191);*r1p=r1;*r0p=r0;}
+static void f2743(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2744(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,16384);*r1p=r1;*r0p=r0;}
+static void f2745(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2746(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,16383);*r1p=r1;*r0p=r0;}
+static void f2747(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2748(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,32768);*r1p=r1;*r0p=r0;}
+static void f2749(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2750(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,32767);*r1p=r1;*r0p=r0;}
+static void f2751(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,1023,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2752(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,1);*r1p=r1;*r0p=r0;}
+static void f2753(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-1);*r1p=r1;*r0p=r0;}
+static void f2754(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,0);*r1p=r1;*r0p=r0;}
+static void f2755(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-2);*r1p=r1;*r0p=r0;}
+static void f2756(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,2);*r1p=r1;*r0p=r0;}
+static void f2757(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-2);*r1p=r1;*r0p=r0;}
+static void f2758(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,1);*r1p=r1;*r0p=r0;}
+static void f2759(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-3);*r1p=r1;*r0p=r0;}
+static void f2760(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,4);*r1p=r1;*r0p=r0;}
+static void f2761(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-4);*r1p=r1;*r0p=r0;}
+static void f2762(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,3);*r1p=r1;*r0p=r0;}
+static void f2763(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-5);*r1p=r1;*r0p=r0;}
+static void f2764(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,8);*r1p=r1;*r0p=r0;}
+static void f2765(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-8);*r1p=r1;*r0p=r0;}
+static void f2766(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,7);*r1p=r1;*r0p=r0;}
+static void f2767(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-9);*r1p=r1;*r0p=r0;}
+static void f2768(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,16);*r1p=r1;*r0p=r0;}
+static void f2769(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-16);*r1p=r1;*r0p=r0;}
+static void f2770(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,15);*r1p=r1;*r0p=r0;}
+static void f2771(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-17);*r1p=r1;*r0p=r0;}
+static void f2772(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,32);*r1p=r1;*r0p=r0;}
+static void f2773(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-32);*r1p=r1;*r0p=r0;}
+static void f2774(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,31);*r1p=r1;*r0p=r0;}
+static void f2775(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-33);*r1p=r1;*r0p=r0;}
+static void f2776(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,64);*r1p=r1;*r0p=r0;}
+static void f2777(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-64);*r1p=r1;*r0p=r0;}
+static void f2778(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,63);*r1p=r1;*r0p=r0;}
+static void f2779(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-65);*r1p=r1;*r0p=r0;}
+static void f2780(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,128);*r1p=r1;*r0p=r0;}
+static void f2781(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-128);*r1p=r1;*r0p=r0;}
+static void f2782(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,127);*r1p=r1;*r0p=r0;}
+static void f2783(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-129);*r1p=r1;*r0p=r0;}
+static void f2784(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,256);*r1p=r1;*r0p=r0;}
+static void f2785(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-256);*r1p=r1;*r0p=r0;}
+static void f2786(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,255);*r1p=r1;*r0p=r0;}
+static void f2787(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-257);*r1p=r1;*r0p=r0;}
+static void f2788(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,512);*r1p=r1;*r0p=r0;}
+static void f2789(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-512);*r1p=r1;*r0p=r0;}
+static void f2790(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,511);*r1p=r1;*r0p=r0;}
+static void f2791(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-513);*r1p=r1;*r0p=r0;}
+static void f2792(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,1024);*r1p=r1;*r0p=r0;}
+static void f2793(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2794(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,1023);*r1p=r1;*r0p=r0;}
+static void f2795(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2796(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,2048);*r1p=r1;*r0p=r0;}
+static void f2797(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2798(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,2047);*r1p=r1;*r0p=r0;}
+static void f2799(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2800(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,4096);*r1p=r1;*r0p=r0;}
+static void f2801(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2802(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,4095);*r1p=r1;*r0p=r0;}
+static void f2803(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2804(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,8192);*r1p=r1;*r0p=r0;}
+static void f2805(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2806(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,8191);*r1p=r1;*r0p=r0;}
+static void f2807(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2808(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,16384);*r1p=r1;*r0p=r0;}
+static void f2809(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2810(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,16383);*r1p=r1;*r0p=r0;}
+static void f2811(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2812(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,32768);*r1p=r1;*r0p=r0;}
+static void f2813(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2814(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,32767);*r1p=r1;*r0p=r0;}
+static void f2815(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-1025,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2816(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,1);*r1p=r1;*r0p=r0;}
+static void f2817(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-1);*r1p=r1;*r0p=r0;}
+static void f2818(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,0);*r1p=r1;*r0p=r0;}
+static void f2819(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-2);*r1p=r1;*r0p=r0;}
+static void f2820(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,2);*r1p=r1;*r0p=r0;}
+static void f2821(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-2);*r1p=r1;*r0p=r0;}
+static void f2822(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,1);*r1p=r1;*r0p=r0;}
+static void f2823(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-3);*r1p=r1;*r0p=r0;}
+static void f2824(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,4);*r1p=r1;*r0p=r0;}
+static void f2825(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-4);*r1p=r1;*r0p=r0;}
+static void f2826(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,3);*r1p=r1;*r0p=r0;}
+static void f2827(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-5);*r1p=r1;*r0p=r0;}
+static void f2828(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,8);*r1p=r1;*r0p=r0;}
+static void f2829(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-8);*r1p=r1;*r0p=r0;}
+static void f2830(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,7);*r1p=r1;*r0p=r0;}
+static void f2831(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-9);*r1p=r1;*r0p=r0;}
+static void f2832(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,16);*r1p=r1;*r0p=r0;}
+static void f2833(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-16);*r1p=r1;*r0p=r0;}
+static void f2834(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,15);*r1p=r1;*r0p=r0;}
+static void f2835(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-17);*r1p=r1;*r0p=r0;}
+static void f2836(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,32);*r1p=r1;*r0p=r0;}
+static void f2837(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-32);*r1p=r1;*r0p=r0;}
+static void f2838(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,31);*r1p=r1;*r0p=r0;}
+static void f2839(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-33);*r1p=r1;*r0p=r0;}
+static void f2840(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,64);*r1p=r1;*r0p=r0;}
+static void f2841(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-64);*r1p=r1;*r0p=r0;}
+static void f2842(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,63);*r1p=r1;*r0p=r0;}
+static void f2843(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-65);*r1p=r1;*r0p=r0;}
+static void f2844(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,128);*r1p=r1;*r0p=r0;}
+static void f2845(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-128);*r1p=r1;*r0p=r0;}
+static void f2846(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,127);*r1p=r1;*r0p=r0;}
+static void f2847(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-129);*r1p=r1;*r0p=r0;}
+static void f2848(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,256);*r1p=r1;*r0p=r0;}
+static void f2849(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-256);*r1p=r1;*r0p=r0;}
+static void f2850(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,255);*r1p=r1;*r0p=r0;}
+static void f2851(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-257);*r1p=r1;*r0p=r0;}
+static void f2852(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,512);*r1p=r1;*r0p=r0;}
+static void f2853(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-512);*r1p=r1;*r0p=r0;}
+static void f2854(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,511);*r1p=r1;*r0p=r0;}
+static void f2855(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-513);*r1p=r1;*r0p=r0;}
+static void f2856(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,1024);*r1p=r1;*r0p=r0;}
+static void f2857(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2858(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,1023);*r1p=r1;*r0p=r0;}
+static void f2859(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2860(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,2048);*r1p=r1;*r0p=r0;}
+static void f2861(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2862(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,2047);*r1p=r1;*r0p=r0;}
+static void f2863(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2864(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,4096);*r1p=r1;*r0p=r0;}
+static void f2865(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2866(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,4095);*r1p=r1;*r0p=r0;}
+static void f2867(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2868(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,8192);*r1p=r1;*r0p=r0;}
+static void f2869(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2870(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,8191);*r1p=r1;*r0p=r0;}
+static void f2871(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2872(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,16384);*r1p=r1;*r0p=r0;}
+static void f2873(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2874(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,16383);*r1p=r1;*r0p=r0;}
+static void f2875(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2876(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,32768);*r1p=r1;*r0p=r0;}
+static void f2877(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2878(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,32767);*r1p=r1;*r0p=r0;}
+static void f2879(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2048,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2880(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,1);*r1p=r1;*r0p=r0;}
+static void f2881(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-1);*r1p=r1;*r0p=r0;}
+static void f2882(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,0);*r1p=r1;*r0p=r0;}
+static void f2883(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-2);*r1p=r1;*r0p=r0;}
+static void f2884(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,2);*r1p=r1;*r0p=r0;}
+static void f2885(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-2);*r1p=r1;*r0p=r0;}
+static void f2886(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,1);*r1p=r1;*r0p=r0;}
+static void f2887(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-3);*r1p=r1;*r0p=r0;}
+static void f2888(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,4);*r1p=r1;*r0p=r0;}
+static void f2889(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-4);*r1p=r1;*r0p=r0;}
+static void f2890(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,3);*r1p=r1;*r0p=r0;}
+static void f2891(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-5);*r1p=r1;*r0p=r0;}
+static void f2892(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,8);*r1p=r1;*r0p=r0;}
+static void f2893(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-8);*r1p=r1;*r0p=r0;}
+static void f2894(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,7);*r1p=r1;*r0p=r0;}
+static void f2895(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-9);*r1p=r1;*r0p=r0;}
+static void f2896(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,16);*r1p=r1;*r0p=r0;}
+static void f2897(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-16);*r1p=r1;*r0p=r0;}
+static void f2898(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,15);*r1p=r1;*r0p=r0;}
+static void f2899(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-17);*r1p=r1;*r0p=r0;}
+static void f2900(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,32);*r1p=r1;*r0p=r0;}
+static void f2901(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-32);*r1p=r1;*r0p=r0;}
+static void f2902(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,31);*r1p=r1;*r0p=r0;}
+static void f2903(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-33);*r1p=r1;*r0p=r0;}
+static void f2904(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,64);*r1p=r1;*r0p=r0;}
+static void f2905(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-64);*r1p=r1;*r0p=r0;}
+static void f2906(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,63);*r1p=r1;*r0p=r0;}
+static void f2907(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-65);*r1p=r1;*r0p=r0;}
+static void f2908(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,128);*r1p=r1;*r0p=r0;}
+static void f2909(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-128);*r1p=r1;*r0p=r0;}
+static void f2910(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,127);*r1p=r1;*r0p=r0;}
+static void f2911(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-129);*r1p=r1;*r0p=r0;}
+static void f2912(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,256);*r1p=r1;*r0p=r0;}
+static void f2913(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-256);*r1p=r1;*r0p=r0;}
+static void f2914(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,255);*r1p=r1;*r0p=r0;}
+static void f2915(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-257);*r1p=r1;*r0p=r0;}
+static void f2916(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,512);*r1p=r1;*r0p=r0;}
+static void f2917(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-512);*r1p=r1;*r0p=r0;}
+static void f2918(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,511);*r1p=r1;*r0p=r0;}
+static void f2919(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-513);*r1p=r1;*r0p=r0;}
+static void f2920(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,1024);*r1p=r1;*r0p=r0;}
+static void f2921(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2922(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,1023);*r1p=r1;*r0p=r0;}
+static void f2923(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2924(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,2048);*r1p=r1;*r0p=r0;}
+static void f2925(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2926(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,2047);*r1p=r1;*r0p=r0;}
+static void f2927(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2928(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,4096);*r1p=r1;*r0p=r0;}
+static void f2929(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2930(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,4095);*r1p=r1;*r0p=r0;}
+static void f2931(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2932(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,8192);*r1p=r1;*r0p=r0;}
+static void f2933(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2934(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,8191);*r1p=r1;*r0p=r0;}
+static void f2935(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-8193);*r1p=r1;*r0p=r0;}
+static void f2936(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,16384);*r1p=r1;*r0p=r0;}
+static void f2937(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-16384);*r1p=r1;*r0p=r0;}
+static void f2938(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,16383);*r1p=r1;*r0p=r0;}
+static void f2939(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-16385);*r1p=r1;*r0p=r0;}
+static void f2940(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,32768);*r1p=r1;*r0p=r0;}
+static void f2941(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-32768);*r1p=r1;*r0p=r0;}
+static void f2942(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,32767);*r1p=r1;*r0p=r0;}
+static void f2943(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2048,0,-32769);*r1p=r1;*r0p=r0;}
+static void f2944(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,1);*r1p=r1;*r0p=r0;}
+static void f2945(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-1);*r1p=r1;*r0p=r0;}
+static void f2946(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,0);*r1p=r1;*r0p=r0;}
+static void f2947(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-2);*r1p=r1;*r0p=r0;}
+static void f2948(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,2);*r1p=r1;*r0p=r0;}
+static void f2949(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-2);*r1p=r1;*r0p=r0;}
+static void f2950(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,1);*r1p=r1;*r0p=r0;}
+static void f2951(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-3);*r1p=r1;*r0p=r0;}
+static void f2952(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,4);*r1p=r1;*r0p=r0;}
+static void f2953(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-4);*r1p=r1;*r0p=r0;}
+static void f2954(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,3);*r1p=r1;*r0p=r0;}
+static void f2955(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-5);*r1p=r1;*r0p=r0;}
+static void f2956(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,8);*r1p=r1;*r0p=r0;}
+static void f2957(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-8);*r1p=r1;*r0p=r0;}
+static void f2958(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,7);*r1p=r1;*r0p=r0;}
+static void f2959(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-9);*r1p=r1;*r0p=r0;}
+static void f2960(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,16);*r1p=r1;*r0p=r0;}
+static void f2961(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-16);*r1p=r1;*r0p=r0;}
+static void f2962(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,15);*r1p=r1;*r0p=r0;}
+static void f2963(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-17);*r1p=r1;*r0p=r0;}
+static void f2964(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,32);*r1p=r1;*r0p=r0;}
+static void f2965(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-32);*r1p=r1;*r0p=r0;}
+static void f2966(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,31);*r1p=r1;*r0p=r0;}
+static void f2967(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-33);*r1p=r1;*r0p=r0;}
+static void f2968(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,64);*r1p=r1;*r0p=r0;}
+static void f2969(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-64);*r1p=r1;*r0p=r0;}
+static void f2970(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,63);*r1p=r1;*r0p=r0;}
+static void f2971(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-65);*r1p=r1;*r0p=r0;}
+static void f2972(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,128);*r1p=r1;*r0p=r0;}
+static void f2973(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-128);*r1p=r1;*r0p=r0;}
+static void f2974(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,127);*r1p=r1;*r0p=r0;}
+static void f2975(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-129);*r1p=r1;*r0p=r0;}
+static void f2976(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,256);*r1p=r1;*r0p=r0;}
+static void f2977(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-256);*r1p=r1;*r0p=r0;}
+static void f2978(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,255);*r1p=r1;*r0p=r0;}
+static void f2979(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-257);*r1p=r1;*r0p=r0;}
+static void f2980(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,512);*r1p=r1;*r0p=r0;}
+static void f2981(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-512);*r1p=r1;*r0p=r0;}
+static void f2982(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,511);*r1p=r1;*r0p=r0;}
+static void f2983(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-513);*r1p=r1;*r0p=r0;}
+static void f2984(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,1024);*r1p=r1;*r0p=r0;}
+static void f2985(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-1024);*r1p=r1;*r0p=r0;}
+static void f2986(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,1023);*r1p=r1;*r0p=r0;}
+static void f2987(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-1025);*r1p=r1;*r0p=r0;}
+static void f2988(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,2048);*r1p=r1;*r0p=r0;}
+static void f2989(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-2048);*r1p=r1;*r0p=r0;}
+static void f2990(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,2047);*r1p=r1;*r0p=r0;}
+static void f2991(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-2049);*r1p=r1;*r0p=r0;}
+static void f2992(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,4096);*r1p=r1;*r0p=r0;}
+static void f2993(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-4096);*r1p=r1;*r0p=r0;}
+static void f2994(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,4095);*r1p=r1;*r0p=r0;}
+static void f2995(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-4097);*r1p=r1;*r0p=r0;}
+static void f2996(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,8192);*r1p=r1;*r0p=r0;}
+static void f2997(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-8192);*r1p=r1;*r0p=r0;}
+static void f2998(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,8191);*r1p=r1;*r0p=r0;}
+static void f2999(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3000(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,16384);*r1p=r1;*r0p=r0;}
+static void f3001(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3002(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,16383);*r1p=r1;*r0p=r0;}
+static void f3003(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3004(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,32768);*r1p=r1;*r0p=r0;}
+static void f3005(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3006(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,32767);*r1p=r1;*r0p=r0;}
+static void f3007(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,2047,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3008(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,1);*r1p=r1;*r0p=r0;}
+static void f3009(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-1);*r1p=r1;*r0p=r0;}
+static void f3010(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,0);*r1p=r1;*r0p=r0;}
+static void f3011(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-2);*r1p=r1;*r0p=r0;}
+static void f3012(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,2);*r1p=r1;*r0p=r0;}
+static void f3013(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-2);*r1p=r1;*r0p=r0;}
+static void f3014(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,1);*r1p=r1;*r0p=r0;}
+static void f3015(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-3);*r1p=r1;*r0p=r0;}
+static void f3016(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,4);*r1p=r1;*r0p=r0;}
+static void f3017(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-4);*r1p=r1;*r0p=r0;}
+static void f3018(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,3);*r1p=r1;*r0p=r0;}
+static void f3019(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-5);*r1p=r1;*r0p=r0;}
+static void f3020(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,8);*r1p=r1;*r0p=r0;}
+static void f3021(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-8);*r1p=r1;*r0p=r0;}
+static void f3022(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,7);*r1p=r1;*r0p=r0;}
+static void f3023(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-9);*r1p=r1;*r0p=r0;}
+static void f3024(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,16);*r1p=r1;*r0p=r0;}
+static void f3025(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-16);*r1p=r1;*r0p=r0;}
+static void f3026(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,15);*r1p=r1;*r0p=r0;}
+static void f3027(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-17);*r1p=r1;*r0p=r0;}
+static void f3028(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,32);*r1p=r1;*r0p=r0;}
+static void f3029(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-32);*r1p=r1;*r0p=r0;}
+static void f3030(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,31);*r1p=r1;*r0p=r0;}
+static void f3031(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-33);*r1p=r1;*r0p=r0;}
+static void f3032(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,64);*r1p=r1;*r0p=r0;}
+static void f3033(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-64);*r1p=r1;*r0p=r0;}
+static void f3034(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,63);*r1p=r1;*r0p=r0;}
+static void f3035(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-65);*r1p=r1;*r0p=r0;}
+static void f3036(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,128);*r1p=r1;*r0p=r0;}
+static void f3037(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-128);*r1p=r1;*r0p=r0;}
+static void f3038(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,127);*r1p=r1;*r0p=r0;}
+static void f3039(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-129);*r1p=r1;*r0p=r0;}
+static void f3040(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,256);*r1p=r1;*r0p=r0;}
+static void f3041(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-256);*r1p=r1;*r0p=r0;}
+static void f3042(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,255);*r1p=r1;*r0p=r0;}
+static void f3043(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-257);*r1p=r1;*r0p=r0;}
+static void f3044(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,512);*r1p=r1;*r0p=r0;}
+static void f3045(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-512);*r1p=r1;*r0p=r0;}
+static void f3046(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,511);*r1p=r1;*r0p=r0;}
+static void f3047(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-513);*r1p=r1;*r0p=r0;}
+static void f3048(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,1024);*r1p=r1;*r0p=r0;}
+static void f3049(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3050(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,1023);*r1p=r1;*r0p=r0;}
+static void f3051(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3052(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,2048);*r1p=r1;*r0p=r0;}
+static void f3053(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3054(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,2047);*r1p=r1;*r0p=r0;}
+static void f3055(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3056(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,4096);*r1p=r1;*r0p=r0;}
+static void f3057(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3058(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,4095);*r1p=r1;*r0p=r0;}
+static void f3059(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3060(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,8192);*r1p=r1;*r0p=r0;}
+static void f3061(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3062(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,8191);*r1p=r1;*r0p=r0;}
+static void f3063(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3064(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,16384);*r1p=r1;*r0p=r0;}
+static void f3065(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3066(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,16383);*r1p=r1;*r0p=r0;}
+static void f3067(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3068(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,32768);*r1p=r1;*r0p=r0;}
+static void f3069(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3070(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,32767);*r1p=r1;*r0p=r0;}
+static void f3071(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-2049,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3072(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,1);*r1p=r1;*r0p=r0;}
+static void f3073(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-1);*r1p=r1;*r0p=r0;}
+static void f3074(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,0);*r1p=r1;*r0p=r0;}
+static void f3075(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-2);*r1p=r1;*r0p=r0;}
+static void f3076(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,2);*r1p=r1;*r0p=r0;}
+static void f3077(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-2);*r1p=r1;*r0p=r0;}
+static void f3078(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,1);*r1p=r1;*r0p=r0;}
+static void f3079(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-3);*r1p=r1;*r0p=r0;}
+static void f3080(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,4);*r1p=r1;*r0p=r0;}
+static void f3081(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-4);*r1p=r1;*r0p=r0;}
+static void f3082(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,3);*r1p=r1;*r0p=r0;}
+static void f3083(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-5);*r1p=r1;*r0p=r0;}
+static void f3084(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,8);*r1p=r1;*r0p=r0;}
+static void f3085(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-8);*r1p=r1;*r0p=r0;}
+static void f3086(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,7);*r1p=r1;*r0p=r0;}
+static void f3087(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-9);*r1p=r1;*r0p=r0;}
+static void f3088(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,16);*r1p=r1;*r0p=r0;}
+static void f3089(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-16);*r1p=r1;*r0p=r0;}
+static void f3090(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,15);*r1p=r1;*r0p=r0;}
+static void f3091(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-17);*r1p=r1;*r0p=r0;}
+static void f3092(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,32);*r1p=r1;*r0p=r0;}
+static void f3093(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-32);*r1p=r1;*r0p=r0;}
+static void f3094(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,31);*r1p=r1;*r0p=r0;}
+static void f3095(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-33);*r1p=r1;*r0p=r0;}
+static void f3096(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,64);*r1p=r1;*r0p=r0;}
+static void f3097(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-64);*r1p=r1;*r0p=r0;}
+static void f3098(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,63);*r1p=r1;*r0p=r0;}
+static void f3099(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-65);*r1p=r1;*r0p=r0;}
+static void f3100(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,128);*r1p=r1;*r0p=r0;}
+static void f3101(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-128);*r1p=r1;*r0p=r0;}
+static void f3102(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,127);*r1p=r1;*r0p=r0;}
+static void f3103(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-129);*r1p=r1;*r0p=r0;}
+static void f3104(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,256);*r1p=r1;*r0p=r0;}
+static void f3105(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-256);*r1p=r1;*r0p=r0;}
+static void f3106(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,255);*r1p=r1;*r0p=r0;}
+static void f3107(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-257);*r1p=r1;*r0p=r0;}
+static void f3108(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,512);*r1p=r1;*r0p=r0;}
+static void f3109(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-512);*r1p=r1;*r0p=r0;}
+static void f3110(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,511);*r1p=r1;*r0p=r0;}
+static void f3111(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-513);*r1p=r1;*r0p=r0;}
+static void f3112(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,1024);*r1p=r1;*r0p=r0;}
+static void f3113(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3114(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,1023);*r1p=r1;*r0p=r0;}
+static void f3115(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3116(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,2048);*r1p=r1;*r0p=r0;}
+static void f3117(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3118(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,2047);*r1p=r1;*r0p=r0;}
+static void f3119(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3120(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,4096);*r1p=r1;*r0p=r0;}
+static void f3121(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3122(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,4095);*r1p=r1;*r0p=r0;}
+static void f3123(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3124(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,8192);*r1p=r1;*r0p=r0;}
+static void f3125(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3126(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,8191);*r1p=r1;*r0p=r0;}
+static void f3127(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3128(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,16384);*r1p=r1;*r0p=r0;}
+static void f3129(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3130(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,16383);*r1p=r1;*r0p=r0;}
+static void f3131(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3132(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,32768);*r1p=r1;*r0p=r0;}
+static void f3133(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3134(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,32767);*r1p=r1;*r0p=r0;}
+static void f3135(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4096,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3136(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,1);*r1p=r1;*r0p=r0;}
+static void f3137(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-1);*r1p=r1;*r0p=r0;}
+static void f3138(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,0);*r1p=r1;*r0p=r0;}
+static void f3139(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-2);*r1p=r1;*r0p=r0;}
+static void f3140(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,2);*r1p=r1;*r0p=r0;}
+static void f3141(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-2);*r1p=r1;*r0p=r0;}
+static void f3142(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,1);*r1p=r1;*r0p=r0;}
+static void f3143(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-3);*r1p=r1;*r0p=r0;}
+static void f3144(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,4);*r1p=r1;*r0p=r0;}
+static void f3145(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-4);*r1p=r1;*r0p=r0;}
+static void f3146(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,3);*r1p=r1;*r0p=r0;}
+static void f3147(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-5);*r1p=r1;*r0p=r0;}
+static void f3148(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,8);*r1p=r1;*r0p=r0;}
+static void f3149(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-8);*r1p=r1;*r0p=r0;}
+static void f3150(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,7);*r1p=r1;*r0p=r0;}
+static void f3151(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-9);*r1p=r1;*r0p=r0;}
+static void f3152(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,16);*r1p=r1;*r0p=r0;}
+static void f3153(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-16);*r1p=r1;*r0p=r0;}
+static void f3154(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,15);*r1p=r1;*r0p=r0;}
+static void f3155(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-17);*r1p=r1;*r0p=r0;}
+static void f3156(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,32);*r1p=r1;*r0p=r0;}
+static void f3157(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-32);*r1p=r1;*r0p=r0;}
+static void f3158(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,31);*r1p=r1;*r0p=r0;}
+static void f3159(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-33);*r1p=r1;*r0p=r0;}
+static void f3160(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,64);*r1p=r1;*r0p=r0;}
+static void f3161(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-64);*r1p=r1;*r0p=r0;}
+static void f3162(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,63);*r1p=r1;*r0p=r0;}
+static void f3163(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-65);*r1p=r1;*r0p=r0;}
+static void f3164(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,128);*r1p=r1;*r0p=r0;}
+static void f3165(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-128);*r1p=r1;*r0p=r0;}
+static void f3166(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,127);*r1p=r1;*r0p=r0;}
+static void f3167(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-129);*r1p=r1;*r0p=r0;}
+static void f3168(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,256);*r1p=r1;*r0p=r0;}
+static void f3169(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-256);*r1p=r1;*r0p=r0;}
+static void f3170(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,255);*r1p=r1;*r0p=r0;}
+static void f3171(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-257);*r1p=r1;*r0p=r0;}
+static void f3172(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,512);*r1p=r1;*r0p=r0;}
+static void f3173(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-512);*r1p=r1;*r0p=r0;}
+static void f3174(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,511);*r1p=r1;*r0p=r0;}
+static void f3175(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-513);*r1p=r1;*r0p=r0;}
+static void f3176(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,1024);*r1p=r1;*r0p=r0;}
+static void f3177(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3178(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,1023);*r1p=r1;*r0p=r0;}
+static void f3179(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3180(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,2048);*r1p=r1;*r0p=r0;}
+static void f3181(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3182(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,2047);*r1p=r1;*r0p=r0;}
+static void f3183(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3184(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,4096);*r1p=r1;*r0p=r0;}
+static void f3185(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3186(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,4095);*r1p=r1;*r0p=r0;}
+static void f3187(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3188(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,8192);*r1p=r1;*r0p=r0;}
+static void f3189(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3190(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,8191);*r1p=r1;*r0p=r0;}
+static void f3191(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3192(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,16384);*r1p=r1;*r0p=r0;}
+static void f3193(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3194(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,16383);*r1p=r1;*r0p=r0;}
+static void f3195(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3196(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,32768);*r1p=r1;*r0p=r0;}
+static void f3197(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3198(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,32767);*r1p=r1;*r0p=r0;}
+static void f3199(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4096,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3200(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,1);*r1p=r1;*r0p=r0;}
+static void f3201(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-1);*r1p=r1;*r0p=r0;}
+static void f3202(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,0);*r1p=r1;*r0p=r0;}
+static void f3203(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-2);*r1p=r1;*r0p=r0;}
+static void f3204(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,2);*r1p=r1;*r0p=r0;}
+static void f3205(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-2);*r1p=r1;*r0p=r0;}
+static void f3206(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,1);*r1p=r1;*r0p=r0;}
+static void f3207(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-3);*r1p=r1;*r0p=r0;}
+static void f3208(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,4);*r1p=r1;*r0p=r0;}
+static void f3209(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-4);*r1p=r1;*r0p=r0;}
+static void f3210(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,3);*r1p=r1;*r0p=r0;}
+static void f3211(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-5);*r1p=r1;*r0p=r0;}
+static void f3212(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,8);*r1p=r1;*r0p=r0;}
+static void f3213(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-8);*r1p=r1;*r0p=r0;}
+static void f3214(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,7);*r1p=r1;*r0p=r0;}
+static void f3215(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-9);*r1p=r1;*r0p=r0;}
+static void f3216(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,16);*r1p=r1;*r0p=r0;}
+static void f3217(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-16);*r1p=r1;*r0p=r0;}
+static void f3218(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,15);*r1p=r1;*r0p=r0;}
+static void f3219(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-17);*r1p=r1;*r0p=r0;}
+static void f3220(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,32);*r1p=r1;*r0p=r0;}
+static void f3221(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-32);*r1p=r1;*r0p=r0;}
+static void f3222(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,31);*r1p=r1;*r0p=r0;}
+static void f3223(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-33);*r1p=r1;*r0p=r0;}
+static void f3224(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,64);*r1p=r1;*r0p=r0;}
+static void f3225(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-64);*r1p=r1;*r0p=r0;}
+static void f3226(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,63);*r1p=r1;*r0p=r0;}
+static void f3227(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-65);*r1p=r1;*r0p=r0;}
+static void f3228(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,128);*r1p=r1;*r0p=r0;}
+static void f3229(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-128);*r1p=r1;*r0p=r0;}
+static void f3230(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,127);*r1p=r1;*r0p=r0;}
+static void f3231(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-129);*r1p=r1;*r0p=r0;}
+static void f3232(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,256);*r1p=r1;*r0p=r0;}
+static void f3233(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-256);*r1p=r1;*r0p=r0;}
+static void f3234(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,255);*r1p=r1;*r0p=r0;}
+static void f3235(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-257);*r1p=r1;*r0p=r0;}
+static void f3236(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,512);*r1p=r1;*r0p=r0;}
+static void f3237(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-512);*r1p=r1;*r0p=r0;}
+static void f3238(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,511);*r1p=r1;*r0p=r0;}
+static void f3239(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-513);*r1p=r1;*r0p=r0;}
+static void f3240(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,1024);*r1p=r1;*r0p=r0;}
+static void f3241(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3242(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,1023);*r1p=r1;*r0p=r0;}
+static void f3243(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3244(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,2048);*r1p=r1;*r0p=r0;}
+static void f3245(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3246(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,2047);*r1p=r1;*r0p=r0;}
+static void f3247(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3248(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,4096);*r1p=r1;*r0p=r0;}
+static void f3249(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3250(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,4095);*r1p=r1;*r0p=r0;}
+static void f3251(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3252(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,8192);*r1p=r1;*r0p=r0;}
+static void f3253(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3254(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,8191);*r1p=r1;*r0p=r0;}
+static void f3255(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3256(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,16384);*r1p=r1;*r0p=r0;}
+static void f3257(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3258(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,16383);*r1p=r1;*r0p=r0;}
+static void f3259(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3260(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,32768);*r1p=r1;*r0p=r0;}
+static void f3261(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3262(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,32767);*r1p=r1;*r0p=r0;}
+static void f3263(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,4095,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3264(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,1);*r1p=r1;*r0p=r0;}
+static void f3265(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-1);*r1p=r1;*r0p=r0;}
+static void f3266(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,0);*r1p=r1;*r0p=r0;}
+static void f3267(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-2);*r1p=r1;*r0p=r0;}
+static void f3268(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,2);*r1p=r1;*r0p=r0;}
+static void f3269(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-2);*r1p=r1;*r0p=r0;}
+static void f3270(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,1);*r1p=r1;*r0p=r0;}
+static void f3271(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-3);*r1p=r1;*r0p=r0;}
+static void f3272(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,4);*r1p=r1;*r0p=r0;}
+static void f3273(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-4);*r1p=r1;*r0p=r0;}
+static void f3274(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,3);*r1p=r1;*r0p=r0;}
+static void f3275(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-5);*r1p=r1;*r0p=r0;}
+static void f3276(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,8);*r1p=r1;*r0p=r0;}
+static void f3277(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-8);*r1p=r1;*r0p=r0;}
+static void f3278(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,7);*r1p=r1;*r0p=r0;}
+static void f3279(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-9);*r1p=r1;*r0p=r0;}
+static void f3280(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,16);*r1p=r1;*r0p=r0;}
+static void f3281(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-16);*r1p=r1;*r0p=r0;}
+static void f3282(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,15);*r1p=r1;*r0p=r0;}
+static void f3283(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-17);*r1p=r1;*r0p=r0;}
+static void f3284(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,32);*r1p=r1;*r0p=r0;}
+static void f3285(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-32);*r1p=r1;*r0p=r0;}
+static void f3286(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,31);*r1p=r1;*r0p=r0;}
+static void f3287(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-33);*r1p=r1;*r0p=r0;}
+static void f3288(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,64);*r1p=r1;*r0p=r0;}
+static void f3289(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-64);*r1p=r1;*r0p=r0;}
+static void f3290(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,63);*r1p=r1;*r0p=r0;}
+static void f3291(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-65);*r1p=r1;*r0p=r0;}
+static void f3292(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,128);*r1p=r1;*r0p=r0;}
+static void f3293(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-128);*r1p=r1;*r0p=r0;}
+static void f3294(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,127);*r1p=r1;*r0p=r0;}
+static void f3295(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-129);*r1p=r1;*r0p=r0;}
+static void f3296(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,256);*r1p=r1;*r0p=r0;}
+static void f3297(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-256);*r1p=r1;*r0p=r0;}
+static void f3298(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,255);*r1p=r1;*r0p=r0;}
+static void f3299(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-257);*r1p=r1;*r0p=r0;}
+static void f3300(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,512);*r1p=r1;*r0p=r0;}
+static void f3301(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-512);*r1p=r1;*r0p=r0;}
+static void f3302(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,511);*r1p=r1;*r0p=r0;}
+static void f3303(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-513);*r1p=r1;*r0p=r0;}
+static void f3304(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,1024);*r1p=r1;*r0p=r0;}
+static void f3305(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3306(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,1023);*r1p=r1;*r0p=r0;}
+static void f3307(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3308(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,2048);*r1p=r1;*r0p=r0;}
+static void f3309(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3310(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,2047);*r1p=r1;*r0p=r0;}
+static void f3311(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3312(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,4096);*r1p=r1;*r0p=r0;}
+static void f3313(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3314(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,4095);*r1p=r1;*r0p=r0;}
+static void f3315(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3316(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,8192);*r1p=r1;*r0p=r0;}
+static void f3317(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3318(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,8191);*r1p=r1;*r0p=r0;}
+static void f3319(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3320(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,16384);*r1p=r1;*r0p=r0;}
+static void f3321(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3322(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,16383);*r1p=r1;*r0p=r0;}
+static void f3323(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3324(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,32768);*r1p=r1;*r0p=r0;}
+static void f3325(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3326(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,32767);*r1p=r1;*r0p=r0;}
+static void f3327(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-4097,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3328(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,1);*r1p=r1;*r0p=r0;}
+static void f3329(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-1);*r1p=r1;*r0p=r0;}
+static void f3330(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,0);*r1p=r1;*r0p=r0;}
+static void f3331(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-2);*r1p=r1;*r0p=r0;}
+static void f3332(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,2);*r1p=r1;*r0p=r0;}
+static void f3333(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-2);*r1p=r1;*r0p=r0;}
+static void f3334(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,1);*r1p=r1;*r0p=r0;}
+static void f3335(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-3);*r1p=r1;*r0p=r0;}
+static void f3336(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,4);*r1p=r1;*r0p=r0;}
+static void f3337(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-4);*r1p=r1;*r0p=r0;}
+static void f3338(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,3);*r1p=r1;*r0p=r0;}
+static void f3339(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-5);*r1p=r1;*r0p=r0;}
+static void f3340(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,8);*r1p=r1;*r0p=r0;}
+static void f3341(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-8);*r1p=r1;*r0p=r0;}
+static void f3342(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,7);*r1p=r1;*r0p=r0;}
+static void f3343(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-9);*r1p=r1;*r0p=r0;}
+static void f3344(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,16);*r1p=r1;*r0p=r0;}
+static void f3345(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-16);*r1p=r1;*r0p=r0;}
+static void f3346(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,15);*r1p=r1;*r0p=r0;}
+static void f3347(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-17);*r1p=r1;*r0p=r0;}
+static void f3348(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,32);*r1p=r1;*r0p=r0;}
+static void f3349(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-32);*r1p=r1;*r0p=r0;}
+static void f3350(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,31);*r1p=r1;*r0p=r0;}
+static void f3351(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-33);*r1p=r1;*r0p=r0;}
+static void f3352(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,64);*r1p=r1;*r0p=r0;}
+static void f3353(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-64);*r1p=r1;*r0p=r0;}
+static void f3354(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,63);*r1p=r1;*r0p=r0;}
+static void f3355(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-65);*r1p=r1;*r0p=r0;}
+static void f3356(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,128);*r1p=r1;*r0p=r0;}
+static void f3357(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-128);*r1p=r1;*r0p=r0;}
+static void f3358(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,127);*r1p=r1;*r0p=r0;}
+static void f3359(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-129);*r1p=r1;*r0p=r0;}
+static void f3360(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,256);*r1p=r1;*r0p=r0;}
+static void f3361(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-256);*r1p=r1;*r0p=r0;}
+static void f3362(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,255);*r1p=r1;*r0p=r0;}
+static void f3363(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-257);*r1p=r1;*r0p=r0;}
+static void f3364(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,512);*r1p=r1;*r0p=r0;}
+static void f3365(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-512);*r1p=r1;*r0p=r0;}
+static void f3366(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,511);*r1p=r1;*r0p=r0;}
+static void f3367(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-513);*r1p=r1;*r0p=r0;}
+static void f3368(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,1024);*r1p=r1;*r0p=r0;}
+static void f3369(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3370(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,1023);*r1p=r1;*r0p=r0;}
+static void f3371(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3372(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,2048);*r1p=r1;*r0p=r0;}
+static void f3373(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3374(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,2047);*r1p=r1;*r0p=r0;}
+static void f3375(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3376(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,4096);*r1p=r1;*r0p=r0;}
+static void f3377(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3378(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,4095);*r1p=r1;*r0p=r0;}
+static void f3379(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3380(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,8192);*r1p=r1;*r0p=r0;}
+static void f3381(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3382(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,8191);*r1p=r1;*r0p=r0;}
+static void f3383(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3384(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,16384);*r1p=r1;*r0p=r0;}
+static void f3385(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3386(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,16383);*r1p=r1;*r0p=r0;}
+static void f3387(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3388(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,32768);*r1p=r1;*r0p=r0;}
+static void f3389(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3390(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,32767);*r1p=r1;*r0p=r0;}
+static void f3391(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8192,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3392(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,1);*r1p=r1;*r0p=r0;}
+static void f3393(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-1);*r1p=r1;*r0p=r0;}
+static void f3394(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,0);*r1p=r1;*r0p=r0;}
+static void f3395(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-2);*r1p=r1;*r0p=r0;}
+static void f3396(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,2);*r1p=r1;*r0p=r0;}
+static void f3397(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-2);*r1p=r1;*r0p=r0;}
+static void f3398(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,1);*r1p=r1;*r0p=r0;}
+static void f3399(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-3);*r1p=r1;*r0p=r0;}
+static void f3400(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,4);*r1p=r1;*r0p=r0;}
+static void f3401(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-4);*r1p=r1;*r0p=r0;}
+static void f3402(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,3);*r1p=r1;*r0p=r0;}
+static void f3403(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-5);*r1p=r1;*r0p=r0;}
+static void f3404(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,8);*r1p=r1;*r0p=r0;}
+static void f3405(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-8);*r1p=r1;*r0p=r0;}
+static void f3406(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,7);*r1p=r1;*r0p=r0;}
+static void f3407(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-9);*r1p=r1;*r0p=r0;}
+static void f3408(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,16);*r1p=r1;*r0p=r0;}
+static void f3409(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-16);*r1p=r1;*r0p=r0;}
+static void f3410(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,15);*r1p=r1;*r0p=r0;}
+static void f3411(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-17);*r1p=r1;*r0p=r0;}
+static void f3412(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,32);*r1p=r1;*r0p=r0;}
+static void f3413(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-32);*r1p=r1;*r0p=r0;}
+static void f3414(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,31);*r1p=r1;*r0p=r0;}
+static void f3415(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-33);*r1p=r1;*r0p=r0;}
+static void f3416(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,64);*r1p=r1;*r0p=r0;}
+static void f3417(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-64);*r1p=r1;*r0p=r0;}
+static void f3418(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,63);*r1p=r1;*r0p=r0;}
+static void f3419(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-65);*r1p=r1;*r0p=r0;}
+static void f3420(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,128);*r1p=r1;*r0p=r0;}
+static void f3421(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-128);*r1p=r1;*r0p=r0;}
+static void f3422(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,127);*r1p=r1;*r0p=r0;}
+static void f3423(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-129);*r1p=r1;*r0p=r0;}
+static void f3424(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,256);*r1p=r1;*r0p=r0;}
+static void f3425(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-256);*r1p=r1;*r0p=r0;}
+static void f3426(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,255);*r1p=r1;*r0p=r0;}
+static void f3427(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-257);*r1p=r1;*r0p=r0;}
+static void f3428(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,512);*r1p=r1;*r0p=r0;}
+static void f3429(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-512);*r1p=r1;*r0p=r0;}
+static void f3430(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,511);*r1p=r1;*r0p=r0;}
+static void f3431(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-513);*r1p=r1;*r0p=r0;}
+static void f3432(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,1024);*r1p=r1;*r0p=r0;}
+static void f3433(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3434(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,1023);*r1p=r1;*r0p=r0;}
+static void f3435(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3436(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,2048);*r1p=r1;*r0p=r0;}
+static void f3437(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3438(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,2047);*r1p=r1;*r0p=r0;}
+static void f3439(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3440(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,4096);*r1p=r1;*r0p=r0;}
+static void f3441(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3442(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,4095);*r1p=r1;*r0p=r0;}
+static void f3443(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3444(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,8192);*r1p=r1;*r0p=r0;}
+static void f3445(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3446(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,8191);*r1p=r1;*r0p=r0;}
+static void f3447(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3448(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,16384);*r1p=r1;*r0p=r0;}
+static void f3449(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3450(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,16383);*r1p=r1;*r0p=r0;}
+static void f3451(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3452(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,32768);*r1p=r1;*r0p=r0;}
+static void f3453(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3454(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,32767);*r1p=r1;*r0p=r0;}
+static void f3455(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8192,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3456(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,1);*r1p=r1;*r0p=r0;}
+static void f3457(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-1);*r1p=r1;*r0p=r0;}
+static void f3458(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,0);*r1p=r1;*r0p=r0;}
+static void f3459(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-2);*r1p=r1;*r0p=r0;}
+static void f3460(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,2);*r1p=r1;*r0p=r0;}
+static void f3461(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-2);*r1p=r1;*r0p=r0;}
+static void f3462(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,1);*r1p=r1;*r0p=r0;}
+static void f3463(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-3);*r1p=r1;*r0p=r0;}
+static void f3464(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,4);*r1p=r1;*r0p=r0;}
+static void f3465(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-4);*r1p=r1;*r0p=r0;}
+static void f3466(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,3);*r1p=r1;*r0p=r0;}
+static void f3467(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-5);*r1p=r1;*r0p=r0;}
+static void f3468(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,8);*r1p=r1;*r0p=r0;}
+static void f3469(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-8);*r1p=r1;*r0p=r0;}
+static void f3470(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,7);*r1p=r1;*r0p=r0;}
+static void f3471(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-9);*r1p=r1;*r0p=r0;}
+static void f3472(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,16);*r1p=r1;*r0p=r0;}
+static void f3473(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-16);*r1p=r1;*r0p=r0;}
+static void f3474(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,15);*r1p=r1;*r0p=r0;}
+static void f3475(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-17);*r1p=r1;*r0p=r0;}
+static void f3476(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,32);*r1p=r1;*r0p=r0;}
+static void f3477(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-32);*r1p=r1;*r0p=r0;}
+static void f3478(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,31);*r1p=r1;*r0p=r0;}
+static void f3479(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-33);*r1p=r1;*r0p=r0;}
+static void f3480(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,64);*r1p=r1;*r0p=r0;}
+static void f3481(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-64);*r1p=r1;*r0p=r0;}
+static void f3482(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,63);*r1p=r1;*r0p=r0;}
+static void f3483(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-65);*r1p=r1;*r0p=r0;}
+static void f3484(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,128);*r1p=r1;*r0p=r0;}
+static void f3485(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-128);*r1p=r1;*r0p=r0;}
+static void f3486(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,127);*r1p=r1;*r0p=r0;}
+static void f3487(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-129);*r1p=r1;*r0p=r0;}
+static void f3488(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,256);*r1p=r1;*r0p=r0;}
+static void f3489(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-256);*r1p=r1;*r0p=r0;}
+static void f3490(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,255);*r1p=r1;*r0p=r0;}
+static void f3491(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-257);*r1p=r1;*r0p=r0;}
+static void f3492(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,512);*r1p=r1;*r0p=r0;}
+static void f3493(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-512);*r1p=r1;*r0p=r0;}
+static void f3494(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,511);*r1p=r1;*r0p=r0;}
+static void f3495(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-513);*r1p=r1;*r0p=r0;}
+static void f3496(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,1024);*r1p=r1;*r0p=r0;}
+static void f3497(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3498(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,1023);*r1p=r1;*r0p=r0;}
+static void f3499(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3500(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,2048);*r1p=r1;*r0p=r0;}
+static void f3501(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3502(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,2047);*r1p=r1;*r0p=r0;}
+static void f3503(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3504(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,4096);*r1p=r1;*r0p=r0;}
+static void f3505(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3506(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,4095);*r1p=r1;*r0p=r0;}
+static void f3507(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3508(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,8192);*r1p=r1;*r0p=r0;}
+static void f3509(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3510(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,8191);*r1p=r1;*r0p=r0;}
+static void f3511(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3512(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,16384);*r1p=r1;*r0p=r0;}
+static void f3513(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3514(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,16383);*r1p=r1;*r0p=r0;}
+static void f3515(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3516(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,32768);*r1p=r1;*r0p=r0;}
+static void f3517(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3518(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,32767);*r1p=r1;*r0p=r0;}
+static void f3519(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,8191,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3520(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,1);*r1p=r1;*r0p=r0;}
+static void f3521(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-1);*r1p=r1;*r0p=r0;}
+static void f3522(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,0);*r1p=r1;*r0p=r0;}
+static void f3523(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-2);*r1p=r1;*r0p=r0;}
+static void f3524(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,2);*r1p=r1;*r0p=r0;}
+static void f3525(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-2);*r1p=r1;*r0p=r0;}
+static void f3526(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,1);*r1p=r1;*r0p=r0;}
+static void f3527(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-3);*r1p=r1;*r0p=r0;}
+static void f3528(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,4);*r1p=r1;*r0p=r0;}
+static void f3529(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-4);*r1p=r1;*r0p=r0;}
+static void f3530(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,3);*r1p=r1;*r0p=r0;}
+static void f3531(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-5);*r1p=r1;*r0p=r0;}
+static void f3532(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,8);*r1p=r1;*r0p=r0;}
+static void f3533(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-8);*r1p=r1;*r0p=r0;}
+static void f3534(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,7);*r1p=r1;*r0p=r0;}
+static void f3535(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-9);*r1p=r1;*r0p=r0;}
+static void f3536(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,16);*r1p=r1;*r0p=r0;}
+static void f3537(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-16);*r1p=r1;*r0p=r0;}
+static void f3538(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,15);*r1p=r1;*r0p=r0;}
+static void f3539(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-17);*r1p=r1;*r0p=r0;}
+static void f3540(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,32);*r1p=r1;*r0p=r0;}
+static void f3541(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-32);*r1p=r1;*r0p=r0;}
+static void f3542(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,31);*r1p=r1;*r0p=r0;}
+static void f3543(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-33);*r1p=r1;*r0p=r0;}
+static void f3544(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,64);*r1p=r1;*r0p=r0;}
+static void f3545(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-64);*r1p=r1;*r0p=r0;}
+static void f3546(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,63);*r1p=r1;*r0p=r0;}
+static void f3547(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-65);*r1p=r1;*r0p=r0;}
+static void f3548(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,128);*r1p=r1;*r0p=r0;}
+static void f3549(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-128);*r1p=r1;*r0p=r0;}
+static void f3550(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,127);*r1p=r1;*r0p=r0;}
+static void f3551(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-129);*r1p=r1;*r0p=r0;}
+static void f3552(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,256);*r1p=r1;*r0p=r0;}
+static void f3553(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-256);*r1p=r1;*r0p=r0;}
+static void f3554(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,255);*r1p=r1;*r0p=r0;}
+static void f3555(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-257);*r1p=r1;*r0p=r0;}
+static void f3556(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,512);*r1p=r1;*r0p=r0;}
+static void f3557(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-512);*r1p=r1;*r0p=r0;}
+static void f3558(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,511);*r1p=r1;*r0p=r0;}
+static void f3559(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-513);*r1p=r1;*r0p=r0;}
+static void f3560(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,1024);*r1p=r1;*r0p=r0;}
+static void f3561(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3562(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,1023);*r1p=r1;*r0p=r0;}
+static void f3563(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3564(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,2048);*r1p=r1;*r0p=r0;}
+static void f3565(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3566(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,2047);*r1p=r1;*r0p=r0;}
+static void f3567(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3568(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,4096);*r1p=r1;*r0p=r0;}
+static void f3569(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3570(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,4095);*r1p=r1;*r0p=r0;}
+static void f3571(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3572(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,8192);*r1p=r1;*r0p=r0;}
+static void f3573(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3574(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,8191);*r1p=r1;*r0p=r0;}
+static void f3575(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3576(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,16384);*r1p=r1;*r0p=r0;}
+static void f3577(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3578(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,16383);*r1p=r1;*r0p=r0;}
+static void f3579(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3580(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,32768);*r1p=r1;*r0p=r0;}
+static void f3581(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3582(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,32767);*r1p=r1;*r0p=r0;}
+static void f3583(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-8193,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3584(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,1);*r1p=r1;*r0p=r0;}
+static void f3585(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-1);*r1p=r1;*r0p=r0;}
+static void f3586(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,0);*r1p=r1;*r0p=r0;}
+static void f3587(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-2);*r1p=r1;*r0p=r0;}
+static void f3588(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,2);*r1p=r1;*r0p=r0;}
+static void f3589(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-2);*r1p=r1;*r0p=r0;}
+static void f3590(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,1);*r1p=r1;*r0p=r0;}
+static void f3591(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-3);*r1p=r1;*r0p=r0;}
+static void f3592(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,4);*r1p=r1;*r0p=r0;}
+static void f3593(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-4);*r1p=r1;*r0p=r0;}
+static void f3594(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,3);*r1p=r1;*r0p=r0;}
+static void f3595(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-5);*r1p=r1;*r0p=r0;}
+static void f3596(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,8);*r1p=r1;*r0p=r0;}
+static void f3597(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-8);*r1p=r1;*r0p=r0;}
+static void f3598(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,7);*r1p=r1;*r0p=r0;}
+static void f3599(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-9);*r1p=r1;*r0p=r0;}
+static void f3600(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,16);*r1p=r1;*r0p=r0;}
+static void f3601(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-16);*r1p=r1;*r0p=r0;}
+static void f3602(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,15);*r1p=r1;*r0p=r0;}
+static void f3603(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-17);*r1p=r1;*r0p=r0;}
+static void f3604(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,32);*r1p=r1;*r0p=r0;}
+static void f3605(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-32);*r1p=r1;*r0p=r0;}
+static void f3606(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,31);*r1p=r1;*r0p=r0;}
+static void f3607(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-33);*r1p=r1;*r0p=r0;}
+static void f3608(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,64);*r1p=r1;*r0p=r0;}
+static void f3609(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-64);*r1p=r1;*r0p=r0;}
+static void f3610(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,63);*r1p=r1;*r0p=r0;}
+static void f3611(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-65);*r1p=r1;*r0p=r0;}
+static void f3612(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,128);*r1p=r1;*r0p=r0;}
+static void f3613(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-128);*r1p=r1;*r0p=r0;}
+static void f3614(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,127);*r1p=r1;*r0p=r0;}
+static void f3615(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-129);*r1p=r1;*r0p=r0;}
+static void f3616(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,256);*r1p=r1;*r0p=r0;}
+static void f3617(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-256);*r1p=r1;*r0p=r0;}
+static void f3618(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,255);*r1p=r1;*r0p=r0;}
+static void f3619(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-257);*r1p=r1;*r0p=r0;}
+static void f3620(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,512);*r1p=r1;*r0p=r0;}
+static void f3621(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-512);*r1p=r1;*r0p=r0;}
+static void f3622(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,511);*r1p=r1;*r0p=r0;}
+static void f3623(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-513);*r1p=r1;*r0p=r0;}
+static void f3624(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,1024);*r1p=r1;*r0p=r0;}
+static void f3625(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3626(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,1023);*r1p=r1;*r0p=r0;}
+static void f3627(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3628(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,2048);*r1p=r1;*r0p=r0;}
+static void f3629(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3630(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,2047);*r1p=r1;*r0p=r0;}
+static void f3631(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3632(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,4096);*r1p=r1;*r0p=r0;}
+static void f3633(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3634(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,4095);*r1p=r1;*r0p=r0;}
+static void f3635(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3636(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,8192);*r1p=r1;*r0p=r0;}
+static void f3637(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3638(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,8191);*r1p=r1;*r0p=r0;}
+static void f3639(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3640(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,16384);*r1p=r1;*r0p=r0;}
+static void f3641(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3642(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,16383);*r1p=r1;*r0p=r0;}
+static void f3643(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3644(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,32768);*r1p=r1;*r0p=r0;}
+static void f3645(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3646(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,32767);*r1p=r1;*r0p=r0;}
+static void f3647(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16384,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3648(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,1);*r1p=r1;*r0p=r0;}
+static void f3649(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-1);*r1p=r1;*r0p=r0;}
+static void f3650(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,0);*r1p=r1;*r0p=r0;}
+static void f3651(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-2);*r1p=r1;*r0p=r0;}
+static void f3652(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,2);*r1p=r1;*r0p=r0;}
+static void f3653(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-2);*r1p=r1;*r0p=r0;}
+static void f3654(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,1);*r1p=r1;*r0p=r0;}
+static void f3655(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-3);*r1p=r1;*r0p=r0;}
+static void f3656(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,4);*r1p=r1;*r0p=r0;}
+static void f3657(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-4);*r1p=r1;*r0p=r0;}
+static void f3658(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,3);*r1p=r1;*r0p=r0;}
+static void f3659(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-5);*r1p=r1;*r0p=r0;}
+static void f3660(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,8);*r1p=r1;*r0p=r0;}
+static void f3661(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-8);*r1p=r1;*r0p=r0;}
+static void f3662(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,7);*r1p=r1;*r0p=r0;}
+static void f3663(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-9);*r1p=r1;*r0p=r0;}
+static void f3664(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,16);*r1p=r1;*r0p=r0;}
+static void f3665(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-16);*r1p=r1;*r0p=r0;}
+static void f3666(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,15);*r1p=r1;*r0p=r0;}
+static void f3667(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-17);*r1p=r1;*r0p=r0;}
+static void f3668(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,32);*r1p=r1;*r0p=r0;}
+static void f3669(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-32);*r1p=r1;*r0p=r0;}
+static void f3670(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,31);*r1p=r1;*r0p=r0;}
+static void f3671(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-33);*r1p=r1;*r0p=r0;}
+static void f3672(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,64);*r1p=r1;*r0p=r0;}
+static void f3673(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-64);*r1p=r1;*r0p=r0;}
+static void f3674(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,63);*r1p=r1;*r0p=r0;}
+static void f3675(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-65);*r1p=r1;*r0p=r0;}
+static void f3676(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,128);*r1p=r1;*r0p=r0;}
+static void f3677(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-128);*r1p=r1;*r0p=r0;}
+static void f3678(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,127);*r1p=r1;*r0p=r0;}
+static void f3679(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-129);*r1p=r1;*r0p=r0;}
+static void f3680(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,256);*r1p=r1;*r0p=r0;}
+static void f3681(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-256);*r1p=r1;*r0p=r0;}
+static void f3682(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,255);*r1p=r1;*r0p=r0;}
+static void f3683(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-257);*r1p=r1;*r0p=r0;}
+static void f3684(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,512);*r1p=r1;*r0p=r0;}
+static void f3685(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-512);*r1p=r1;*r0p=r0;}
+static void f3686(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,511);*r1p=r1;*r0p=r0;}
+static void f3687(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-513);*r1p=r1;*r0p=r0;}
+static void f3688(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,1024);*r1p=r1;*r0p=r0;}
+static void f3689(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3690(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,1023);*r1p=r1;*r0p=r0;}
+static void f3691(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3692(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,2048);*r1p=r1;*r0p=r0;}
+static void f3693(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3694(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,2047);*r1p=r1;*r0p=r0;}
+static void f3695(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3696(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,4096);*r1p=r1;*r0p=r0;}
+static void f3697(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3698(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,4095);*r1p=r1;*r0p=r0;}
+static void f3699(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3700(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,8192);*r1p=r1;*r0p=r0;}
+static void f3701(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3702(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,8191);*r1p=r1;*r0p=r0;}
+static void f3703(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3704(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,16384);*r1p=r1;*r0p=r0;}
+static void f3705(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3706(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,16383);*r1p=r1;*r0p=r0;}
+static void f3707(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3708(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,32768);*r1p=r1;*r0p=r0;}
+static void f3709(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3710(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,32767);*r1p=r1;*r0p=r0;}
+static void f3711(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16384,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3712(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,1);*r1p=r1;*r0p=r0;}
+static void f3713(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-1);*r1p=r1;*r0p=r0;}
+static void f3714(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,0);*r1p=r1;*r0p=r0;}
+static void f3715(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-2);*r1p=r1;*r0p=r0;}
+static void f3716(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,2);*r1p=r1;*r0p=r0;}
+static void f3717(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-2);*r1p=r1;*r0p=r0;}
+static void f3718(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,1);*r1p=r1;*r0p=r0;}
+static void f3719(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-3);*r1p=r1;*r0p=r0;}
+static void f3720(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,4);*r1p=r1;*r0p=r0;}
+static void f3721(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-4);*r1p=r1;*r0p=r0;}
+static void f3722(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,3);*r1p=r1;*r0p=r0;}
+static void f3723(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-5);*r1p=r1;*r0p=r0;}
+static void f3724(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,8);*r1p=r1;*r0p=r0;}
+static void f3725(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-8);*r1p=r1;*r0p=r0;}
+static void f3726(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,7);*r1p=r1;*r0p=r0;}
+static void f3727(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-9);*r1p=r1;*r0p=r0;}
+static void f3728(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,16);*r1p=r1;*r0p=r0;}
+static void f3729(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-16);*r1p=r1;*r0p=r0;}
+static void f3730(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,15);*r1p=r1;*r0p=r0;}
+static void f3731(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-17);*r1p=r1;*r0p=r0;}
+static void f3732(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,32);*r1p=r1;*r0p=r0;}
+static void f3733(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-32);*r1p=r1;*r0p=r0;}
+static void f3734(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,31);*r1p=r1;*r0p=r0;}
+static void f3735(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-33);*r1p=r1;*r0p=r0;}
+static void f3736(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,64);*r1p=r1;*r0p=r0;}
+static void f3737(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-64);*r1p=r1;*r0p=r0;}
+static void f3738(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,63);*r1p=r1;*r0p=r0;}
+static void f3739(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-65);*r1p=r1;*r0p=r0;}
+static void f3740(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,128);*r1p=r1;*r0p=r0;}
+static void f3741(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-128);*r1p=r1;*r0p=r0;}
+static void f3742(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,127);*r1p=r1;*r0p=r0;}
+static void f3743(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-129);*r1p=r1;*r0p=r0;}
+static void f3744(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,256);*r1p=r1;*r0p=r0;}
+static void f3745(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-256);*r1p=r1;*r0p=r0;}
+static void f3746(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,255);*r1p=r1;*r0p=r0;}
+static void f3747(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-257);*r1p=r1;*r0p=r0;}
+static void f3748(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,512);*r1p=r1;*r0p=r0;}
+static void f3749(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-512);*r1p=r1;*r0p=r0;}
+static void f3750(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,511);*r1p=r1;*r0p=r0;}
+static void f3751(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-513);*r1p=r1;*r0p=r0;}
+static void f3752(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,1024);*r1p=r1;*r0p=r0;}
+static void f3753(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3754(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,1023);*r1p=r1;*r0p=r0;}
+static void f3755(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3756(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,2048);*r1p=r1;*r0p=r0;}
+static void f3757(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3758(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,2047);*r1p=r1;*r0p=r0;}
+static void f3759(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3760(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,4096);*r1p=r1;*r0p=r0;}
+static void f3761(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3762(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,4095);*r1p=r1;*r0p=r0;}
+static void f3763(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3764(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,8192);*r1p=r1;*r0p=r0;}
+static void f3765(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3766(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,8191);*r1p=r1;*r0p=r0;}
+static void f3767(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3768(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,16384);*r1p=r1;*r0p=r0;}
+static void f3769(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3770(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,16383);*r1p=r1;*r0p=r0;}
+static void f3771(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3772(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,32768);*r1p=r1;*r0p=r0;}
+static void f3773(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3774(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,32767);*r1p=r1;*r0p=r0;}
+static void f3775(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,16383,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3776(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,1);*r1p=r1;*r0p=r0;}
+static void f3777(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-1);*r1p=r1;*r0p=r0;}
+static void f3778(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,0);*r1p=r1;*r0p=r0;}
+static void f3779(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-2);*r1p=r1;*r0p=r0;}
+static void f3780(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,2);*r1p=r1;*r0p=r0;}
+static void f3781(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-2);*r1p=r1;*r0p=r0;}
+static void f3782(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,1);*r1p=r1;*r0p=r0;}
+static void f3783(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-3);*r1p=r1;*r0p=r0;}
+static void f3784(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,4);*r1p=r1;*r0p=r0;}
+static void f3785(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-4);*r1p=r1;*r0p=r0;}
+static void f3786(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,3);*r1p=r1;*r0p=r0;}
+static void f3787(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-5);*r1p=r1;*r0p=r0;}
+static void f3788(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,8);*r1p=r1;*r0p=r0;}
+static void f3789(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-8);*r1p=r1;*r0p=r0;}
+static void f3790(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,7);*r1p=r1;*r0p=r0;}
+static void f3791(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-9);*r1p=r1;*r0p=r0;}
+static void f3792(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,16);*r1p=r1;*r0p=r0;}
+static void f3793(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-16);*r1p=r1;*r0p=r0;}
+static void f3794(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,15);*r1p=r1;*r0p=r0;}
+static void f3795(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-17);*r1p=r1;*r0p=r0;}
+static void f3796(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,32);*r1p=r1;*r0p=r0;}
+static void f3797(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-32);*r1p=r1;*r0p=r0;}
+static void f3798(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,31);*r1p=r1;*r0p=r0;}
+static void f3799(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-33);*r1p=r1;*r0p=r0;}
+static void f3800(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,64);*r1p=r1;*r0p=r0;}
+static void f3801(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-64);*r1p=r1;*r0p=r0;}
+static void f3802(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,63);*r1p=r1;*r0p=r0;}
+static void f3803(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-65);*r1p=r1;*r0p=r0;}
+static void f3804(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,128);*r1p=r1;*r0p=r0;}
+static void f3805(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-128);*r1p=r1;*r0p=r0;}
+static void f3806(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,127);*r1p=r1;*r0p=r0;}
+static void f3807(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-129);*r1p=r1;*r0p=r0;}
+static void f3808(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,256);*r1p=r1;*r0p=r0;}
+static void f3809(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-256);*r1p=r1;*r0p=r0;}
+static void f3810(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,255);*r1p=r1;*r0p=r0;}
+static void f3811(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-257);*r1p=r1;*r0p=r0;}
+static void f3812(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,512);*r1p=r1;*r0p=r0;}
+static void f3813(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-512);*r1p=r1;*r0p=r0;}
+static void f3814(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,511);*r1p=r1;*r0p=r0;}
+static void f3815(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-513);*r1p=r1;*r0p=r0;}
+static void f3816(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,1024);*r1p=r1;*r0p=r0;}
+static void f3817(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3818(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,1023);*r1p=r1;*r0p=r0;}
+static void f3819(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3820(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,2048);*r1p=r1;*r0p=r0;}
+static void f3821(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3822(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,2047);*r1p=r1;*r0p=r0;}
+static void f3823(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3824(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,4096);*r1p=r1;*r0p=r0;}
+static void f3825(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3826(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,4095);*r1p=r1;*r0p=r0;}
+static void f3827(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3828(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,8192);*r1p=r1;*r0p=r0;}
+static void f3829(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3830(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,8191);*r1p=r1;*r0p=r0;}
+static void f3831(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3832(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,16384);*r1p=r1;*r0p=r0;}
+static void f3833(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3834(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,16383);*r1p=r1;*r0p=r0;}
+static void f3835(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3836(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,32768);*r1p=r1;*r0p=r0;}
+static void f3837(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3838(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,32767);*r1p=r1;*r0p=r0;}
+static void f3839(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-16385,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3840(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,1);*r1p=r1;*r0p=r0;}
+static void f3841(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-1);*r1p=r1;*r0p=r0;}
+static void f3842(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,0);*r1p=r1;*r0p=r0;}
+static void f3843(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-2);*r1p=r1;*r0p=r0;}
+static void f3844(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,2);*r1p=r1;*r0p=r0;}
+static void f3845(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-2);*r1p=r1;*r0p=r0;}
+static void f3846(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,1);*r1p=r1;*r0p=r0;}
+static void f3847(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-3);*r1p=r1;*r0p=r0;}
+static void f3848(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,4);*r1p=r1;*r0p=r0;}
+static void f3849(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-4);*r1p=r1;*r0p=r0;}
+static void f3850(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,3);*r1p=r1;*r0p=r0;}
+static void f3851(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-5);*r1p=r1;*r0p=r0;}
+static void f3852(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,8);*r1p=r1;*r0p=r0;}
+static void f3853(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-8);*r1p=r1;*r0p=r0;}
+static void f3854(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,7);*r1p=r1;*r0p=r0;}
+static void f3855(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-9);*r1p=r1;*r0p=r0;}
+static void f3856(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,16);*r1p=r1;*r0p=r0;}
+static void f3857(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-16);*r1p=r1;*r0p=r0;}
+static void f3858(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,15);*r1p=r1;*r0p=r0;}
+static void f3859(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-17);*r1p=r1;*r0p=r0;}
+static void f3860(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,32);*r1p=r1;*r0p=r0;}
+static void f3861(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-32);*r1p=r1;*r0p=r0;}
+static void f3862(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,31);*r1p=r1;*r0p=r0;}
+static void f3863(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-33);*r1p=r1;*r0p=r0;}
+static void f3864(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,64);*r1p=r1;*r0p=r0;}
+static void f3865(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-64);*r1p=r1;*r0p=r0;}
+static void f3866(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,63);*r1p=r1;*r0p=r0;}
+static void f3867(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-65);*r1p=r1;*r0p=r0;}
+static void f3868(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,128);*r1p=r1;*r0p=r0;}
+static void f3869(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-128);*r1p=r1;*r0p=r0;}
+static void f3870(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,127);*r1p=r1;*r0p=r0;}
+static void f3871(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-129);*r1p=r1;*r0p=r0;}
+static void f3872(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,256);*r1p=r1;*r0p=r0;}
+static void f3873(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-256);*r1p=r1;*r0p=r0;}
+static void f3874(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,255);*r1p=r1;*r0p=r0;}
+static void f3875(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-257);*r1p=r1;*r0p=r0;}
+static void f3876(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,512);*r1p=r1;*r0p=r0;}
+static void f3877(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-512);*r1p=r1;*r0p=r0;}
+static void f3878(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,511);*r1p=r1;*r0p=r0;}
+static void f3879(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-513);*r1p=r1;*r0p=r0;}
+static void f3880(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,1024);*r1p=r1;*r0p=r0;}
+static void f3881(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3882(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,1023);*r1p=r1;*r0p=r0;}
+static void f3883(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3884(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,2048);*r1p=r1;*r0p=r0;}
+static void f3885(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3886(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,2047);*r1p=r1;*r0p=r0;}
+static void f3887(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3888(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,4096);*r1p=r1;*r0p=r0;}
+static void f3889(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3890(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,4095);*r1p=r1;*r0p=r0;}
+static void f3891(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3892(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,8192);*r1p=r1;*r0p=r0;}
+static void f3893(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3894(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,8191);*r1p=r1;*r0p=r0;}
+static void f3895(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3896(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,16384);*r1p=r1;*r0p=r0;}
+static void f3897(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3898(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,16383);*r1p=r1;*r0p=r0;}
+static void f3899(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3900(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,32768);*r1p=r1;*r0p=r0;}
+static void f3901(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3902(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,32767);*r1p=r1;*r0p=r0;}
+static void f3903(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32768,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3904(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,1);*r1p=r1;*r0p=r0;}
+static void f3905(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-1);*r1p=r1;*r0p=r0;}
+static void f3906(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,0);*r1p=r1;*r0p=r0;}
+static void f3907(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-2);*r1p=r1;*r0p=r0;}
+static void f3908(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,2);*r1p=r1;*r0p=r0;}
+static void f3909(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-2);*r1p=r1;*r0p=r0;}
+static void f3910(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,1);*r1p=r1;*r0p=r0;}
+static void f3911(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-3);*r1p=r1;*r0p=r0;}
+static void f3912(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,4);*r1p=r1;*r0p=r0;}
+static void f3913(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-4);*r1p=r1;*r0p=r0;}
+static void f3914(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,3);*r1p=r1;*r0p=r0;}
+static void f3915(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-5);*r1p=r1;*r0p=r0;}
+static void f3916(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,8);*r1p=r1;*r0p=r0;}
+static void f3917(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-8);*r1p=r1;*r0p=r0;}
+static void f3918(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,7);*r1p=r1;*r0p=r0;}
+static void f3919(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-9);*r1p=r1;*r0p=r0;}
+static void f3920(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,16);*r1p=r1;*r0p=r0;}
+static void f3921(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-16);*r1p=r1;*r0p=r0;}
+static void f3922(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,15);*r1p=r1;*r0p=r0;}
+static void f3923(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-17);*r1p=r1;*r0p=r0;}
+static void f3924(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,32);*r1p=r1;*r0p=r0;}
+static void f3925(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-32);*r1p=r1;*r0p=r0;}
+static void f3926(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,31);*r1p=r1;*r0p=r0;}
+static void f3927(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-33);*r1p=r1;*r0p=r0;}
+static void f3928(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,64);*r1p=r1;*r0p=r0;}
+static void f3929(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-64);*r1p=r1;*r0p=r0;}
+static void f3930(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,63);*r1p=r1;*r0p=r0;}
+static void f3931(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-65);*r1p=r1;*r0p=r0;}
+static void f3932(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,128);*r1p=r1;*r0p=r0;}
+static void f3933(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-128);*r1p=r1;*r0p=r0;}
+static void f3934(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,127);*r1p=r1;*r0p=r0;}
+static void f3935(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-129);*r1p=r1;*r0p=r0;}
+static void f3936(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,256);*r1p=r1;*r0p=r0;}
+static void f3937(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-256);*r1p=r1;*r0p=r0;}
+static void f3938(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,255);*r1p=r1;*r0p=r0;}
+static void f3939(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-257);*r1p=r1;*r0p=r0;}
+static void f3940(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,512);*r1p=r1;*r0p=r0;}
+static void f3941(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-512);*r1p=r1;*r0p=r0;}
+static void f3942(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,511);*r1p=r1;*r0p=r0;}
+static void f3943(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-513);*r1p=r1;*r0p=r0;}
+static void f3944(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,1024);*r1p=r1;*r0p=r0;}
+static void f3945(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-1024);*r1p=r1;*r0p=r0;}
+static void f3946(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,1023);*r1p=r1;*r0p=r0;}
+static void f3947(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-1025);*r1p=r1;*r0p=r0;}
+static void f3948(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,2048);*r1p=r1;*r0p=r0;}
+static void f3949(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-2048);*r1p=r1;*r0p=r0;}
+static void f3950(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,2047);*r1p=r1;*r0p=r0;}
+static void f3951(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-2049);*r1p=r1;*r0p=r0;}
+static void f3952(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,4096);*r1p=r1;*r0p=r0;}
+static void f3953(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-4096);*r1p=r1;*r0p=r0;}
+static void f3954(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,4095);*r1p=r1;*r0p=r0;}
+static void f3955(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-4097);*r1p=r1;*r0p=r0;}
+static void f3956(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,8192);*r1p=r1;*r0p=r0;}
+static void f3957(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-8192);*r1p=r1;*r0p=r0;}
+static void f3958(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,8191);*r1p=r1;*r0p=r0;}
+static void f3959(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-8193);*r1p=r1;*r0p=r0;}
+static void f3960(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,16384);*r1p=r1;*r0p=r0;}
+static void f3961(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-16384);*r1p=r1;*r0p=r0;}
+static void f3962(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,16383);*r1p=r1;*r0p=r0;}
+static void f3963(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-16385);*r1p=r1;*r0p=r0;}
+static void f3964(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,32768);*r1p=r1;*r0p=r0;}
+static void f3965(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-32768);*r1p=r1;*r0p=r0;}
+static void f3966(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,32767);*r1p=r1;*r0p=r0;}
+static void f3967(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32768,0,-32769);*r1p=r1;*r0p=r0;}
+static void f3968(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,1);*r1p=r1;*r0p=r0;}
+static void f3969(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-1);*r1p=r1;*r0p=r0;}
+static void f3970(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,0);*r1p=r1;*r0p=r0;}
+static void f3971(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-2);*r1p=r1;*r0p=r0;}
+static void f3972(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,2);*r1p=r1;*r0p=r0;}
+static void f3973(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-2);*r1p=r1;*r0p=r0;}
+static void f3974(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,1);*r1p=r1;*r0p=r0;}
+static void f3975(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-3);*r1p=r1;*r0p=r0;}
+static void f3976(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,4);*r1p=r1;*r0p=r0;}
+static void f3977(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-4);*r1p=r1;*r0p=r0;}
+static void f3978(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,3);*r1p=r1;*r0p=r0;}
+static void f3979(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-5);*r1p=r1;*r0p=r0;}
+static void f3980(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,8);*r1p=r1;*r0p=r0;}
+static void f3981(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-8);*r1p=r1;*r0p=r0;}
+static void f3982(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,7);*r1p=r1;*r0p=r0;}
+static void f3983(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-9);*r1p=r1;*r0p=r0;}
+static void f3984(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,16);*r1p=r1;*r0p=r0;}
+static void f3985(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-16);*r1p=r1;*r0p=r0;}
+static void f3986(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,15);*r1p=r1;*r0p=r0;}
+static void f3987(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-17);*r1p=r1;*r0p=r0;}
+static void f3988(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,32);*r1p=r1;*r0p=r0;}
+static void f3989(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-32);*r1p=r1;*r0p=r0;}
+static void f3990(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,31);*r1p=r1;*r0p=r0;}
+static void f3991(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-33);*r1p=r1;*r0p=r0;}
+static void f3992(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,64);*r1p=r1;*r0p=r0;}
+static void f3993(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-64);*r1p=r1;*r0p=r0;}
+static void f3994(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,63);*r1p=r1;*r0p=r0;}
+static void f3995(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-65);*r1p=r1;*r0p=r0;}
+static void f3996(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,128);*r1p=r1;*r0p=r0;}
+static void f3997(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-128);*r1p=r1;*r0p=r0;}
+static void f3998(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,127);*r1p=r1;*r0p=r0;}
+static void f3999(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-129);*r1p=r1;*r0p=r0;}
+static void f4000(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,256);*r1p=r1;*r0p=r0;}
+static void f4001(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-256);*r1p=r1;*r0p=r0;}
+static void f4002(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,255);*r1p=r1;*r0p=r0;}
+static void f4003(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-257);*r1p=r1;*r0p=r0;}
+static void f4004(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,512);*r1p=r1;*r0p=r0;}
+static void f4005(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-512);*r1p=r1;*r0p=r0;}
+static void f4006(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,511);*r1p=r1;*r0p=r0;}
+static void f4007(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-513);*r1p=r1;*r0p=r0;}
+static void f4008(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,1024);*r1p=r1;*r0p=r0;}
+static void f4009(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-1024);*r1p=r1;*r0p=r0;}
+static void f4010(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,1023);*r1p=r1;*r0p=r0;}
+static void f4011(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-1025);*r1p=r1;*r0p=r0;}
+static void f4012(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,2048);*r1p=r1;*r0p=r0;}
+static void f4013(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-2048);*r1p=r1;*r0p=r0;}
+static void f4014(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,2047);*r1p=r1;*r0p=r0;}
+static void f4015(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-2049);*r1p=r1;*r0p=r0;}
+static void f4016(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,4096);*r1p=r1;*r0p=r0;}
+static void f4017(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-4096);*r1p=r1;*r0p=r0;}
+static void f4018(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,4095);*r1p=r1;*r0p=r0;}
+static void f4019(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-4097);*r1p=r1;*r0p=r0;}
+static void f4020(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,8192);*r1p=r1;*r0p=r0;}
+static void f4021(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-8192);*r1p=r1;*r0p=r0;}
+static void f4022(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,8191);*r1p=r1;*r0p=r0;}
+static void f4023(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-8193);*r1p=r1;*r0p=r0;}
+static void f4024(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,16384);*r1p=r1;*r0p=r0;}
+static void f4025(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-16384);*r1p=r1;*r0p=r0;}
+static void f4026(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,16383);*r1p=r1;*r0p=r0;}
+static void f4027(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-16385);*r1p=r1;*r0p=r0;}
+static void f4028(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,32768);*r1p=r1;*r0p=r0;}
+static void f4029(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-32768);*r1p=r1;*r0p=r0;}
+static void f4030(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,32767);*r1p=r1;*r0p=r0;}
+static void f4031(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,32767,0,-32769);*r1p=r1;*r0p=r0;}
+static void f4032(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,1);*r1p=r1;*r0p=r0;}
+static void f4033(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-1);*r1p=r1;*r0p=r0;}
+static void f4034(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,0);*r1p=r1;*r0p=r0;}
+static void f4035(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-2);*r1p=r1;*r0p=r0;}
+static void f4036(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,2);*r1p=r1;*r0p=r0;}
+static void f4037(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-2);*r1p=r1;*r0p=r0;}
+static void f4038(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,1);*r1p=r1;*r0p=r0;}
+static void f4039(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-3);*r1p=r1;*r0p=r0;}
+static void f4040(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,4);*r1p=r1;*r0p=r0;}
+static void f4041(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-4);*r1p=r1;*r0p=r0;}
+static void f4042(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,3);*r1p=r1;*r0p=r0;}
+static void f4043(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-5);*r1p=r1;*r0p=r0;}
+static void f4044(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,8);*r1p=r1;*r0p=r0;}
+static void f4045(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-8);*r1p=r1;*r0p=r0;}
+static void f4046(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,7);*r1p=r1;*r0p=r0;}
+static void f4047(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-9);*r1p=r1;*r0p=r0;}
+static void f4048(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,16);*r1p=r1;*r0p=r0;}
+static void f4049(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-16);*r1p=r1;*r0p=r0;}
+static void f4050(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,15);*r1p=r1;*r0p=r0;}
+static void f4051(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-17);*r1p=r1;*r0p=r0;}
+static void f4052(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,32);*r1p=r1;*r0p=r0;}
+static void f4053(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-32);*r1p=r1;*r0p=r0;}
+static void f4054(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,31);*r1p=r1;*r0p=r0;}
+static void f4055(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-33);*r1p=r1;*r0p=r0;}
+static void f4056(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,64);*r1p=r1;*r0p=r0;}
+static void f4057(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-64);*r1p=r1;*r0p=r0;}
+static void f4058(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,63);*r1p=r1;*r0p=r0;}
+static void f4059(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-65);*r1p=r1;*r0p=r0;}
+static void f4060(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,128);*r1p=r1;*r0p=r0;}
+static void f4061(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-128);*r1p=r1;*r0p=r0;}
+static void f4062(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,127);*r1p=r1;*r0p=r0;}
+static void f4063(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-129);*r1p=r1;*r0p=r0;}
+static void f4064(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,256);*r1p=r1;*r0p=r0;}
+static void f4065(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-256);*r1p=r1;*r0p=r0;}
+static void f4066(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,255);*r1p=r1;*r0p=r0;}
+static void f4067(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-257);*r1p=r1;*r0p=r0;}
+static void f4068(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,512);*r1p=r1;*r0p=r0;}
+static void f4069(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-512);*r1p=r1;*r0p=r0;}
+static void f4070(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,511);*r1p=r1;*r0p=r0;}
+static void f4071(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-513);*r1p=r1;*r0p=r0;}
+static void f4072(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,1024);*r1p=r1;*r0p=r0;}
+static void f4073(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-1024);*r1p=r1;*r0p=r0;}
+static void f4074(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,1023);*r1p=r1;*r0p=r0;}
+static void f4075(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-1025);*r1p=r1;*r0p=r0;}
+static void f4076(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,2048);*r1p=r1;*r0p=r0;}
+static void f4077(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-2048);*r1p=r1;*r0p=r0;}
+static void f4078(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,2047);*r1p=r1;*r0p=r0;}
+static void f4079(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-2049);*r1p=r1;*r0p=r0;}
+static void f4080(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,4096);*r1p=r1;*r0p=r0;}
+static void f4081(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-4096);*r1p=r1;*r0p=r0;}
+static void f4082(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,4095);*r1p=r1;*r0p=r0;}
+static void f4083(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-4097);*r1p=r1;*r0p=r0;}
+static void f4084(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,8192);*r1p=r1;*r0p=r0;}
+static void f4085(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-8192);*r1p=r1;*r0p=r0;}
+static void f4086(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,8191);*r1p=r1;*r0p=r0;}
+static void f4087(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-8193);*r1p=r1;*r0p=r0;}
+static void f4088(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,16384);*r1p=r1;*r0p=r0;}
+static void f4089(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-16384);*r1p=r1;*r0p=r0;}
+static void f4090(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,16383);*r1p=r1;*r0p=r0;}
+static void f4091(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-16385);*r1p=r1;*r0p=r0;}
+static void f4092(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,32768);*r1p=r1;*r0p=r0;}
+static void f4093(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-32768);*r1p=r1;*r0p=r0;}
+static void f4094(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,32767);*r1p=r1;*r0p=r0;}
+static void f4095(mp_limb_t*r1p,mp_limb_t*r0p){mp_limb_t r1,r0;sub_ddmmss(r1,r0,0,-32769,0,-32769);*r1p=r1;*r0p=r0;}
+typedef void (*func_t) (mp_limb_t*, mp_limb_t*);
+static const func_t funcs[4096] = {
+f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,
+f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,
+f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,
+f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,
+f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,
+f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,
+f96,f97,f98,f99,f100,f101,f102,f103,f104,f105,f106,f107,f108,f109,f110,f111,
+f112,f113,f114,f115,f116,f117,f118,f119,f120,f121,f122,f123,f124,f125,f126,f127,
+f128,f129,f130,f131,f132,f133,f134,f135,f136,f137,f138,f139,f140,f141,f142,f143,
+f144,f145,f146,f147,f148,f149,f150,f151,f152,f153,f154,f155,f156,f157,f158,f159,
+f160,f161,f162,f163,f164,f165,f166,f167,f168,f169,f170,f171,f172,f173,f174,f175,
+f176,f177,f178,f179,f180,f181,f182,f183,f184,f185,f186,f187,f188,f189,f190,f191,
+f192,f193,f194,f195,f196,f197,f198,f199,f200,f201,f202,f203,f204,f205,f206,f207,
+f208,f209,f210,f211,f212,f213,f214,f215,f216,f217,f218,f219,f220,f221,f222,f223,
+f224,f225,f226,f227,f228,f229,f230,f231,f232,f233,f234,f235,f236,f237,f238,f239,
+f240,f241,f242,f243,f244,f245,f246,f247,f248,f249,f250,f251,f252,f253,f254,f255,
+f256,f257,f258,f259,f260,f261,f262,f263,f264,f265,f266,f267,f268,f269,f270,f271,
+f272,f273,f274,f275,f276,f277,f278,f279,f280,f281,f282,f283,f284,f285,f286,f287,
+f288,f289,f290,f291,f292,f293,f294,f295,f296,f297,f298,f299,f300,f301,f302,f303,
+f304,f305,f306,f307,f308,f309,f310,f311,f312,f313,f314,f315,f316,f317,f318,f319,
+f320,f321,f322,f323,f324,f325,f326,f327,f328,f329,f330,f331,f332,f333,f334,f335,
+f336,f337,f338,f339,f340,f341,f342,f343,f344,f345,f346,f347,f348,f349,f350,f351,
+f352,f353,f354,f355,f356,f357,f358,f359,f360,f361,f362,f363,f364,f365,f366,f367,
+f368,f369,f370,f371,f372,f373,f374,f375,f376,f377,f378,f379,f380,f381,f382,f383,
+f384,f385,f386,f387,f388,f389,f390,f391,f392,f393,f394,f395,f396,f397,f398,f399,
+f400,f401,f402,f403,f404,f405,f406,f407,f408,f409,f410,f411,f412,f413,f414,f415,
+f416,f417,f418,f419,f420,f421,f422,f423,f424,f425,f426,f427,f428,f429,f430,f431,
+f432,f433,f434,f435,f436,f437,f438,f439,f440,f441,f442,f443,f444,f445,f446,f447,
+f448,f449,f450,f451,f452,f453,f454,f455,f456,f457,f458,f459,f460,f461,f462,f463,
+f464,f465,f466,f467,f468,f469,f470,f471,f472,f473,f474,f475,f476,f477,f478,f479,
+f480,f481,f482,f483,f484,f485,f486,f487,f488,f489,f490,f491,f492,f493,f494,f495,
+f496,f497,f498,f499,f500,f501,f502,f503,f504,f505,f506,f507,f508,f509,f510,f511,
+f512,f513,f514,f515,f516,f517,f518,f519,f520,f521,f522,f523,f524,f525,f526,f527,
+f528,f529,f530,f531,f532,f533,f534,f535,f536,f537,f538,f539,f540,f541,f542,f543,
+f544,f545,f546,f547,f548,f549,f550,f551,f552,f553,f554,f555,f556,f557,f558,f559,
+f560,f561,f562,f563,f564,f565,f566,f567,f568,f569,f570,f571,f572,f573,f574,f575,
+f576,f577,f578,f579,f580,f581,f582,f583,f584,f585,f586,f587,f588,f589,f590,f591,
+f592,f593,f594,f595,f596,f597,f598,f599,f600,f601,f602,f603,f604,f605,f606,f607,
+f608,f609,f610,f611,f612,f613,f614,f615,f616,f617,f618,f619,f620,f621,f622,f623,
+f624,f625,f626,f627,f628,f629,f630,f631,f632,f633,f634,f635,f636,f637,f638,f639,
+f640,f641,f642,f643,f644,f645,f646,f647,f648,f649,f650,f651,f652,f653,f654,f655,
+f656,f657,f658,f659,f660,f661,f662,f663,f664,f665,f666,f667,f668,f669,f670,f671,
+f672,f673,f674,f675,f676,f677,f678,f679,f680,f681,f682,f683,f684,f685,f686,f687,
+f688,f689,f690,f691,f692,f693,f694,f695,f696,f697,f698,f699,f700,f701,f702,f703,
+f704,f705,f706,f707,f708,f709,f710,f711,f712,f713,f714,f715,f716,f717,f718,f719,
+f720,f721,f722,f723,f724,f725,f726,f727,f728,f729,f730,f731,f732,f733,f734,f735,
+f736,f737,f738,f739,f740,f741,f742,f743,f744,f745,f746,f747,f748,f749,f750,f751,
+f752,f753,f754,f755,f756,f757,f758,f759,f760,f761,f762,f763,f764,f765,f766,f767,
+f768,f769,f770,f771,f772,f773,f774,f775,f776,f777,f778,f779,f780,f781,f782,f783,
+f784,f785,f786,f787,f788,f789,f790,f791,f792,f793,f794,f795,f796,f797,f798,f799,
+f800,f801,f802,f803,f804,f805,f806,f807,f808,f809,f810,f811,f812,f813,f814,f815,
+f816,f817,f818,f819,f820,f821,f822,f823,f824,f825,f826,f827,f828,f829,f830,f831,
+f832,f833,f834,f835,f836,f837,f838,f839,f840,f841,f842,f843,f844,f845,f846,f847,
+f848,f849,f850,f851,f852,f853,f854,f855,f856,f857,f858,f859,f860,f861,f862,f863,
+f864,f865,f866,f867,f868,f869,f870,f871,f872,f873,f874,f875,f876,f877,f878,f879,
+f880,f881,f882,f883,f884,f885,f886,f887,f888,f889,f890,f891,f892,f893,f894,f895,
+f896,f897,f898,f899,f900,f901,f902,f903,f904,f905,f906,f907,f908,f909,f910,f911,
+f912,f913,f914,f915,f916,f917,f918,f919,f920,f921,f922,f923,f924,f925,f926,f927,
+f928,f929,f930,f931,f932,f933,f934,f935,f936,f937,f938,f939,f940,f941,f942,f943,
+f944,f945,f946,f947,f948,f949,f950,f951,f952,f953,f954,f955,f956,f957,f958,f959,
+f960,f961,f962,f963,f964,f965,f966,f967,f968,f969,f970,f971,f972,f973,f974,f975,
+f976,f977,f978,f979,f980,f981,f982,f983,f984,f985,f986,f987,f988,f989,f990,f991,
+f992,f993,f994,f995,f996,f997,f998,f999,f1000,f1001,f1002,f1003,f1004,f1005,f1006,f1007,
+f1008,f1009,f1010,f1011,f1012,f1013,f1014,f1015,f1016,f1017,f1018,f1019,f1020,f1021,f1022,f1023,
+f1024,f1025,f1026,f1027,f1028,f1029,f1030,f1031,f1032,f1033,f1034,f1035,f1036,f1037,f1038,f1039,
+f1040,f1041,f1042,f1043,f1044,f1045,f1046,f1047,f1048,f1049,f1050,f1051,f1052,f1053,f1054,f1055,
+f1056,f1057,f1058,f1059,f1060,f1061,f1062,f1063,f1064,f1065,f1066,f1067,f1068,f1069,f1070,f1071,
+f1072,f1073,f1074,f1075,f1076,f1077,f1078,f1079,f1080,f1081,f1082,f1083,f1084,f1085,f1086,f1087,
+f1088,f1089,f1090,f1091,f1092,f1093,f1094,f1095,f1096,f1097,f1098,f1099,f1100,f1101,f1102,f1103,
+f1104,f1105,f1106,f1107,f1108,f1109,f1110,f1111,f1112,f1113,f1114,f1115,f1116,f1117,f1118,f1119,
+f1120,f1121,f1122,f1123,f1124,f1125,f1126,f1127,f1128,f1129,f1130,f1131,f1132,f1133,f1134,f1135,
+f1136,f1137,f1138,f1139,f1140,f1141,f1142,f1143,f1144,f1145,f1146,f1147,f1148,f1149,f1150,f1151,
+f1152,f1153,f1154,f1155,f1156,f1157,f1158,f1159,f1160,f1161,f1162,f1163,f1164,f1165,f1166,f1167,
+f1168,f1169,f1170,f1171,f1172,f1173,f1174,f1175,f1176,f1177,f1178,f1179,f1180,f1181,f1182,f1183,
+f1184,f1185,f1186,f1187,f1188,f1189,f1190,f1191,f1192,f1193,f1194,f1195,f1196,f1197,f1198,f1199,
+f1200,f1201,f1202,f1203,f1204,f1205,f1206,f1207,f1208,f1209,f1210,f1211,f1212,f1213,f1214,f1215,
+f1216,f1217,f1218,f1219,f1220,f1221,f1222,f1223,f1224,f1225,f1226,f1227,f1228,f1229,f1230,f1231,
+f1232,f1233,f1234,f1235,f1236,f1237,f1238,f1239,f1240,f1241,f1242,f1243,f1244,f1245,f1246,f1247,
+f1248,f1249,f1250,f1251,f1252,f1253,f1254,f1255,f1256,f1257,f1258,f1259,f1260,f1261,f1262,f1263,
+f1264,f1265,f1266,f1267,f1268,f1269,f1270,f1271,f1272,f1273,f1274,f1275,f1276,f1277,f1278,f1279,
+f1280,f1281,f1282,f1283,f1284,f1285,f1286,f1287,f1288,f1289,f1290,f1291,f1292,f1293,f1294,f1295,
+f1296,f1297,f1298,f1299,f1300,f1301,f1302,f1303,f1304,f1305,f1306,f1307,f1308,f1309,f1310,f1311,
+f1312,f1313,f1314,f1315,f1316,f1317,f1318,f1319,f1320,f1321,f1322,f1323,f1324,f1325,f1326,f1327,
+f1328,f1329,f1330,f1331,f1332,f1333,f1334,f1335,f1336,f1337,f1338,f1339,f1340,f1341,f1342,f1343,
+f1344,f1345,f1346,f1347,f1348,f1349,f1350,f1351,f1352,f1353,f1354,f1355,f1356,f1357,f1358,f1359,
+f1360,f1361,f1362,f1363,f1364,f1365,f1366,f1367,f1368,f1369,f1370,f1371,f1372,f1373,f1374,f1375,
+f1376,f1377,f1378,f1379,f1380,f1381,f1382,f1383,f1384,f1385,f1386,f1387,f1388,f1389,f1390,f1391,
+f1392,f1393,f1394,f1395,f1396,f1397,f1398,f1399,f1400,f1401,f1402,f1403,f1404,f1405,f1406,f1407,
+f1408,f1409,f1410,f1411,f1412,f1413,f1414,f1415,f1416,f1417,f1418,f1419,f1420,f1421,f1422,f1423,
+f1424,f1425,f1426,f1427,f1428,f1429,f1430,f1431,f1432,f1433,f1434,f1435,f1436,f1437,f1438,f1439,
+f1440,f1441,f1442,f1443,f1444,f1445,f1446,f1447,f1448,f1449,f1450,f1451,f1452,f1453,f1454,f1455,
+f1456,f1457,f1458,f1459,f1460,f1461,f1462,f1463,f1464,f1465,f1466,f1467,f1468,f1469,f1470,f1471,
+f1472,f1473,f1474,f1475,f1476,f1477,f1478,f1479,f1480,f1481,f1482,f1483,f1484,f1485,f1486,f1487,
+f1488,f1489,f1490,f1491,f1492,f1493,f1494,f1495,f1496,f1497,f1498,f1499,f1500,f1501,f1502,f1503,
+f1504,f1505,f1506,f1507,f1508,f1509,f1510,f1511,f1512,f1513,f1514,f1515,f1516,f1517,f1518,f1519,
+f1520,f1521,f1522,f1523,f1524,f1525,f1526,f1527,f1528,f1529,f1530,f1531,f1532,f1533,f1534,f1535,
+f1536,f1537,f1538,f1539,f1540,f1541,f1542,f1543,f1544,f1545,f1546,f1547,f1548,f1549,f1550,f1551,
+f1552,f1553,f1554,f1555,f1556,f1557,f1558,f1559,f1560,f1561,f1562,f1563,f1564,f1565,f1566,f1567,
+f1568,f1569,f1570,f1571,f1572,f1573,f1574,f1575,f1576,f1577,f1578,f1579,f1580,f1581,f1582,f1583,
+f1584,f1585,f1586,f1587,f1588,f1589,f1590,f1591,f1592,f1593,f1594,f1595,f1596,f1597,f1598,f1599,
+f1600,f1601,f1602,f1603,f1604,f1605,f1606,f1607,f1608,f1609,f1610,f1611,f1612,f1613,f1614,f1615,
+f1616,f1617,f1618,f1619,f1620,f1621,f1622,f1623,f1624,f1625,f1626,f1627,f1628,f1629,f1630,f1631,
+f1632,f1633,f1634,f1635,f1636,f1637,f1638,f1639,f1640,f1641,f1642,f1643,f1644,f1645,f1646,f1647,
+f1648,f1649,f1650,f1651,f1652,f1653,f1654,f1655,f1656,f1657,f1658,f1659,f1660,f1661,f1662,f1663,
+f1664,f1665,f1666,f1667,f1668,f1669,f1670,f1671,f1672,f1673,f1674,f1675,f1676,f1677,f1678,f1679,
+f1680,f1681,f1682,f1683,f1684,f1685,f1686,f1687,f1688,f1689,f1690,f1691,f1692,f1693,f1694,f1695,
+f1696,f1697,f1698,f1699,f1700,f1701,f1702,f1703,f1704,f1705,f1706,f1707,f1708,f1709,f1710,f1711,
+f1712,f1713,f1714,f1715,f1716,f1717,f1718,f1719,f1720,f1721,f1722,f1723,f1724,f1725,f1726,f1727,
+f1728,f1729,f1730,f1731,f1732,f1733,f1734,f1735,f1736,f1737,f1738,f1739,f1740,f1741,f1742,f1743,
+f1744,f1745,f1746,f1747,f1748,f1749,f1750,f1751,f1752,f1753,f1754,f1755,f1756,f1757,f1758,f1759,
+f1760,f1761,f1762,f1763,f1764,f1765,f1766,f1767,f1768,f1769,f1770,f1771,f1772,f1773,f1774,f1775,
+f1776,f1777,f1778,f1779,f1780,f1781,f1782,f1783,f1784,f1785,f1786,f1787,f1788,f1789,f1790,f1791,
+f1792,f1793,f1794,f1795,f1796,f1797,f1798,f1799,f1800,f1801,f1802,f1803,f1804,f1805,f1806,f1807,
+f1808,f1809,f1810,f1811,f1812,f1813,f1814,f1815,f1816,f1817,f1818,f1819,f1820,f1821,f1822,f1823,
+f1824,f1825,f1826,f1827,f1828,f1829,f1830,f1831,f1832,f1833,f1834,f1835,f1836,f1837,f1838,f1839,
+f1840,f1841,f1842,f1843,f1844,f1845,f1846,f1847,f1848,f1849,f1850,f1851,f1852,f1853,f1854,f1855,
+f1856,f1857,f1858,f1859,f1860,f1861,f1862,f1863,f1864,f1865,f1866,f1867,f1868,f1869,f1870,f1871,
+f1872,f1873,f1874,f1875,f1876,f1877,f1878,f1879,f1880,f1881,f1882,f1883,f1884,f1885,f1886,f1887,
+f1888,f1889,f1890,f1891,f1892,f1893,f1894,f1895,f1896,f1897,f1898,f1899,f1900,f1901,f1902,f1903,
+f1904,f1905,f1906,f1907,f1908,f1909,f1910,f1911,f1912,f1913,f1914,f1915,f1916,f1917,f1918,f1919,
+f1920,f1921,f1922,f1923,f1924,f1925,f1926,f1927,f1928,f1929,f1930,f1931,f1932,f1933,f1934,f1935,
+f1936,f1937,f1938,f1939,f1940,f1941,f1942,f1943,f1944,f1945,f1946,f1947,f1948,f1949,f1950,f1951,
+f1952,f1953,f1954,f1955,f1956,f1957,f1958,f1959,f1960,f1961,f1962,f1963,f1964,f1965,f1966,f1967,
+f1968,f1969,f1970,f1971,f1972,f1973,f1974,f1975,f1976,f1977,f1978,f1979,f1980,f1981,f1982,f1983,
+f1984,f1985,f1986,f1987,f1988,f1989,f1990,f1991,f1992,f1993,f1994,f1995,f1996,f1997,f1998,f1999,
+f2000,f2001,f2002,f2003,f2004,f2005,f2006,f2007,f2008,f2009,f2010,f2011,f2012,f2013,f2014,f2015,
+f2016,f2017,f2018,f2019,f2020,f2021,f2022,f2023,f2024,f2025,f2026,f2027,f2028,f2029,f2030,f2031,
+f2032,f2033,f2034,f2035,f2036,f2037,f2038,f2039,f2040,f2041,f2042,f2043,f2044,f2045,f2046,f2047,
+f2048,f2049,f2050,f2051,f2052,f2053,f2054,f2055,f2056,f2057,f2058,f2059,f2060,f2061,f2062,f2063,
+f2064,f2065,f2066,f2067,f2068,f2069,f2070,f2071,f2072,f2073,f2074,f2075,f2076,f2077,f2078,f2079,
+f2080,f2081,f2082,f2083,f2084,f2085,f2086,f2087,f2088,f2089,f2090,f2091,f2092,f2093,f2094,f2095,
+f2096,f2097,f2098,f2099,f2100,f2101,f2102,f2103,f2104,f2105,f2106,f2107,f2108,f2109,f2110,f2111,
+f2112,f2113,f2114,f2115,f2116,f2117,f2118,f2119,f2120,f2121,f2122,f2123,f2124,f2125,f2126,f2127,
+f2128,f2129,f2130,f2131,f2132,f2133,f2134,f2135,f2136,f2137,f2138,f2139,f2140,f2141,f2142,f2143,
+f2144,f2145,f2146,f2147,f2148,f2149,f2150,f2151,f2152,f2153,f2154,f2155,f2156,f2157,f2158,f2159,
+f2160,f2161,f2162,f2163,f2164,f2165,f2166,f2167,f2168,f2169,f2170,f2171,f2172,f2173,f2174,f2175,
+f2176,f2177,f2178,f2179,f2180,f2181,f2182,f2183,f2184,f2185,f2186,f2187,f2188,f2189,f2190,f2191,
+f2192,f2193,f2194,f2195,f2196,f2197,f2198,f2199,f2200,f2201,f2202,f2203,f2204,f2205,f2206,f2207,
+f2208,f2209,f2210,f2211,f2212,f2213,f2214,f2215,f2216,f2217,f2218,f2219,f2220,f2221,f2222,f2223,
+f2224,f2225,f2226,f2227,f2228,f2229,f2230,f2231,f2232,f2233,f2234,f2235,f2236,f2237,f2238,f2239,
+f2240,f2241,f2242,f2243,f2244,f2245,f2246,f2247,f2248,f2249,f2250,f2251,f2252,f2253,f2254,f2255,
+f2256,f2257,f2258,f2259,f2260,f2261,f2262,f2263,f2264,f2265,f2266,f2267,f2268,f2269,f2270,f2271,
+f2272,f2273,f2274,f2275,f2276,f2277,f2278,f2279,f2280,f2281,f2282,f2283,f2284,f2285,f2286,f2287,
+f2288,f2289,f2290,f2291,f2292,f2293,f2294,f2295,f2296,f2297,f2298,f2299,f2300,f2301,f2302,f2303,
+f2304,f2305,f2306,f2307,f2308,f2309,f2310,f2311,f2312,f2313,f2314,f2315,f2316,f2317,f2318,f2319,
+f2320,f2321,f2322,f2323,f2324,f2325,f2326,f2327,f2328,f2329,f2330,f2331,f2332,f2333,f2334,f2335,
+f2336,f2337,f2338,f2339,f2340,f2341,f2342,f2343,f2344,f2345,f2346,f2347,f2348,f2349,f2350,f2351,
+f2352,f2353,f2354,f2355,f2356,f2357,f2358,f2359,f2360,f2361,f2362,f2363,f2364,f2365,f2366,f2367,
+f2368,f2369,f2370,f2371,f2372,f2373,f2374,f2375,f2376,f2377,f2378,f2379,f2380,f2381,f2382,f2383,
+f2384,f2385,f2386,f2387,f2388,f2389,f2390,f2391,f2392,f2393,f2394,f2395,f2396,f2397,f2398,f2399,
+f2400,f2401,f2402,f2403,f2404,f2405,f2406,f2407,f2408,f2409,f2410,f2411,f2412,f2413,f2414,f2415,
+f2416,f2417,f2418,f2419,f2420,f2421,f2422,f2423,f2424,f2425,f2426,f2427,f2428,f2429,f2430,f2431,
+f2432,f2433,f2434,f2435,f2436,f2437,f2438,f2439,f2440,f2441,f2442,f2443,f2444,f2445,f2446,f2447,
+f2448,f2449,f2450,f2451,f2452,f2453,f2454,f2455,f2456,f2457,f2458,f2459,f2460,f2461,f2462,f2463,
+f2464,f2465,f2466,f2467,f2468,f2469,f2470,f2471,f2472,f2473,f2474,f2475,f2476,f2477,f2478,f2479,
+f2480,f2481,f2482,f2483,f2484,f2485,f2486,f2487,f2488,f2489,f2490,f2491,f2492,f2493,f2494,f2495,
+f2496,f2497,f2498,f2499,f2500,f2501,f2502,f2503,f2504,f2505,f2506,f2507,f2508,f2509,f2510,f2511,
+f2512,f2513,f2514,f2515,f2516,f2517,f2518,f2519,f2520,f2521,f2522,f2523,f2524,f2525,f2526,f2527,
+f2528,f2529,f2530,f2531,f2532,f2533,f2534,f2535,f2536,f2537,f2538,f2539,f2540,f2541,f2542,f2543,
+f2544,f2545,f2546,f2547,f2548,f2549,f2550,f2551,f2552,f2553,f2554,f2555,f2556,f2557,f2558,f2559,
+f2560,f2561,f2562,f2563,f2564,f2565,f2566,f2567,f2568,f2569,f2570,f2571,f2572,f2573,f2574,f2575,
+f2576,f2577,f2578,f2579,f2580,f2581,f2582,f2583,f2584,f2585,f2586,f2587,f2588,f2589,f2590,f2591,
+f2592,f2593,f2594,f2595,f2596,f2597,f2598,f2599,f2600,f2601,f2602,f2603,f2604,f2605,f2606,f2607,
+f2608,f2609,f2610,f2611,f2612,f2613,f2614,f2615,f2616,f2617,f2618,f2619,f2620,f2621,f2622,f2623,
+f2624,f2625,f2626,f2627,f2628,f2629,f2630,f2631,f2632,f2633,f2634,f2635,f2636,f2637,f2638,f2639,
+f2640,f2641,f2642,f2643,f2644,f2645,f2646,f2647,f2648,f2649,f2650,f2651,f2652,f2653,f2654,f2655,
+f2656,f2657,f2658,f2659,f2660,f2661,f2662,f2663,f2664,f2665,f2666,f2667,f2668,f2669,f2670,f2671,
+f2672,f2673,f2674,f2675,f2676,f2677,f2678,f2679,f2680,f2681,f2682,f2683,f2684,f2685,f2686,f2687,
+f2688,f2689,f2690,f2691,f2692,f2693,f2694,f2695,f2696,f2697,f2698,f2699,f2700,f2701,f2702,f2703,
+f2704,f2705,f2706,f2707,f2708,f2709,f2710,f2711,f2712,f2713,f2714,f2715,f2716,f2717,f2718,f2719,
+f2720,f2721,f2722,f2723,f2724,f2725,f2726,f2727,f2728,f2729,f2730,f2731,f2732,f2733,f2734,f2735,
+f2736,f2737,f2738,f2739,f2740,f2741,f2742,f2743,f2744,f2745,f2746,f2747,f2748,f2749,f2750,f2751,
+f2752,f2753,f2754,f2755,f2756,f2757,f2758,f2759,f2760,f2761,f2762,f2763,f2764,f2765,f2766,f2767,
+f2768,f2769,f2770,f2771,f2772,f2773,f2774,f2775,f2776,f2777,f2778,f2779,f2780,f2781,f2782,f2783,
+f2784,f2785,f2786,f2787,f2788,f2789,f2790,f2791,f2792,f2793,f2794,f2795,f2796,f2797,f2798,f2799,
+f2800,f2801,f2802,f2803,f2804,f2805,f2806,f2807,f2808,f2809,f2810,f2811,f2812,f2813,f2814,f2815,
+f2816,f2817,f2818,f2819,f2820,f2821,f2822,f2823,f2824,f2825,f2826,f2827,f2828,f2829,f2830,f2831,
+f2832,f2833,f2834,f2835,f2836,f2837,f2838,f2839,f2840,f2841,f2842,f2843,f2844,f2845,f2846,f2847,
+f2848,f2849,f2850,f2851,f2852,f2853,f2854,f2855,f2856,f2857,f2858,f2859,f2860,f2861,f2862,f2863,
+f2864,f2865,f2866,f2867,f2868,f2869,f2870,f2871,f2872,f2873,f2874,f2875,f2876,f2877,f2878,f2879,
+f2880,f2881,f2882,f2883,f2884,f2885,f2886,f2887,f2888,f2889,f2890,f2891,f2892,f2893,f2894,f2895,
+f2896,f2897,f2898,f2899,f2900,f2901,f2902,f2903,f2904,f2905,f2906,f2907,f2908,f2909,f2910,f2911,
+f2912,f2913,f2914,f2915,f2916,f2917,f2918,f2919,f2920,f2921,f2922,f2923,f2924,f2925,f2926,f2927,
+f2928,f2929,f2930,f2931,f2932,f2933,f2934,f2935,f2936,f2937,f2938,f2939,f2940,f2941,f2942,f2943,
+f2944,f2945,f2946,f2947,f2948,f2949,f2950,f2951,f2952,f2953,f2954,f2955,f2956,f2957,f2958,f2959,
+f2960,f2961,f2962,f2963,f2964,f2965,f2966,f2967,f2968,f2969,f2970,f2971,f2972,f2973,f2974,f2975,
+f2976,f2977,f2978,f2979,f2980,f2981,f2982,f2983,f2984,f2985,f2986,f2987,f2988,f2989,f2990,f2991,
+f2992,f2993,f2994,f2995,f2996,f2997,f2998,f2999,f3000,f3001,f3002,f3003,f3004,f3005,f3006,f3007,
+f3008,f3009,f3010,f3011,f3012,f3013,f3014,f3015,f3016,f3017,f3018,f3019,f3020,f3021,f3022,f3023,
+f3024,f3025,f3026,f3027,f3028,f3029,f3030,f3031,f3032,f3033,f3034,f3035,f3036,f3037,f3038,f3039,
+f3040,f3041,f3042,f3043,f3044,f3045,f3046,f3047,f3048,f3049,f3050,f3051,f3052,f3053,f3054,f3055,
+f3056,f3057,f3058,f3059,f3060,f3061,f3062,f3063,f3064,f3065,f3066,f3067,f3068,f3069,f3070,f3071,
+f3072,f3073,f3074,f3075,f3076,f3077,f3078,f3079,f3080,f3081,f3082,f3083,f3084,f3085,f3086,f3087,
+f3088,f3089,f3090,f3091,f3092,f3093,f3094,f3095,f3096,f3097,f3098,f3099,f3100,f3101,f3102,f3103,
+f3104,f3105,f3106,f3107,f3108,f3109,f3110,f3111,f3112,f3113,f3114,f3115,f3116,f3117,f3118,f3119,
+f3120,f3121,f3122,f3123,f3124,f3125,f3126,f3127,f3128,f3129,f3130,f3131,f3132,f3133,f3134,f3135,
+f3136,f3137,f3138,f3139,f3140,f3141,f3142,f3143,f3144,f3145,f3146,f3147,f3148,f3149,f3150,f3151,
+f3152,f3153,f3154,f3155,f3156,f3157,f3158,f3159,f3160,f3161,f3162,f3163,f3164,f3165,f3166,f3167,
+f3168,f3169,f3170,f3171,f3172,f3173,f3174,f3175,f3176,f3177,f3178,f3179,f3180,f3181,f3182,f3183,
+f3184,f3185,f3186,f3187,f3188,f3189,f3190,f3191,f3192,f3193,f3194,f3195,f3196,f3197,f3198,f3199,
+f3200,f3201,f3202,f3203,f3204,f3205,f3206,f3207,f3208,f3209,f3210,f3211,f3212,f3213,f3214,f3215,
+f3216,f3217,f3218,f3219,f3220,f3221,f3222,f3223,f3224,f3225,f3226,f3227,f3228,f3229,f3230,f3231,
+f3232,f3233,f3234,f3235,f3236,f3237,f3238,f3239,f3240,f3241,f3242,f3243,f3244,f3245,f3246,f3247,
+f3248,f3249,f3250,f3251,f3252,f3253,f3254,f3255,f3256,f3257,f3258,f3259,f3260,f3261,f3262,f3263,
+f3264,f3265,f3266,f3267,f3268,f3269,f3270,f3271,f3272,f3273,f3274,f3275,f3276,f3277,f3278,f3279,
+f3280,f3281,f3282,f3283,f3284,f3285,f3286,f3287,f3288,f3289,f3290,f3291,f3292,f3293,f3294,f3295,
+f3296,f3297,f3298,f3299,f3300,f3301,f3302,f3303,f3304,f3305,f3306,f3307,f3308,f3309,f3310,f3311,
+f3312,f3313,f3314,f3315,f3316,f3317,f3318,f3319,f3320,f3321,f3322,f3323,f3324,f3325,f3326,f3327,
+f3328,f3329,f3330,f3331,f3332,f3333,f3334,f3335,f3336,f3337,f3338,f3339,f3340,f3341,f3342,f3343,
+f3344,f3345,f3346,f3347,f3348,f3349,f3350,f3351,f3352,f3353,f3354,f3355,f3356,f3357,f3358,f3359,
+f3360,f3361,f3362,f3363,f3364,f3365,f3366,f3367,f3368,f3369,f3370,f3371,f3372,f3373,f3374,f3375,
+f3376,f3377,f3378,f3379,f3380,f3381,f3382,f3383,f3384,f3385,f3386,f3387,f3388,f3389,f3390,f3391,
+f3392,f3393,f3394,f3395,f3396,f3397,f3398,f3399,f3400,f3401,f3402,f3403,f3404,f3405,f3406,f3407,
+f3408,f3409,f3410,f3411,f3412,f3413,f3414,f3415,f3416,f3417,f3418,f3419,f3420,f3421,f3422,f3423,
+f3424,f3425,f3426,f3427,f3428,f3429,f3430,f3431,f3432,f3433,f3434,f3435,f3436,f3437,f3438,f3439,
+f3440,f3441,f3442,f3443,f3444,f3445,f3446,f3447,f3448,f3449,f3450,f3451,f3452,f3453,f3454,f3455,
+f3456,f3457,f3458,f3459,f3460,f3461,f3462,f3463,f3464,f3465,f3466,f3467,f3468,f3469,f3470,f3471,
+f3472,f3473,f3474,f3475,f3476,f3477,f3478,f3479,f3480,f3481,f3482,f3483,f3484,f3485,f3486,f3487,
+f3488,f3489,f3490,f3491,f3492,f3493,f3494,f3495,f3496,f3497,f3498,f3499,f3500,f3501,f3502,f3503,
+f3504,f3505,f3506,f3507,f3508,f3509,f3510,f3511,f3512,f3513,f3514,f3515,f3516,f3517,f3518,f3519,
+f3520,f3521,f3522,f3523,f3524,f3525,f3526,f3527,f3528,f3529,f3530,f3531,f3532,f3533,f3534,f3535,
+f3536,f3537,f3538,f3539,f3540,f3541,f3542,f3543,f3544,f3545,f3546,f3547,f3548,f3549,f3550,f3551,
+f3552,f3553,f3554,f3555,f3556,f3557,f3558,f3559,f3560,f3561,f3562,f3563,f3564,f3565,f3566,f3567,
+f3568,f3569,f3570,f3571,f3572,f3573,f3574,f3575,f3576,f3577,f3578,f3579,f3580,f3581,f3582,f3583,
+f3584,f3585,f3586,f3587,f3588,f3589,f3590,f3591,f3592,f3593,f3594,f3595,f3596,f3597,f3598,f3599,
+f3600,f3601,f3602,f3603,f3604,f3605,f3606,f3607,f3608,f3609,f3610,f3611,f3612,f3613,f3614,f3615,
+f3616,f3617,f3618,f3619,f3620,f3621,f3622,f3623,f3624,f3625,f3626,f3627,f3628,f3629,f3630,f3631,
+f3632,f3633,f3634,f3635,f3636,f3637,f3638,f3639,f3640,f3641,f3642,f3643,f3644,f3645,f3646,f3647,
+f3648,f3649,f3650,f3651,f3652,f3653,f3654,f3655,f3656,f3657,f3658,f3659,f3660,f3661,f3662,f3663,
+f3664,f3665,f3666,f3667,f3668,f3669,f3670,f3671,f3672,f3673,f3674,f3675,f3676,f3677,f3678,f3679,
+f3680,f3681,f3682,f3683,f3684,f3685,f3686,f3687,f3688,f3689,f3690,f3691,f3692,f3693,f3694,f3695,
+f3696,f3697,f3698,f3699,f3700,f3701,f3702,f3703,f3704,f3705,f3706,f3707,f3708,f3709,f3710,f3711,
+f3712,f3713,f3714,f3715,f3716,f3717,f3718,f3719,f3720,f3721,f3722,f3723,f3724,f3725,f3726,f3727,
+f3728,f3729,f3730,f3731,f3732,f3733,f3734,f3735,f3736,f3737,f3738,f3739,f3740,f3741,f3742,f3743,
+f3744,f3745,f3746,f3747,f3748,f3749,f3750,f3751,f3752,f3753,f3754,f3755,f3756,f3757,f3758,f3759,
+f3760,f3761,f3762,f3763,f3764,f3765,f3766,f3767,f3768,f3769,f3770,f3771,f3772,f3773,f3774,f3775,
+f3776,f3777,f3778,f3779,f3780,f3781,f3782,f3783,f3784,f3785,f3786,f3787,f3788,f3789,f3790,f3791,
+f3792,f3793,f3794,f3795,f3796,f3797,f3798,f3799,f3800,f3801,f3802,f3803,f3804,f3805,f3806,f3807,
+f3808,f3809,f3810,f3811,f3812,f3813,f3814,f3815,f3816,f3817,f3818,f3819,f3820,f3821,f3822,f3823,
+f3824,f3825,f3826,f3827,f3828,f3829,f3830,f3831,f3832,f3833,f3834,f3835,f3836,f3837,f3838,f3839,
+f3840,f3841,f3842,f3843,f3844,f3845,f3846,f3847,f3848,f3849,f3850,f3851,f3852,f3853,f3854,f3855,
+f3856,f3857,f3858,f3859,f3860,f3861,f3862,f3863,f3864,f3865,f3866,f3867,f3868,f3869,f3870,f3871,
+f3872,f3873,f3874,f3875,f3876,f3877,f3878,f3879,f3880,f3881,f3882,f3883,f3884,f3885,f3886,f3887,
+f3888,f3889,f3890,f3891,f3892,f3893,f3894,f3895,f3896,f3897,f3898,f3899,f3900,f3901,f3902,f3903,
+f3904,f3905,f3906,f3907,f3908,f3909,f3910,f3911,f3912,f3913,f3914,f3915,f3916,f3917,f3918,f3919,
+f3920,f3921,f3922,f3923,f3924,f3925,f3926,f3927,f3928,f3929,f3930,f3931,f3932,f3933,f3934,f3935,
+f3936,f3937,f3938,f3939,f3940,f3941,f3942,f3943,f3944,f3945,f3946,f3947,f3948,f3949,f3950,f3951,
+f3952,f3953,f3954,f3955,f3956,f3957,f3958,f3959,f3960,f3961,f3962,f3963,f3964,f3965,f3966,f3967,
+f3968,f3969,f3970,f3971,f3972,f3973,f3974,f3975,f3976,f3977,f3978,f3979,f3980,f3981,f3982,f3983,
+f3984,f3985,f3986,f3987,f3988,f3989,f3990,f3991,f3992,f3993,f3994,f3995,f3996,f3997,f3998,f3999,
+f4000,f4001,f4002,f4003,f4004,f4005,f4006,f4007,f4008,f4009,f4010,f4011,f4012,f4013,f4014,f4015,
+f4016,f4017,f4018,f4019,f4020,f4021,f4022,f4023,f4024,f4025,f4026,f4027,f4028,f4029,f4030,f4031,
+f4032,f4033,f4034,f4035,f4036,f4037,f4038,f4039,f4040,f4041,f4042,f4043,f4044,f4045,f4046,f4047,
+f4048,f4049,f4050,f4051,f4052,f4053,f4054,f4055,f4056,f4057,f4058,f4059,f4060,f4061,f4062,f4063,
+f4064,f4065,f4066,f4067,f4068,f4069,f4070,f4071,f4072,f4073,f4074,f4075,f4076,f4077,f4078,f4079,
+f4080,f4081,f4082,f4083,f4084,f4085,f4086,f4087,f4088,f4089,f4090,f4091,f4092,f4093,f4094,f4095,
+};
+static const int ref[4096][2] = {
+{     0, 0},
+{     2,-1},{     1, 0},{     3,-1},{    -1,-1},{     3,-1},{     0, 0},{     4,-1},{    -3,-1},
+{     5,-1},{    -2,-1},{     6,-1},{    -7,-1},{     9,-1},{    -6,-1},{    10,-1},{   -15,-1},
+{    17,-1},{   -14,-1},{    18,-1},{   -31,-1},{    33,-1},{   -30,-1},{    34,-1},{   -63,-1},
+{    65,-1},{   -62,-1},{    66,-1},{  -127,-1},{   129,-1},{  -126,-1},{   130,-1},{  -255,-1},
+{   257,-1},{  -254,-1},{   258,-1},{  -511,-1},{   513,-1},{  -510,-1},{   514,-1},{ -1023,-1},
+{  1025,-1},{ -1022,-1},{  1026,-1},{ -2047,-1},{  2049,-1},{ -2046,-1},{  2050,-1},{ -4095,-1},
+{  4097,-1},{ -4094,-1},{  4098,-1},{ -8191,-1},{  8193,-1},{ -8190,-1},{  8194,-1},{-16383,-1},
+{ 16385,-1},{-16382,-1},{ 16386,-1},{-32767,-1},{ 32769,-1},{-32766,-1},{ 32770,-1},{    -2, 0},
+{     0, 0},{    -1, 0},{     1, 0},{    -3, 0},{     1, 0},{    -2, 0},{     2, 0},{    -5, 0},
+{     3, 0},{    -4, 0},{     4, 0},{    -9, 0},{     7, 0},{    -8, 0},{     8, 0},{   -17, 0},
+{    15, 0},{   -16, 0},{    16, 0},{   -33, 0},{    31, 0},{   -32, 0},{    32, 0},{   -65, 0},
+{    63, 0},{   -64, 0},{    64, 0},{  -129, 0},{   127, 0},{  -128, 0},{   128, 0},{  -257, 0},
+{   255, 0},{  -256, 0},{   256, 0},{  -513, 0},{   511, 0},{  -512, 0},{   512, 0},{ -1025, 0},
+{  1023, 0},{ -1024, 0},{  1024, 0},{ -2049, 0},{  2047, 0},{ -2048, 0},{  2048, 0},{ -4097, 0},
+{  4095, 0},{ -4096, 0},{  4096, 0},{ -8193, 0},{  8191, 0},{ -8192, 0},{  8192, 0},{-16385, 0},
+{ 16383, 0},{-16384, 0},{ 16384, 0},{-32769, 0},{ 32767, 0},{-32768, 0},{ 32768, 0},{    -1,-1},
+{     1,-1},{     0, 0},{     2,-1},{    -2,-1},{     2,-1},{    -1,-1},{     3,-1},{    -4,-1},
+{     4,-1},{    -3,-1},{     5,-1},{    -8,-1},{     8,-1},{    -7,-1},{     9,-1},{   -16,-1},
+{    16,-1},{   -15,-1},{    17,-1},{   -32,-1},{    32,-1},{   -31,-1},{    33,-1},{   -64,-1},
+{    64,-1},{   -63,-1},{    65,-1},{  -128,-1},{   128,-1},{  -127,-1},{   129,-1},{  -256,-1},
+{   256,-1},{  -255,-1},{   257,-1},{  -512,-1},{   512,-1},{  -511,-1},{   513,-1},{ -1024,-1},
+{  1024,-1},{ -1023,-1},{  1025,-1},{ -2048,-1},{  2048,-1},{ -2047,-1},{  2049,-1},{ -4096,-1},
+{  4096,-1},{ -4095,-1},{  4097,-1},{ -8192,-1},{  8192,-1},{ -8191,-1},{  8193,-1},{-16384,-1},
+{ 16384,-1},{-16383,-1},{ 16385,-1},{-32768,-1},{ 32768,-1},{-32767,-1},{ 32769,-1},{    -3, 0},
+{    -1,-1},{    -2, 0},{     0, 0},{    -4, 0},{     0, 0},{    -3, 0},{     1, 0},{    -6, 0},
+{     2, 0},{    -5, 0},{     3, 0},{   -10, 0},{     6, 0},{    -9, 0},{     7, 0},{   -18, 0},
+{    14, 0},{   -17, 0},{    15, 0},{   -34, 0},{    30, 0},{   -33, 0},{    31, 0},{   -66, 0},
+{    62, 0},{   -65, 0},{    63, 0},{  -130, 0},{   126, 0},{  -129, 0},{   127, 0},{  -258, 0},
+{   254, 0},{  -257, 0},{   255, 0},{  -514, 0},{   510, 0},{  -513, 0},{   511, 0},{ -1026, 0},
+{  1022, 0},{ -1025, 0},{  1023, 0},{ -2050, 0},{  2046, 0},{ -2049, 0},{  2047, 0},{ -4098, 0},
+{  4094, 0},{ -4097, 0},{  4095, 0},{ -8194, 0},{  8190, 0},{ -8193, 0},{  8191, 0},{-16386, 0},
+{ 16382, 0},{-16385, 0},{ 16383, 0},{-32770, 0},{ 32766, 0},{-32769, 0},{ 32767, 0},{     1, 0},
+{     3,-1},{     2, 0},{     4,-1},{     0, 0},{     4,-1},{     1, 0},{     5,-1},{    -2,-1},
+{     6,-1},{    -1,-1},{     7,-1},{    -6,-1},{    10,-1},{    -5,-1},{    11,-1},{   -14,-1},
+{    18,-1},{   -13,-1},{    19,-1},{   -30,-1},{    34,-1},{   -29,-1},{    35,-1},{   -62,-1},
+{    66,-1},{   -61,-1},{    67,-1},{  -126,-1},{   130,-1},{  -125,-1},{   131,-1},{  -254,-1},
+{   258,-1},{  -253,-1},{   259,-1},{  -510,-1},{   514,-1},{  -509,-1},{   515,-1},{ -1022,-1},
+{  1026,-1},{ -1021,-1},{  1027,-1},{ -2046,-1},{  2050,-1},{ -2045,-1},{  2051,-1},{ -4094,-1},
+{  4098,-1},{ -4093,-1},{  4099,-1},{ -8190,-1},{  8194,-1},{ -8189,-1},{  8195,-1},{-16382,-1},
+{ 16386,-1},{-16381,-1},{ 16387,-1},{-32766,-1},{ 32770,-1},{-32765,-1},{ 32771,-1},{    -3, 0},
+{    -1,-1},{    -2, 0},{     0, 0},{    -4, 0},{     0, 0},{    -3, 0},{     1, 0},{    -6, 0},
+{     2, 0},{    -5, 0},{     3, 0},{   -10, 0},{     6, 0},{    -9, 0},{     7, 0},{   -18, 0},
+{    14, 0},{   -17, 0},{    15, 0},{   -34, 0},{    30, 0},{   -33, 0},{    31, 0},{   -66, 0},
+{    62, 0},{   -65, 0},{    63, 0},{  -130, 0},{   126, 0},{  -129, 0},{   127, 0},{  -258, 0},
+{   254, 0},{  -257, 0},{   255, 0},{  -514, 0},{   510, 0},{  -513, 0},{   511, 0},{ -1026, 0},
+{  1022, 0},{ -1025, 0},{  1023, 0},{ -2050, 0},{  2046, 0},{ -2049, 0},{  2047, 0},{ -4098, 0},
+{  4094, 0},{ -4097, 0},{  4095, 0},{ -8194, 0},{  8190, 0},{ -8193, 0},{  8191, 0},{-16386, 0},
+{ 16382, 0},{-16385, 0},{ 16383, 0},{-32770, 0},{ 32766, 0},{-32769, 0},{ 32767, 0},{     0, 0},
+{     2,-1},{     1, 0},{     3,-1},{    -1,-1},{     3,-1},{     0, 0},{     4,-1},{    -3,-1},
+{     5,-1},{    -2,-1},{     6,-1},{    -7,-1},{     9,-1},{    -6,-1},{    10,-1},{   -15,-1},
+{    17,-1},{   -14,-1},{    18,-1},{   -31,-1},{    33,-1},{   -30,-1},{    34,-1},{   -63,-1},
+{    65,-1},{   -62,-1},{    66,-1},{  -127,-1},{   129,-1},{  -126,-1},{   130,-1},{  -255,-1},
+{   257,-1},{  -254,-1},{   258,-1},{  -511,-1},{   513,-1},{  -510,-1},{   514,-1},{ -1023,-1},
+{  1025,-1},{ -1022,-1},{  1026,-1},{ -2047,-1},{  2049,-1},{ -2046,-1},{  2050,-1},{ -4095,-1},
+{  4097,-1},{ -4094,-1},{  4098,-1},{ -8191,-1},{  8193,-1},{ -8190,-1},{  8194,-1},{-16383,-1},
+{ 16385,-1},{-16382,-1},{ 16386,-1},{-32767,-1},{ 32769,-1},{-32766,-1},{ 32770,-1},{    -4, 0},
+{    -2,-1},{    -3, 0},{    -1,-1},{    -5, 0},{    -1,-1},{    -4, 0},{     0, 0},{    -7, 0},
+{     1, 0},{    -6, 0},{     2, 0},{   -11, 0},{     5, 0},{   -10, 0},{     6, 0},{   -19, 0},
+{    13, 0},{   -18, 0},{    14, 0},{   -35, 0},{    29, 0},{   -34, 0},{    30, 0},{   -67, 0},
+{    61, 0},{   -66, 0},{    62, 0},{  -131, 0},{   125, 0},{  -130, 0},{   126, 0},{  -259, 0},
+{   253, 0},{  -258, 0},{   254, 0},{  -515, 0},{   509, 0},{  -514, 0},{   510, 0},{ -1027, 0},
+{  1021, 0},{ -1026, 0},{  1022, 0},{ -2051, 0},{  2045, 0},{ -2050, 0},{  2046, 0},{ -4099, 0},
+{  4093, 0},{ -4098, 0},{  4094, 0},{ -8195, 0},{  8189, 0},{ -8194, 0},{  8190, 0},{-16387, 0},
+{ 16381, 0},{-16386, 0},{ 16382, 0},{-32771, 0},{ 32765, 0},{-32770, 0},{ 32766, 0},{     3, 0},
+{     5,-1},{     4, 0},{     6,-1},{     2, 0},{     6,-1},{     3, 0},{     7,-1},{     0, 0},
+{     8,-1},{     1, 0},{     9,-1},{    -4,-1},{    12,-1},{    -3,-1},{    13,-1},{   -12,-1},
+{    20,-1},{   -11,-1},{    21,-1},{   -28,-1},{    36,-1},{   -27,-1},{    37,-1},{   -60,-1},
+{    68,-1},{   -59,-1},{    69,-1},{  -124,-1},{   132,-1},{  -123,-1},{   133,-1},{  -252,-1},
+{   260,-1},{  -251,-1},{   261,-1},{  -508,-1},{   516,-1},{  -507,-1},{   517,-1},{ -1020,-1},
+{  1028,-1},{ -1019,-1},{  1029,-1},{ -2044,-1},{  2052,-1},{ -2043,-1},{  2053,-1},{ -4092,-1},
+{  4100,-1},{ -4091,-1},{  4101,-1},{ -8188,-1},{  8196,-1},{ -8187,-1},{  8197,-1},{-16380,-1},
+{ 16388,-1},{-16379,-1},{ 16389,-1},{-32764,-1},{ 32772,-1},{-32763,-1},{ 32773,-1},{    -5, 0},
+{    -3,-1},{    -4, 0},{    -2,-1},{    -6, 0},{    -2,-1},{    -5, 0},{    -1,-1},{    -8, 0},
+{     0, 0},{    -7, 0},{     1, 0},{   -12, 0},{     4, 0},{   -11, 0},{     5, 0},{   -20, 0},
+{    12, 0},{   -19, 0},{    13, 0},{   -36, 0},{    28, 0},{   -35, 0},{    29, 0},{   -68, 0},
+{    60, 0},{   -67, 0},{    61, 0},{  -132, 0},{   124, 0},{  -131, 0},{   125, 0},{  -260, 0},
+{   252, 0},{  -259, 0},{   253, 0},{  -516, 0},{   508, 0},{  -515, 0},{   509, 0},{ -1028, 0},
+{  1020, 0},{ -1027, 0},{  1021, 0},{ -2052, 0},{  2044, 0},{ -2051, 0},{  2045, 0},{ -4100, 0},
+{  4092, 0},{ -4099, 0},{  4093, 0},{ -8196, 0},{  8188, 0},{ -8195, 0},{  8189, 0},{-16388, 0},
+{ 16380, 0},{-16387, 0},{ 16381, 0},{-32772, 0},{ 32764, 0},{-32771, 0},{ 32765, 0},{     2, 0},
+{     4,-1},{     3, 0},{     5,-1},{     1, 0},{     5,-1},{     2, 0},{     6,-1},{    -1,-1},
+{     7,-1},{     0, 0},{     8,-1},{    -5,-1},{    11,-1},{    -4,-1},{    12,-1},{   -13,-1},
+{    19,-1},{   -12,-1},{    20,-1},{   -29,-1},{    35,-1},{   -28,-1},{    36,-1},{   -61,-1},
+{    67,-1},{   -60,-1},{    68,-1},{  -125,-1},{   131,-1},{  -124,-1},{   132,-1},{  -253,-1},
+{   259,-1},{  -252,-1},{   260,-1},{  -509,-1},{   515,-1},{  -508,-1},{   516,-1},{ -1021,-1},
+{  1027,-1},{ -1020,-1},{  1028,-1},{ -2045,-1},{  2051,-1},{ -2044,-1},{  2052,-1},{ -4093,-1},
+{  4099,-1},{ -4092,-1},{  4100,-1},{ -8189,-1},{  8195,-1},{ -8188,-1},{  8196,-1},{-16381,-1},
+{ 16387,-1},{-16380,-1},{ 16388,-1},{-32765,-1},{ 32771,-1},{-32764,-1},{ 32772,-1},{    -6, 0},
+{    -4,-1},{    -5, 0},{    -3,-1},{    -7, 0},{    -3,-1},{    -6, 0},{    -2,-1},{    -9, 0},
+{    -1,-1},{    -8, 0},{     0, 0},{   -13, 0},{     3, 0},{   -12, 0},{     4, 0},{   -21, 0},
+{    11, 0},{   -20, 0},{    12, 0},{   -37, 0},{    27, 0},{   -36, 0},{    28, 0},{   -69, 0},
+{    59, 0},{   -68, 0},{    60, 0},{  -133, 0},{   123, 0},{  -132, 0},{   124, 0},{  -261, 0},
+{   251, 0},{  -260, 0},{   252, 0},{  -517, 0},{   507, 0},{  -516, 0},{   508, 0},{ -1029, 0},
+{  1019, 0},{ -1028, 0},{  1020, 0},{ -2053, 0},{  2043, 0},{ -2052, 0},{  2044, 0},{ -4101, 0},
+{  4091, 0},{ -4100, 0},{  4092, 0},{ -8197, 0},{  8187, 0},{ -8196, 0},{  8188, 0},{-16389, 0},
+{ 16379, 0},{-16388, 0},{ 16380, 0},{-32773, 0},{ 32763, 0},{-32772, 0},{ 32764, 0},{     7, 0},
+{     9,-1},{     8, 0},{    10,-1},{     6, 0},{    10,-1},{     7, 0},{    11,-1},{     4, 0},
+{    12,-1},{     5, 0},{    13,-1},{     0, 0},{    16,-1},{     1, 0},{    17,-1},{    -8,-1},
+{    24,-1},{    -7,-1},{    25,-1},{   -24,-1},{    40,-1},{   -23,-1},{    41,-1},{   -56,-1},
+{    72,-1},{   -55,-1},{    73,-1},{  -120,-1},{   136,-1},{  -119,-1},{   137,-1},{  -248,-1},
+{   264,-1},{  -247,-1},{   265,-1},{  -504,-1},{   520,-1},{  -503,-1},{   521,-1},{ -1016,-1},
+{  1032,-1},{ -1015,-1},{  1033,-1},{ -2040,-1},{  2056,-1},{ -2039,-1},{  2057,-1},{ -4088,-1},
+{  4104,-1},{ -4087,-1},{  4105,-1},{ -8184,-1},{  8200,-1},{ -8183,-1},{  8201,-1},{-16376,-1},
+{ 16392,-1},{-16375,-1},{ 16393,-1},{-32760,-1},{ 32776,-1},{-32759,-1},{ 32777,-1},{    -9, 0},
+{    -7,-1},{    -8, 0},{    -6,-1},{   -10, 0},{    -6,-1},{    -9, 0},{    -5,-1},{   -12, 0},
+{    -4,-1},{   -11, 0},{    -3,-1},{   -16, 0},{     0, 0},{   -15, 0},{     1, 0},{   -24, 0},
+{     8, 0},{   -23, 0},{     9, 0},{   -40, 0},{    24, 0},{   -39, 0},{    25, 0},{   -72, 0},
+{    56, 0},{   -71, 0},{    57, 0},{  -136, 0},{   120, 0},{  -135, 0},{   121, 0},{  -264, 0},
+{   248, 0},{  -263, 0},{   249, 0},{  -520, 0},{   504, 0},{  -519, 0},{   505, 0},{ -1032, 0},
+{  1016, 0},{ -1031, 0},{  1017, 0},{ -2056, 0},{  2040, 0},{ -2055, 0},{  2041, 0},{ -4104, 0},
+{  4088, 0},{ -4103, 0},{  4089, 0},{ -8200, 0},{  8184, 0},{ -8199, 0},{  8185, 0},{-16392, 0},
+{ 16376, 0},{-16391, 0},{ 16377, 0},{-32776, 0},{ 32760, 0},{-32775, 0},{ 32761, 0},{     6, 0},
+{     8,-1},{     7, 0},{     9,-1},{     5, 0},{     9,-1},{     6, 0},{    10,-1},{     3, 0},
+{    11,-1},{     4, 0},{    12,-1},{    -1,-1},{    15,-1},{     0, 0},{    16,-1},{    -9,-1},
+{    23,-1},{    -8,-1},{    24,-1},{   -25,-1},{    39,-1},{   -24,-1},{    40,-1},{   -57,-1},
+{    71,-1},{   -56,-1},{    72,-1},{  -121,-1},{   135,-1},{  -120,-1},{   136,-1},{  -249,-1},
+{   263,-1},{  -248,-1},{   264,-1},{  -505,-1},{   519,-1},{  -504,-1},{   520,-1},{ -1017,-1},
+{  1031,-1},{ -1016,-1},{  1032,-1},{ -2041,-1},{  2055,-1},{ -2040,-1},{  2056,-1},{ -4089,-1},
+{  4103,-1},{ -4088,-1},{  4104,-1},{ -8185,-1},{  8199,-1},{ -8184,-1},{  8200,-1},{-16377,-1},
+{ 16391,-1},{-16376,-1},{ 16392,-1},{-32761,-1},{ 32775,-1},{-32760,-1},{ 32776,-1},{   -10, 0},
+{    -8,-1},{    -9, 0},{    -7,-1},{   -11, 0},{    -7,-1},{   -10, 0},{    -6,-1},{   -13, 0},
+{    -5,-1},{   -12, 0},{    -4,-1},{   -17, 0},{    -1,-1},{   -16, 0},{     0, 0},{   -25, 0},
+{     7, 0},{   -24, 0},{     8, 0},{   -41, 0},{    23, 0},{   -40, 0},{    24, 0},{   -73, 0},
+{    55, 0},{   -72, 0},{    56, 0},{  -137, 0},{   119, 0},{  -136, 0},{   120, 0},{  -265, 0},
+{   247, 0},{  -264, 0},{   248, 0},{  -521, 0},{   503, 0},{  -520, 0},{   504, 0},{ -1033, 0},
+{  1015, 0},{ -1032, 0},{  1016, 0},{ -2057, 0},{  2039, 0},{ -2056, 0},{  2040, 0},{ -4105, 0},
+{  4087, 0},{ -4104, 0},{  4088, 0},{ -8201, 0},{  8183, 0},{ -8200, 0},{  8184, 0},{-16393, 0},
+{ 16375, 0},{-16392, 0},{ 16376, 0},{-32777, 0},{ 32759, 0},{-32776, 0},{ 32760, 0},{    15, 0},
+{    17,-1},{    16, 0},{    18,-1},{    14, 0},{    18,-1},{    15, 0},{    19,-1},{    12, 0},
+{    20,-1},{    13, 0},{    21,-1},{     8, 0},{    24,-1},{     9, 0},{    25,-1},{     0, 0},
+{    32,-1},{     1, 0},{    33,-1},{   -16,-1},{    48,-1},{   -15,-1},{    49,-1},{   -48,-1},
+{    80,-1},{   -47,-1},{    81,-1},{  -112,-1},{   144,-1},{  -111,-1},{   145,-1},{  -240,-1},
+{   272,-1},{  -239,-1},{   273,-1},{  -496,-1},{   528,-1},{  -495,-1},{   529,-1},{ -1008,-1},
+{  1040,-1},{ -1007,-1},{  1041,-1},{ -2032,-1},{  2064,-1},{ -2031,-1},{  2065,-1},{ -4080,-1},
+{  4112,-1},{ -4079,-1},{  4113,-1},{ -8176,-1},{  8208,-1},{ -8175,-1},{  8209,-1},{-16368,-1},
+{ 16400,-1},{-16367,-1},{ 16401,-1},{-32752,-1},{ 32784,-1},{-32751,-1},{ 32785,-1},{   -17, 0},
+{   -15,-1},{   -16, 0},{   -14,-1},{   -18, 0},{   -14,-1},{   -17, 0},{   -13,-1},{   -20, 0},
+{   -12,-1},{   -19, 0},{   -11,-1},{   -24, 0},{    -8,-1},{   -23, 0},{    -7,-1},{   -32, 0},
+{     0, 0},{   -31, 0},{     1, 0},{   -48, 0},{    16, 0},{   -47, 0},{    17, 0},{   -80, 0},
+{    48, 0},{   -79, 0},{    49, 0},{  -144, 0},{   112, 0},{  -143, 0},{   113, 0},{  -272, 0},
+{   240, 0},{  -271, 0},{   241, 0},{  -528, 0},{   496, 0},{  -527, 0},{   497, 0},{ -1040, 0},
+{  1008, 0},{ -1039, 0},{  1009, 0},{ -2064, 0},{  2032, 0},{ -2063, 0},{  2033, 0},{ -4112, 0},
+{  4080, 0},{ -4111, 0},{  4081, 0},{ -8208, 0},{  8176, 0},{ -8207, 0},{  8177, 0},{-16400, 0},
+{ 16368, 0},{-16399, 0},{ 16369, 0},{-32784, 0},{ 32752, 0},{-32783, 0},{ 32753, 0},{    14, 0},
+{    16,-1},{    15, 0},{    17,-1},{    13, 0},{    17,-1},{    14, 0},{    18,-1},{    11, 0},
+{    19,-1},{    12, 0},{    20,-1},{     7, 0},{    23,-1},{     8, 0},{    24,-1},{    -1,-1},
+{    31,-1},{     0, 0},{    32,-1},{   -17,-1},{    47,-1},{   -16,-1},{    48,-1},{   -49,-1},
+{    79,-1},{   -48,-1},{    80,-1},{  -113,-1},{   143,-1},{  -112,-1},{   144,-1},{  -241,-1},
+{   271,-1},{  -240,-1},{   272,-1},{  -497,-1},{   527,-1},{  -496,-1},{   528,-1},{ -1009,-1},
+{  1039,-1},{ -1008,-1},{  1040,-1},{ -2033,-1},{  2063,-1},{ -2032,-1},{  2064,-1},{ -4081,-1},
+{  4111,-1},{ -4080,-1},{  4112,-1},{ -8177,-1},{  8207,-1},{ -8176,-1},{  8208,-1},{-16369,-1},
+{ 16399,-1},{-16368,-1},{ 16400,-1},{-32753,-1},{ 32783,-1},{-32752,-1},{ 32784,-1},{   -18, 0},
+{   -16,-1},{   -17, 0},{   -15,-1},{   -19, 0},{   -15,-1},{   -18, 0},{   -14,-1},{   -21, 0},
+{   -13,-1},{   -20, 0},{   -12,-1},{   -25, 0},{    -9,-1},{   -24, 0},{    -8,-1},{   -33, 0},
+{    -1,-1},{   -32, 0},{     0, 0},{   -49, 0},{    15, 0},{   -48, 0},{    16, 0},{   -81, 0},
+{    47, 0},{   -80, 0},{    48, 0},{  -145, 0},{   111, 0},{  -144, 0},{   112, 0},{  -273, 0},
+{   239, 0},{  -272, 0},{   240, 0},{  -529, 0},{   495, 0},{  -528, 0},{   496, 0},{ -1041, 0},
+{  1007, 0},{ -1040, 0},{  1008, 0},{ -2065, 0},{  2031, 0},{ -2064, 0},{  2032, 0},{ -4113, 0},
+{  4079, 0},{ -4112, 0},{  4080, 0},{ -8209, 0},{  8175, 0},{ -8208, 0},{  8176, 0},{-16401, 0},
+{ 16367, 0},{-16400, 0},{ 16368, 0},{-32785, 0},{ 32751, 0},{-32784, 0},{ 32752, 0},{    31, 0},
+{    33,-1},{    32, 0},{    34,-1},{    30, 0},{    34,-1},{    31, 0},{    35,-1},{    28, 0},
+{    36,-1},{    29, 0},{    37,-1},{    24, 0},{    40,-1},{    25, 0},{    41,-1},{    16, 0},
+{    48,-1},{    17, 0},{    49,-1},{     0, 0},{    64,-1},{     1, 0},{    65,-1},{   -32,-1},
+{    96,-1},{   -31,-1},{    97,-1},{   -96,-1},{   160,-1},{   -95,-1},{   161,-1},{  -224,-1},
+{   288,-1},{  -223,-1},{   289,-1},{  -480,-1},{   544,-1},{  -479,-1},{   545,-1},{  -992,-1},
+{  1056,-1},{  -991,-1},{  1057,-1},{ -2016,-1},{  2080,-1},{ -2015,-1},{  2081,-1},{ -4064,-1},
+{  4128,-1},{ -4063,-1},{  4129,-1},{ -8160,-1},{  8224,-1},{ -8159,-1},{  8225,-1},{-16352,-1},
+{ 16416,-1},{-16351,-1},{ 16417,-1},{-32736,-1},{ 32800,-1},{-32735,-1},{ 32801,-1},{   -33, 0},
+{   -31,-1},{   -32, 0},{   -30,-1},{   -34, 0},{   -30,-1},{   -33, 0},{   -29,-1},{   -36, 0},
+{   -28,-1},{   -35, 0},{   -27,-1},{   -40, 0},{   -24,-1},{   -39, 0},{   -23,-1},{   -48, 0},
+{   -16,-1},{   -47, 0},{   -15,-1},{   -64, 0},{     0, 0},{   -63, 0},{     1, 0},{   -96, 0},
+{    32, 0},{   -95, 0},{    33, 0},{  -160, 0},{    96, 0},{  -159, 0},{    97, 0},{  -288, 0},
+{   224, 0},{  -287, 0},{   225, 0},{  -544, 0},{   480, 0},{  -543, 0},{   481, 0},{ -1056, 0},
+{   992, 0},{ -1055, 0},{   993, 0},{ -2080, 0},{  2016, 0},{ -2079, 0},{  2017, 0},{ -4128, 0},
+{  4064, 0},{ -4127, 0},{  4065, 0},{ -8224, 0},{  8160, 0},{ -8223, 0},{  8161, 0},{-16416, 0},
+{ 16352, 0},{-16415, 0},{ 16353, 0},{-32800, 0},{ 32736, 0},{-32799, 0},{ 32737, 0},{    30, 0},
+{    32,-1},{    31, 0},{    33,-1},{    29, 0},{    33,-1},{    30, 0},{    34,-1},{    27, 0},
+{    35,-1},{    28, 0},{    36,-1},{    23, 0},{    39,-1},{    24, 0},{    40,-1},{    15, 0},
+{    47,-1},{    16, 0},{    48,-1},{    -1,-1},{    63,-1},{     0, 0},{    64,-1},{   -33,-1},
+{    95,-1},{   -32,-1},{    96,-1},{   -97,-1},{   159,-1},{   -96,-1},{   160,-1},{  -225,-1},
+{   287,-1},{  -224,-1},{   288,-1},{  -481,-1},{   543,-1},{  -480,-1},{   544,-1},{  -993,-1},
+{  1055,-1},{  -992,-1},{  1056,-1},{ -2017,-1},{  2079,-1},{ -2016,-1},{  2080,-1},{ -4065,-1},
+{  4127,-1},{ -4064,-1},{  4128,-1},{ -8161,-1},{  8223,-1},{ -8160,-1},{  8224,-1},{-16353,-1},
+{ 16415,-1},{-16352,-1},{ 16416,-1},{-32737,-1},{ 32799,-1},{-32736,-1},{ 32800,-1},{   -34, 0},
+{   -32,-1},{   -33, 0},{   -31,-1},{   -35, 0},{   -31,-1},{   -34, 0},{   -30,-1},{   -37, 0},
+{   -29,-1},{   -36, 0},{   -28,-1},{   -41, 0},{   -25,-1},{   -40, 0},{   -24,-1},{   -49, 0},
+{   -17,-1},{   -48, 0},{   -16,-1},{   -65, 0},{    -1,-1},{   -64, 0},{     0, 0},{   -97, 0},
+{    31, 0},{   -96, 0},{    32, 0},{  -161, 0},{    95, 0},{  -160, 0},{    96, 0},{  -289, 0},
+{   223, 0},{  -288, 0},{   224, 0},{  -545, 0},{   479, 0},{  -544, 0},{   480, 0},{ -1057, 0},
+{   991, 0},{ -1056, 0},{   992, 0},{ -2081, 0},{  2015, 0},{ -2080, 0},{  2016, 0},{ -4129, 0},
+{  4063, 0},{ -4128, 0},{  4064, 0},{ -8225, 0},{  8159, 0},{ -8224, 0},{  8160, 0},{-16417, 0},
+{ 16351, 0},{-16416, 0},{ 16352, 0},{-32801, 0},{ 32735, 0},{-32800, 0},{ 32736, 0},{    63, 0},
+{    65,-1},{    64, 0},{    66,-1},{    62, 0},{    66,-1},{    63, 0},{    67,-1},{    60, 0},
+{    68,-1},{    61, 0},{    69,-1},{    56, 0},{    72,-1},{    57, 0},{    73,-1},{    48, 0},
+{    80,-1},{    49, 0},{    81,-1},{    32, 0},{    96,-1},{    33, 0},{    97,-1},{     0, 0},
+{   128,-1},{     1, 0},{   129,-1},{   -64,-1},{   192,-1},{   -63,-1},{   193,-1},{  -192,-1},
+{   320,-1},{  -191,-1},{   321,-1},{  -448,-1},{   576,-1},{  -447,-1},{   577,-1},{  -960,-1},
+{  1088,-1},{  -959,-1},{  1089,-1},{ -1984,-1},{  2112,-1},{ -1983,-1},{  2113,-1},{ -4032,-1},
+{  4160,-1},{ -4031,-1},{  4161,-1},{ -8128,-1},{  8256,-1},{ -8127,-1},{  8257,-1},{-16320,-1},
+{ 16448,-1},{-16319,-1},{ 16449,-1},{-32704,-1},{ 32832,-1},{-32703,-1},{ 32833,-1},{   -65, 0},
+{   -63,-1},{   -64, 0},{   -62,-1},{   -66, 0},{   -62,-1},{   -65, 0},{   -61,-1},{   -68, 0},
+{   -60,-1},{   -67, 0},{   -59,-1},{   -72, 0},{   -56,-1},{   -71, 0},{   -55,-1},{   -80, 0},
+{   -48,-1},{   -79, 0},{   -47,-1},{   -96, 0},{   -32,-1},{   -95, 0},{   -31,-1},{  -128, 0},
+{     0, 0},{  -127, 0},{     1, 0},{  -192, 0},{    64, 0},{  -191, 0},{    65, 0},{  -320, 0},
+{   192, 0},{  -319, 0},{   193, 0},{  -576, 0},{   448, 0},{  -575, 0},{   449, 0},{ -1088, 0},
+{   960, 0},{ -1087, 0},{   961, 0},{ -2112, 0},{  1984, 0},{ -2111, 0},{  1985, 0},{ -4160, 0},
+{  4032, 0},{ -4159, 0},{  4033, 0},{ -8256, 0},{  8128, 0},{ -8255, 0},{  8129, 0},{-16448, 0},
+{ 16320, 0},{-16447, 0},{ 16321, 0},{-32832, 0},{ 32704, 0},{-32831, 0},{ 32705, 0},{    62, 0},
+{    64,-1},{    63, 0},{    65,-1},{    61, 0},{    65,-1},{    62, 0},{    66,-1},{    59, 0},
+{    67,-1},{    60, 0},{    68,-1},{    55, 0},{    71,-1},{    56, 0},{    72,-1},{    47, 0},
+{    79,-1},{    48, 0},{    80,-1},{    31, 0},{    95,-1},{    32, 0},{    96,-1},{    -1,-1},
+{   127,-1},{     0, 0},{   128,-1},{   -65,-1},{   191,-1},{   -64,-1},{   192,-1},{  -193,-1},
+{   319,-1},{  -192,-1},{   320,-1},{  -449,-1},{   575,-1},{  -448,-1},{   576,-1},{  -961,-1},
+{  1087,-1},{  -960,-1},{  1088,-1},{ -1985,-1},{  2111,-1},{ -1984,-1},{  2112,-1},{ -4033,-1},
+{  4159,-1},{ -4032,-1},{  4160,-1},{ -8129,-1},{  8255,-1},{ -8128,-1},{  8256,-1},{-16321,-1},
+{ 16447,-1},{-16320,-1},{ 16448,-1},{-32705,-1},{ 32831,-1},{-32704,-1},{ 32832,-1},{   -66, 0},
+{   -64,-1},{   -65, 0},{   -63,-1},{   -67, 0},{   -63,-1},{   -66, 0},{   -62,-1},{   -69, 0},
+{   -61,-1},{   -68, 0},{   -60,-1},{   -73, 0},{   -57,-1},{   -72, 0},{   -56,-1},{   -81, 0},
+{   -49,-1},{   -80, 0},{   -48,-1},{   -97, 0},{   -33,-1},{   -96, 0},{   -32,-1},{  -129, 0},
+{    -1,-1},{  -128, 0},{     0, 0},{  -193, 0},{    63, 0},{  -192, 0},{    64, 0},{  -321, 0},
+{   191, 0},{  -320, 0},{   192, 0},{  -577, 0},{   447, 0},{  -576, 0},{   448, 0},{ -1089, 0},
+{   959, 0},{ -1088, 0},{   960, 0},{ -2113, 0},{  1983, 0},{ -2112, 0},{  1984, 0},{ -4161, 0},
+{  4031, 0},{ -4160, 0},{  4032, 0},{ -8257, 0},{  8127, 0},{ -8256, 0},{  8128, 0},{-16449, 0},
+{ 16319, 0},{-16448, 0},{ 16320, 0},{-32833, 0},{ 32703, 0},{-32832, 0},{ 32704, 0},{   127, 0},
+{   129,-1},{   128, 0},{   130,-1},{   126, 0},{   130,-1},{   127, 0},{   131,-1},{   124, 0},
+{   132,-1},{   125, 0},{   133,-1},{   120, 0},{   136,-1},{   121, 0},{   137,-1},{   112, 0},
+{   144,-1},{   113, 0},{   145,-1},{    96, 0},{   160,-1},{    97, 0},{   161,-1},{    64, 0},
+{   192,-1},{    65, 0},{   193,-1},{     0, 0},{   256,-1},{     1, 0},{   257,-1},{  -128,-1},
+{   384,-1},{  -127,-1},{   385,-1},{  -384,-1},{   640,-1},{  -383,-1},{   641,-1},{  -896,-1},
+{  1152,-1},{  -895,-1},{  1153,-1},{ -1920,-1},{  2176,-1},{ -1919,-1},{  2177,-1},{ -3968,-1},
+{  4224,-1},{ -3967,-1},{  4225,-1},{ -8064,-1},{  8320,-1},{ -8063,-1},{  8321,-1},{-16256,-1},
+{ 16512,-1},{-16255,-1},{ 16513,-1},{-32640,-1},{ 32896,-1},{-32639,-1},{ 32897,-1},{  -129, 0},
+{  -127,-1},{  -128, 0},{  -126,-1},{  -130, 0},{  -126,-1},{  -129, 0},{  -125,-1},{  -132, 0},
+{  -124,-1},{  -131, 0},{  -123,-1},{  -136, 0},{  -120,-1},{  -135, 0},{  -119,-1},{  -144, 0},
+{  -112,-1},{  -143, 0},{  -111,-1},{  -160, 0},{   -96,-1},{  -159, 0},{   -95,-1},{  -192, 0},
+{   -64,-1},{  -191, 0},{   -63,-1},{  -256, 0},{     0, 0},{  -255, 0},{     1, 0},{  -384, 0},
+{   128, 0},{  -383, 0},{   129, 0},{  -640, 0},{   384, 0},{  -639, 0},{   385, 0},{ -1152, 0},
+{   896, 0},{ -1151, 0},{   897, 0},{ -2176, 0},{  1920, 0},{ -2175, 0},{  1921, 0},{ -4224, 0},
+{  3968, 0},{ -4223, 0},{  3969, 0},{ -8320, 0},{  8064, 0},{ -8319, 0},{  8065, 0},{-16512, 0},
+{ 16256, 0},{-16511, 0},{ 16257, 0},{-32896, 0},{ 32640, 0},{-32895, 0},{ 32641, 0},{   126, 0},
+{   128,-1},{   127, 0},{   129,-1},{   125, 0},{   129,-1},{   126, 0},{   130,-1},{   123, 0},
+{   131,-1},{   124, 0},{   132,-1},{   119, 0},{   135,-1},{   120, 0},{   136,-1},{   111, 0},
+{   143,-1},{   112, 0},{   144,-1},{    95, 0},{   159,-1},{    96, 0},{   160,-1},{    63, 0},
+{   191,-1},{    64, 0},{   192,-1},{    -1,-1},{   255,-1},{     0, 0},{   256,-1},{  -129,-1},
+{   383,-1},{  -128,-1},{   384,-1},{  -385,-1},{   639,-1},{  -384,-1},{   640,-1},{  -897,-1},
+{  1151,-1},{  -896,-1},{  1152,-1},{ -1921,-1},{  2175,-1},{ -1920,-1},{  2176,-1},{ -3969,-1},
+{  4223,-1},{ -3968,-1},{  4224,-1},{ -8065,-1},{  8319,-1},{ -8064,-1},{  8320,-1},{-16257,-1},
+{ 16511,-1},{-16256,-1},{ 16512,-1},{-32641,-1},{ 32895,-1},{-32640,-1},{ 32896,-1},{  -130, 0},
+{  -128,-1},{  -129, 0},{  -127,-1},{  -131, 0},{  -127,-1},{  -130, 0},{  -126,-1},{  -133, 0},
+{  -125,-1},{  -132, 0},{  -124,-1},{  -137, 0},{  -121,-1},{  -136, 0},{  -120,-1},{  -145, 0},
+{  -113,-1},{  -144, 0},{  -112,-1},{  -161, 0},{   -97,-1},{  -160, 0},{   -96,-1},{  -193, 0},
+{   -65,-1},{  -192, 0},{   -64,-1},{  -257, 0},{    -1,-1},{  -256, 0},{     0, 0},{  -385, 0},
+{   127, 0},{  -384, 0},{   128, 0},{  -641, 0},{   383, 0},{  -640, 0},{   384, 0},{ -1153, 0},
+{   895, 0},{ -1152, 0},{   896, 0},{ -2177, 0},{  1919, 0},{ -2176, 0},{  1920, 0},{ -4225, 0},
+{  3967, 0},{ -4224, 0},{  3968, 0},{ -8321, 0},{  8063, 0},{ -8320, 0},{  8064, 0},{-16513, 0},
+{ 16255, 0},{-16512, 0},{ 16256, 0},{-32897, 0},{ 32639, 0},{-32896, 0},{ 32640, 0},{   255, 0},
+{   257,-1},{   256, 0},{   258,-1},{   254, 0},{   258,-1},{   255, 0},{   259,-1},{   252, 0},
+{   260,-1},{   253, 0},{   261,-1},{   248, 0},{   264,-1},{   249, 0},{   265,-1},{   240, 0},
+{   272,-1},{   241, 0},{   273,-1},{   224, 0},{   288,-1},{   225, 0},{   289,-1},{   192, 0},
+{   320,-1},{   193, 0},{   321,-1},{   128, 0},{   384,-1},{   129, 0},{   385,-1},{     0, 0},
+{   512,-1},{     1, 0},{   513,-1},{  -256,-1},{   768,-1},{  -255,-1},{   769,-1},{  -768,-1},
+{  1280,-1},{  -767,-1},{  1281,-1},{ -1792,-1},{  2304,-1},{ -1791,-1},{  2305,-1},{ -3840,-1},
+{  4352,-1},{ -3839,-1},{  4353,-1},{ -7936,-1},{  8448,-1},{ -7935,-1},{  8449,-1},{-16128,-1},
+{ 16640,-1},{-16127,-1},{ 16641,-1},{-32512,-1},{ 33024,-1},{-32511,-1},{ 33025,-1},{  -257, 0},
+{  -255,-1},{  -256, 0},{  -254,-1},{  -258, 0},{  -254,-1},{  -257, 0},{  -253,-1},{  -260, 0},
+{  -252,-1},{  -259, 0},{  -251,-1},{  -264, 0},{  -248,-1},{  -263, 0},{  -247,-1},{  -272, 0},
+{  -240,-1},{  -271, 0},{  -239,-1},{  -288, 0},{  -224,-1},{  -287, 0},{  -223,-1},{  -320, 0},
+{  -192,-1},{  -319, 0},{  -191,-1},{  -384, 0},{  -128,-1},{  -383, 0},{  -127,-1},{  -512, 0},
+{     0, 0},{  -511, 0},{     1, 0},{  -768, 0},{   256, 0},{  -767, 0},{   257, 0},{ -1280, 0},
+{   768, 0},{ -1279, 0},{   769, 0},{ -2304, 0},{  1792, 0},{ -2303, 0},{  1793, 0},{ -4352, 0},
+{  3840, 0},{ -4351, 0},{  3841, 0},{ -8448, 0},{  7936, 0},{ -8447, 0},{  7937, 0},{-16640, 0},
+{ 16128, 0},{-16639, 0},{ 16129, 0},{-33024, 0},{ 32512, 0},{-33023, 0},{ 32513, 0},{   254, 0},
+{   256,-1},{   255, 0},{   257,-1},{   253, 0},{   257,-1},{   254, 0},{   258,-1},{   251, 0},
+{   259,-1},{   252, 0},{   260,-1},{   247, 0},{   263,-1},{   248, 0},{   264,-1},{   239, 0},
+{   271,-1},{   240, 0},{   272,-1},{   223, 0},{   287,-1},{   224, 0},{   288,-1},{   191, 0},
+{   319,-1},{   192, 0},{   320,-1},{   127, 0},{   383,-1},{   128, 0},{   384,-1},{    -1,-1},
+{   511,-1},{     0, 0},{   512,-1},{  -257,-1},{   767,-1},{  -256,-1},{   768,-1},{  -769,-1},
+{  1279,-1},{  -768,-1},{  1280,-1},{ -1793,-1},{  2303,-1},{ -1792,-1},{  2304,-1},{ -3841,-1},
+{  4351,-1},{ -3840,-1},{  4352,-1},{ -7937,-1},{  8447,-1},{ -7936,-1},{  8448,-1},{-16129,-1},
+{ 16639,-1},{-16128,-1},{ 16640,-1},{-32513,-1},{ 33023,-1},{-32512,-1},{ 33024,-1},{  -258, 0},
+{  -256,-1},{  -257, 0},{  -255,-1},{  -259, 0},{  -255,-1},{  -258, 0},{  -254,-1},{  -261, 0},
+{  -253,-1},{  -260, 0},{  -252,-1},{  -265, 0},{  -249,-1},{  -264, 0},{  -248,-1},{  -273, 0},
+{  -241,-1},{  -272, 0},{  -240,-1},{  -289, 0},{  -225,-1},{  -288, 0},{  -224,-1},{  -321, 0},
+{  -193,-1},{  -320, 0},{  -192,-1},{  -385, 0},{  -129,-1},{  -384, 0},{  -128,-1},{  -513, 0},
+{    -1,-1},{  -512, 0},{     0, 0},{  -769, 0},{   255, 0},{  -768, 0},{   256, 0},{ -1281, 0},
+{   767, 0},{ -1280, 0},{   768, 0},{ -2305, 0},{  1791, 0},{ -2304, 0},{  1792, 0},{ -4353, 0},
+{  3839, 0},{ -4352, 0},{  3840, 0},{ -8449, 0},{  7935, 0},{ -8448, 0},{  7936, 0},{-16641, 0},
+{ 16127, 0},{-16640, 0},{ 16128, 0},{-33025, 0},{ 32511, 0},{-33024, 0},{ 32512, 0},{   511, 0},
+{   513,-1},{   512, 0},{   514,-1},{   510, 0},{   514,-1},{   511, 0},{   515,-1},{   508, 0},
+{   516,-1},{   509, 0},{   517,-1},{   504, 0},{   520,-1},{   505, 0},{   521,-1},{   496, 0},
+{   528,-1},{   497, 0},{   529,-1},{   480, 0},{   544,-1},{   481, 0},{   545,-1},{   448, 0},
+{   576,-1},{   449, 0},{   577,-1},{   384, 0},{   640,-1},{   385, 0},{   641,-1},{   256, 0},
+{   768,-1},{   257, 0},{   769,-1},{     0, 0},{  1024,-1},{     1, 0},{  1025,-1},{  -512,-1},
+{  1536,-1},{  -511,-1},{  1537,-1},{ -1536,-1},{  2560,-1},{ -1535,-1},{  2561,-1},{ -3584,-1},
+{  4608,-1},{ -3583,-1},{  4609,-1},{ -7680,-1},{  8704,-1},{ -7679,-1},{  8705,-1},{-15872,-1},
+{ 16896,-1},{-15871,-1},{ 16897,-1},{-32256,-1},{ 33280,-1},{-32255,-1},{ 33281,-1},{  -513, 0},
+{  -511,-1},{  -512, 0},{  -510,-1},{  -514, 0},{  -510,-1},{  -513, 0},{  -509,-1},{  -516, 0},
+{  -508,-1},{  -515, 0},{  -507,-1},{  -520, 0},{  -504,-1},{  -519, 0},{  -503,-1},{  -528, 0},
+{  -496,-1},{  -527, 0},{  -495,-1},{  -544, 0},{  -480,-1},{  -543, 0},{  -479,-1},{  -576, 0},
+{  -448,-1},{  -575, 0},{  -447,-1},{  -640, 0},{  -384,-1},{  -639, 0},{  -383,-1},{  -768, 0},
+{  -256,-1},{  -767, 0},{  -255,-1},{ -1024, 0},{     0, 0},{ -1023, 0},{     1, 0},{ -1536, 0},
+{   512, 0},{ -1535, 0},{   513, 0},{ -2560, 0},{  1536, 0},{ -2559, 0},{  1537, 0},{ -4608, 0},
+{  3584, 0},{ -4607, 0},{  3585, 0},{ -8704, 0},{  7680, 0},{ -8703, 0},{  7681, 0},{-16896, 0},
+{ 15872, 0},{-16895, 0},{ 15873, 0},{-33280, 0},{ 32256, 0},{-33279, 0},{ 32257, 0},{   510, 0},
+{   512,-1},{   511, 0},{   513,-1},{   509, 0},{   513,-1},{   510, 0},{   514,-1},{   507, 0},
+{   515,-1},{   508, 0},{   516,-1},{   503, 0},{   519,-1},{   504, 0},{   520,-1},{   495, 0},
+{   527,-1},{   496, 0},{   528,-1},{   479, 0},{   543,-1},{   480, 0},{   544,-1},{   447, 0},
+{   575,-1},{   448, 0},{   576,-1},{   383, 0},{   639,-1},{   384, 0},{   640,-1},{   255, 0},
+{   767,-1},{   256, 0},{   768,-1},{    -1,-1},{  1023,-1},{     0, 0},{  1024,-1},{  -513,-1},
+{  1535,-1},{  -512,-1},{  1536,-1},{ -1537,-1},{  2559,-1},{ -1536,-1},{  2560,-1},{ -3585,-1},
+{  4607,-1},{ -3584,-1},{  4608,-1},{ -7681,-1},{  8703,-1},{ -7680,-1},{  8704,-1},{-15873,-1},
+{ 16895,-1},{-15872,-1},{ 16896,-1},{-32257,-1},{ 33279,-1},{-32256,-1},{ 33280,-1},{  -514, 0},
+{  -512,-1},{  -513, 0},{  -511,-1},{  -515, 0},{  -511,-1},{  -514, 0},{  -510,-1},{  -517, 0},
+{  -509,-1},{  -516, 0},{  -508,-1},{  -521, 0},{  -505,-1},{  -520, 0},{  -504,-1},{  -529, 0},
+{  -497,-1},{  -528, 0},{  -496,-1},{  -545, 0},{  -481,-1},{  -544, 0},{  -480,-1},{  -577, 0},
+{  -449,-1},{  -576, 0},{  -448,-1},{  -641, 0},{  -385,-1},{  -640, 0},{  -384,-1},{  -769, 0},
+{  -257,-1},{  -768, 0},{  -256,-1},{ -1025, 0},{    -1,-1},{ -1024, 0},{     0, 0},{ -1537, 0},
+{   511, 0},{ -1536, 0},{   512, 0},{ -2561, 0},{  1535, 0},{ -2560, 0},{  1536, 0},{ -4609, 0},
+{  3583, 0},{ -4608, 0},{  3584, 0},{ -8705, 0},{  7679, 0},{ -8704, 0},{  7680, 0},{-16897, 0},
+{ 15871, 0},{-16896, 0},{ 15872, 0},{-33281, 0},{ 32255, 0},{-33280, 0},{ 32256, 0},{  1023, 0},
+{  1025,-1},{  1024, 0},{  1026,-1},{  1022, 0},{  1026,-1},{  1023, 0},{  1027,-1},{  1020, 0},
+{  1028,-1},{  1021, 0},{  1029,-1},{  1016, 0},{  1032,-1},{  1017, 0},{  1033,-1},{  1008, 0},
+{  1040,-1},{  1009, 0},{  1041,-1},{   992, 0},{  1056,-1},{   993, 0},{  1057,-1},{   960, 0},
+{  1088,-1},{   961, 0},{  1089,-1},{   896, 0},{  1152,-1},{   897, 0},{  1153,-1},{   768, 0},
+{  1280,-1},{   769, 0},{  1281,-1},{   512, 0},{  1536,-1},{   513, 0},{  1537,-1},{     0, 0},
+{  2048,-1},{     1, 0},{  2049,-1},{ -1024,-1},{  3072,-1},{ -1023,-1},{  3073,-1},{ -3072,-1},
+{  5120,-1},{ -3071,-1},{  5121,-1},{ -7168,-1},{  9216,-1},{ -7167,-1},{  9217,-1},{-15360,-1},
+{ 17408,-1},{-15359,-1},{ 17409,-1},{-31744,-1},{ 33792,-1},{-31743,-1},{ 33793,-1},{ -1025, 0},
+{ -1023,-1},{ -1024, 0},{ -1022,-1},{ -1026, 0},{ -1022,-1},{ -1025, 0},{ -1021,-1},{ -1028, 0},
+{ -1020,-1},{ -1027, 0},{ -1019,-1},{ -1032, 0},{ -1016,-1},{ -1031, 0},{ -1015,-1},{ -1040, 0},
+{ -1008,-1},{ -1039, 0},{ -1007,-1},{ -1056, 0},{  -992,-1},{ -1055, 0},{  -991,-1},{ -1088, 0},
+{  -960,-1},{ -1087, 0},{  -959,-1},{ -1152, 0},{  -896,-1},{ -1151, 0},{  -895,-1},{ -1280, 0},
+{  -768,-1},{ -1279, 0},{  -767,-1},{ -1536, 0},{  -512,-1},{ -1535, 0},{  -511,-1},{ -2048, 0},
+{     0, 0},{ -2047, 0},{     1, 0},{ -3072, 0},{  1024, 0},{ -3071, 0},{  1025, 0},{ -5120, 0},
+{  3072, 0},{ -5119, 0},{  3073, 0},{ -9216, 0},{  7168, 0},{ -9215, 0},{  7169, 0},{-17408, 0},
+{ 15360, 0},{-17407, 0},{ 15361, 0},{-33792, 0},{ 31744, 0},{-33791, 0},{ 31745, 0},{  1022, 0},
+{  1024,-1},{  1023, 0},{  1025,-1},{  1021, 0},{  1025,-1},{  1022, 0},{  1026,-1},{  1019, 0},
+{  1027,-1},{  1020, 0},{  1028,-1},{  1015, 0},{  1031,-1},{  1016, 0},{  1032,-1},{  1007, 0},
+{  1039,-1},{  1008, 0},{  1040,-1},{   991, 0},{  1055,-1},{   992, 0},{  1056,-1},{   959, 0},
+{  1087,-1},{   960, 0},{  1088,-1},{   895, 0},{  1151,-1},{   896, 0},{  1152,-1},{   767, 0},
+{  1279,-1},{   768, 0},{  1280,-1},{   511, 0},{  1535,-1},{   512, 0},{  1536,-1},{    -1,-1},
+{  2047,-1},{     0, 0},{  2048,-1},{ -1025,-1},{  3071,-1},{ -1024,-1},{  3072,-1},{ -3073,-1},
+{  5119,-1},{ -3072,-1},{  5120,-1},{ -7169,-1},{  9215,-1},{ -7168,-1},{  9216,-1},{-15361,-1},
+{ 17407,-1},{-15360,-1},{ 17408,-1},{-31745,-1},{ 33791,-1},{-31744,-1},{ 33792,-1},{ -1026, 0},
+{ -1024,-1},{ -1025, 0},{ -1023,-1},{ -1027, 0},{ -1023,-1},{ -1026, 0},{ -1022,-1},{ -1029, 0},
+{ -1021,-1},{ -1028, 0},{ -1020,-1},{ -1033, 0},{ -1017,-1},{ -1032, 0},{ -1016,-1},{ -1041, 0},
+{ -1009,-1},{ -1040, 0},{ -1008,-1},{ -1057, 0},{  -993,-1},{ -1056, 0},{  -992,-1},{ -1089, 0},
+{  -961,-1},{ -1088, 0},{  -960,-1},{ -1153, 0},{  -897,-1},{ -1152, 0},{  -896,-1},{ -1281, 0},
+{  -769,-1},{ -1280, 0},{  -768,-1},{ -1537, 0},{  -513,-1},{ -1536, 0},{  -512,-1},{ -2049, 0},
+{    -1,-1},{ -2048, 0},{     0, 0},{ -3073, 0},{  1023, 0},{ -3072, 0},{  1024, 0},{ -5121, 0},
+{  3071, 0},{ -5120, 0},{  3072, 0},{ -9217, 0},{  7167, 0},{ -9216, 0},{  7168, 0},{-17409, 0},
+{ 15359, 0},{-17408, 0},{ 15360, 0},{-33793, 0},{ 31743, 0},{-33792, 0},{ 31744, 0},{  2047, 0},
+{  2049,-1},{  2048, 0},{  2050,-1},{  2046, 0},{  2050,-1},{  2047, 0},{  2051,-1},{  2044, 0},
+{  2052,-1},{  2045, 0},{  2053,-1},{  2040, 0},{  2056,-1},{  2041, 0},{  2057,-1},{  2032, 0},
+{  2064,-1},{  2033, 0},{  2065,-1},{  2016, 0},{  2080,-1},{  2017, 0},{  2081,-1},{  1984, 0},
+{  2112,-1},{  1985, 0},{  2113,-1},{  1920, 0},{  2176,-1},{  1921, 0},{  2177,-1},{  1792, 0},
+{  2304,-1},{  1793, 0},{  2305,-1},{  1536, 0},{  2560,-1},{  1537, 0},{  2561,-1},{  1024, 0},
+{  3072,-1},{  1025, 0},{  3073,-1},{     0, 0},{  4096,-1},{     1, 0},{  4097,-1},{ -2048,-1},
+{  6144,-1},{ -2047,-1},{  6145,-1},{ -6144,-1},{ 10240,-1},{ -6143,-1},{ 10241,-1},{-14336,-1},
+{ 18432,-1},{-14335,-1},{ 18433,-1},{-30720,-1},{ 34816,-1},{-30719,-1},{ 34817,-1},{ -2049, 0},
+{ -2047,-1},{ -2048, 0},{ -2046,-1},{ -2050, 0},{ -2046,-1},{ -2049, 0},{ -2045,-1},{ -2052, 0},
+{ -2044,-1},{ -2051, 0},{ -2043,-1},{ -2056, 0},{ -2040,-1},{ -2055, 0},{ -2039,-1},{ -2064, 0},
+{ -2032,-1},{ -2063, 0},{ -2031,-1},{ -2080, 0},{ -2016,-1},{ -2079, 0},{ -2015,-1},{ -2112, 0},
+{ -1984,-1},{ -2111, 0},{ -1983,-1},{ -2176, 0},{ -1920,-1},{ -2175, 0},{ -1919,-1},{ -2304, 0},
+{ -1792,-1},{ -2303, 0},{ -1791,-1},{ -2560, 0},{ -1536,-1},{ -2559, 0},{ -1535,-1},{ -3072, 0},
+{ -1024,-1},{ -3071, 0},{ -1023,-1},{ -4096, 0},{     0, 0},{ -4095, 0},{     1, 0},{ -6144, 0},
+{  2048, 0},{ -6143, 0},{  2049, 0},{-10240, 0},{  6144, 0},{-10239, 0},{  6145, 0},{-18432, 0},
+{ 14336, 0},{-18431, 0},{ 14337, 0},{-34816, 0},{ 30720, 0},{-34815, 0},{ 30721, 0},{  2046, 0},
+{  2048,-1},{  2047, 0},{  2049,-1},{  2045, 0},{  2049,-1},{  2046, 0},{  2050,-1},{  2043, 0},
+{  2051,-1},{  2044, 0},{  2052,-1},{  2039, 0},{  2055,-1},{  2040, 0},{  2056,-1},{  2031, 0},
+{  2063,-1},{  2032, 0},{  2064,-1},{  2015, 0},{  2079,-1},{  2016, 0},{  2080,-1},{  1983, 0},
+{  2111,-1},{  1984, 0},{  2112,-1},{  1919, 0},{  2175,-1},{  1920, 0},{  2176,-1},{  1791, 0},
+{  2303,-1},{  1792, 0},{  2304,-1},{  1535, 0},{  2559,-1},{  1536, 0},{  2560,-1},{  1023, 0},
+{  3071,-1},{  1024, 0},{  3072,-1},{    -1,-1},{  4095,-1},{     0, 0},{  4096,-1},{ -2049,-1},
+{  6143,-1},{ -2048,-1},{  6144,-1},{ -6145,-1},{ 10239,-1},{ -6144,-1},{ 10240,-1},{-14337,-1},
+{ 18431,-1},{-14336,-1},{ 18432,-1},{-30721,-1},{ 34815,-1},{-30720,-1},{ 34816,-1},{ -2050, 0},
+{ -2048,-1},{ -2049, 0},{ -2047,-1},{ -2051, 0},{ -2047,-1},{ -2050, 0},{ -2046,-1},{ -2053, 0},
+{ -2045,-1},{ -2052, 0},{ -2044,-1},{ -2057, 0},{ -2041,-1},{ -2056, 0},{ -2040,-1},{ -2065, 0},
+{ -2033,-1},{ -2064, 0},{ -2032,-1},{ -2081, 0},{ -2017,-1},{ -2080, 0},{ -2016,-1},{ -2113, 0},
+{ -1985,-1},{ -2112, 0},{ -1984,-1},{ -2177, 0},{ -1921,-1},{ -2176, 0},{ -1920,-1},{ -2305, 0},
+{ -1793,-1},{ -2304, 0},{ -1792,-1},{ -2561, 0},{ -1537,-1},{ -2560, 0},{ -1536,-1},{ -3073, 0},
+{ -1025,-1},{ -3072, 0},{ -1024,-1},{ -4097, 0},{    -1,-1},{ -4096, 0},{     0, 0},{ -6145, 0},
+{  2047, 0},{ -6144, 0},{  2048, 0},{-10241, 0},{  6143, 0},{-10240, 0},{  6144, 0},{-18433, 0},
+{ 14335, 0},{-18432, 0},{ 14336, 0},{-34817, 0},{ 30719, 0},{-34816, 0},{ 30720, 0},{  4095, 0},
+{  4097,-1},{  4096, 0},{  4098,-1},{  4094, 0},{  4098,-1},{  4095, 0},{  4099,-1},{  4092, 0},
+{  4100,-1},{  4093, 0},{  4101,-1},{  4088, 0},{  4104,-1},{  4089, 0},{  4105,-1},{  4080, 0},
+{  4112,-1},{  4081, 0},{  4113,-1},{  4064, 0},{  4128,-1},{  4065, 0},{  4129,-1},{  4032, 0},
+{  4160,-1},{  4033, 0},{  4161,-1},{  3968, 0},{  4224,-1},{  3969, 0},{  4225,-1},{  3840, 0},
+{  4352,-1},{  3841, 0},{  4353,-1},{  3584, 0},{  4608,-1},{  3585, 0},{  4609,-1},{  3072, 0},
+{  5120,-1},{  3073, 0},{  5121,-1},{  2048, 0},{  6144,-1},{  2049, 0},{  6145,-1},{     0, 0},
+{  8192,-1},{     1, 0},{  8193,-1},{ -4096,-1},{ 12288,-1},{ -4095,-1},{ 12289,-1},{-12288,-1},
+{ 20480,-1},{-12287,-1},{ 20481,-1},{-28672,-1},{ 36864,-1},{-28671,-1},{ 36865,-1},{ -4097, 0},
+{ -4095,-1},{ -4096, 0},{ -4094,-1},{ -4098, 0},{ -4094,-1},{ -4097, 0},{ -4093,-1},{ -4100, 0},
+{ -4092,-1},{ -4099, 0},{ -4091,-1},{ -4104, 0},{ -4088,-1},{ -4103, 0},{ -4087,-1},{ -4112, 0},
+{ -4080,-1},{ -4111, 0},{ -4079,-1},{ -4128, 0},{ -4064,-1},{ -4127, 0},{ -4063,-1},{ -4160, 0},
+{ -4032,-1},{ -4159, 0},{ -4031,-1},{ -4224, 0},{ -3968,-1},{ -4223, 0},{ -3967,-1},{ -4352, 0},
+{ -3840,-1},{ -4351, 0},{ -3839,-1},{ -4608, 0},{ -3584,-1},{ -4607, 0},{ -3583,-1},{ -5120, 0},
+{ -3072,-1},{ -5119, 0},{ -3071,-1},{ -6144, 0},{ -2048,-1},{ -6143, 0},{ -2047,-1},{ -8192, 0},
+{     0, 0},{ -8191, 0},{     1, 0},{-12288, 0},{  4096, 0},{-12287, 0},{  4097, 0},{-20480, 0},
+{ 12288, 0},{-20479, 0},{ 12289, 0},{-36864, 0},{ 28672, 0},{-36863, 0},{ 28673, 0},{  4094, 0},
+{  4096,-1},{  4095, 0},{  4097,-1},{  4093, 0},{  4097,-1},{  4094, 0},{  4098,-1},{  4091, 0},
+{  4099,-1},{  4092, 0},{  4100,-1},{  4087, 0},{  4103,-1},{  4088, 0},{  4104,-1},{  4079, 0},
+{  4111,-1},{  4080, 0},{  4112,-1},{  4063, 0},{  4127,-1},{  4064, 0},{  4128,-1},{  4031, 0},
+{  4159,-1},{  4032, 0},{  4160,-1},{  3967, 0},{  4223,-1},{  3968, 0},{  4224,-1},{  3839, 0},
+{  4351,-1},{  3840, 0},{  4352,-1},{  3583, 0},{  4607,-1},{  3584, 0},{  4608,-1},{  3071, 0},
+{  5119,-1},{  3072, 0},{  5120,-1},{  2047, 0},{  6143,-1},{  2048, 0},{  6144,-1},{    -1,-1},
+{  8191,-1},{     0, 0},{  8192,-1},{ -4097,-1},{ 12287,-1},{ -4096,-1},{ 12288,-1},{-12289,-1},
+{ 20479,-1},{-12288,-1},{ 20480,-1},{-28673,-1},{ 36863,-1},{-28672,-1},{ 36864,-1},{ -4098, 0},
+{ -4096,-1},{ -4097, 0},{ -4095,-1},{ -4099, 0},{ -4095,-1},{ -4098, 0},{ -4094,-1},{ -4101, 0},
+{ -4093,-1},{ -4100, 0},{ -4092,-1},{ -4105, 0},{ -4089,-1},{ -4104, 0},{ -4088,-1},{ -4113, 0},
+{ -4081,-1},{ -4112, 0},{ -4080,-1},{ -4129, 0},{ -4065,-1},{ -4128, 0},{ -4064,-1},{ -4161, 0},
+{ -4033,-1},{ -4160, 0},{ -4032,-1},{ -4225, 0},{ -3969,-1},{ -4224, 0},{ -3968,-1},{ -4353, 0},
+{ -3841,-1},{ -4352, 0},{ -3840,-1},{ -4609, 0},{ -3585,-1},{ -4608, 0},{ -3584,-1},{ -5121, 0},
+{ -3073,-1},{ -5120, 0},{ -3072,-1},{ -6145, 0},{ -2049,-1},{ -6144, 0},{ -2048,-1},{ -8193, 0},
+{    -1,-1},{ -8192, 0},{     0, 0},{-12289, 0},{  4095, 0},{-12288, 0},{  4096, 0},{-20481, 0},
+{ 12287, 0},{-20480, 0},{ 12288, 0},{-36865, 0},{ 28671, 0},{-36864, 0},{ 28672, 0},{  8191, 0},
+{  8193,-1},{  8192, 0},{  8194,-1},{  8190, 0},{  8194,-1},{  8191, 0},{  8195,-1},{  8188, 0},
+{  8196,-1},{  8189, 0},{  8197,-1},{  8184, 0},{  8200,-1},{  8185, 0},{  8201,-1},{  8176, 0},
+{  8208,-1},{  8177, 0},{  8209,-1},{  8160, 0},{  8224,-1},{  8161, 0},{  8225,-1},{  8128, 0},
+{  8256,-1},{  8129, 0},{  8257,-1},{  8064, 0},{  8320,-1},{  8065, 0},{  8321,-1},{  7936, 0},
+{  8448,-1},{  7937, 0},{  8449,-1},{  7680, 0},{  8704,-1},{  7681, 0},{  8705,-1},{  7168, 0},
+{  9216,-1},{  7169, 0},{  9217,-1},{  6144, 0},{ 10240,-1},{  6145, 0},{ 10241,-1},{  4096, 0},
+{ 12288,-1},{  4097, 0},{ 12289,-1},{     0, 0},{ 16384,-1},{     1, 0},{ 16385,-1},{ -8192,-1},
+{ 24576,-1},{ -8191,-1},{ 24577,-1},{-24576,-1},{ 40960,-1},{-24575,-1},{ 40961,-1},{ -8193, 0},
+{ -8191,-1},{ -8192, 0},{ -8190,-1},{ -8194, 0},{ -8190,-1},{ -8193, 0},{ -8189,-1},{ -8196, 0},
+{ -8188,-1},{ -8195, 0},{ -8187,-1},{ -8200, 0},{ -8184,-1},{ -8199, 0},{ -8183,-1},{ -8208, 0},
+{ -8176,-1},{ -8207, 0},{ -8175,-1},{ -8224, 0},{ -8160,-1},{ -8223, 0},{ -8159,-1},{ -8256, 0},
+{ -8128,-1},{ -8255, 0},{ -8127,-1},{ -8320, 0},{ -8064,-1},{ -8319, 0},{ -8063,-1},{ -8448, 0},
+{ -7936,-1},{ -8447, 0},{ -7935,-1},{ -8704, 0},{ -7680,-1},{ -8703, 0},{ -7679,-1},{ -9216, 0},
+{ -7168,-1},{ -9215, 0},{ -7167,-1},{-10240, 0},{ -6144,-1},{-10239, 0},{ -6143,-1},{-12288, 0},
+{ -4096,-1},{-12287, 0},{ -4095,-1},{-16384, 0},{     0, 0},{-16383, 0},{     1, 0},{-24576, 0},
+{  8192, 0},{-24575, 0},{  8193, 0},{-40960, 0},{ 24576, 0},{-40959, 0},{ 24577, 0},{  8190, 0},
+{  8192,-1},{  8191, 0},{  8193,-1},{  8189, 0},{  8193,-1},{  8190, 0},{  8194,-1},{  8187, 0},
+{  8195,-1},{  8188, 0},{  8196,-1},{  8183, 0},{  8199,-1},{  8184, 0},{  8200,-1},{  8175, 0},
+{  8207,-1},{  8176, 0},{  8208,-1},{  8159, 0},{  8223,-1},{  8160, 0},{  8224,-1},{  8127, 0},
+{  8255,-1},{  8128, 0},{  8256,-1},{  8063, 0},{  8319,-1},{  8064, 0},{  8320,-1},{  7935, 0},
+{  8447,-1},{  7936, 0},{  8448,-1},{  7679, 0},{  8703,-1},{  7680, 0},{  8704,-1},{  7167, 0},
+{  9215,-1},{  7168, 0},{  9216,-1},{  6143, 0},{ 10239,-1},{  6144, 0},{ 10240,-1},{  4095, 0},
+{ 12287,-1},{  4096, 0},{ 12288,-1},{    -1,-1},{ 16383,-1},{     0, 0},{ 16384,-1},{ -8193,-1},
+{ 24575,-1},{ -8192,-1},{ 24576,-1},{-24577,-1},{ 40959,-1},{-24576,-1},{ 40960,-1},{ -8194, 0},
+{ -8192,-1},{ -8193, 0},{ -8191,-1},{ -8195, 0},{ -8191,-1},{ -8194, 0},{ -8190,-1},{ -8197, 0},
+{ -8189,-1},{ -8196, 0},{ -8188,-1},{ -8201, 0},{ -8185,-1},{ -8200, 0},{ -8184,-1},{ -8209, 0},
+{ -8177,-1},{ -8208, 0},{ -8176,-1},{ -8225, 0},{ -8161,-1},{ -8224, 0},{ -8160,-1},{ -8257, 0},
+{ -8129,-1},{ -8256, 0},{ -8128,-1},{ -8321, 0},{ -8065,-1},{ -8320, 0},{ -8064,-1},{ -8449, 0},
+{ -7937,-1},{ -8448, 0},{ -7936,-1},{ -8705, 0},{ -7681,-1},{ -8704, 0},{ -7680,-1},{ -9217, 0},
+{ -7169,-1},{ -9216, 0},{ -7168,-1},{-10241, 0},{ -6145,-1},{-10240, 0},{ -6144,-1},{-12289, 0},
+{ -4097,-1},{-12288, 0},{ -4096,-1},{-16385, 0},{    -1,-1},{-16384, 0},{     0, 0},{-24577, 0},
+{  8191, 0},{-24576, 0},{  8192, 0},{-40961, 0},{ 24575, 0},{-40960, 0},{ 24576, 0},{ 16383, 0},
+{ 16385,-1},{ 16384, 0},{ 16386,-1},{ 16382, 0},{ 16386,-1},{ 16383, 0},{ 16387,-1},{ 16380, 0},
+{ 16388,-1},{ 16381, 0},{ 16389,-1},{ 16376, 0},{ 16392,-1},{ 16377, 0},{ 16393,-1},{ 16368, 0},
+{ 16400,-1},{ 16369, 0},{ 16401,-1},{ 16352, 0},{ 16416,-1},{ 16353, 0},{ 16417,-1},{ 16320, 0},
+{ 16448,-1},{ 16321, 0},{ 16449,-1},{ 16256, 0},{ 16512,-1},{ 16257, 0},{ 16513,-1},{ 16128, 0},
+{ 16640,-1},{ 16129, 0},{ 16641,-1},{ 15872, 0},{ 16896,-1},{ 15873, 0},{ 16897,-1},{ 15360, 0},
+{ 17408,-1},{ 15361, 0},{ 17409,-1},{ 14336, 0},{ 18432,-1},{ 14337, 0},{ 18433,-1},{ 12288, 0},
+{ 20480,-1},{ 12289, 0},{ 20481,-1},{  8192, 0},{ 24576,-1},{  8193, 0},{ 24577,-1},{     0, 0},
+{ 32768,-1},{     1, 0},{ 32769,-1},{-16384,-1},{ 49152,-1},{-16383,-1},{ 49153,-1},{-16385, 0},
+{-16383,-1},{-16384, 0},{-16382,-1},{-16386, 0},{-16382,-1},{-16385, 0},{-16381,-1},{-16388, 0},
+{-16380,-1},{-16387, 0},{-16379,-1},{-16392, 0},{-16376,-1},{-16391, 0},{-16375,-1},{-16400, 0},
+{-16368,-1},{-16399, 0},{-16367,-1},{-16416, 0},{-16352,-1},{-16415, 0},{-16351,-1},{-16448, 0},
+{-16320,-1},{-16447, 0},{-16319,-1},{-16512, 0},{-16256,-1},{-16511, 0},{-16255,-1},{-16640, 0},
+{-16128,-1},{-16639, 0},{-16127,-1},{-16896, 0},{-15872,-1},{-16895, 0},{-15871,-1},{-17408, 0},
+{-15360,-1},{-17407, 0},{-15359,-1},{-18432, 0},{-14336,-1},{-18431, 0},{-14335,-1},{-20480, 0},
+{-12288,-1},{-20479, 0},{-12287,-1},{-24576, 0},{ -8192,-1},{-24575, 0},{ -8191,-1},{-32768, 0},
+{     0, 0},{-32767, 0},{     1, 0},{-49152, 0},{ 16384, 0},{-49151, 0},{ 16385, 0},{ 16382, 0},
+{ 16384,-1},{ 16383, 0},{ 16385,-1},{ 16381, 0},{ 16385,-1},{ 16382, 0},{ 16386,-1},{ 16379, 0},
+{ 16387,-1},{ 16380, 0},{ 16388,-1},{ 16375, 0},{ 16391,-1},{ 16376, 0},{ 16392,-1},{ 16367, 0},
+{ 16399,-1},{ 16368, 0},{ 16400,-1},{ 16351, 0},{ 16415,-1},{ 16352, 0},{ 16416,-1},{ 16319, 0},
+{ 16447,-1},{ 16320, 0},{ 16448,-1},{ 16255, 0},{ 16511,-1},{ 16256, 0},{ 16512,-1},{ 16127, 0},
+{ 16639,-1},{ 16128, 0},{ 16640,-1},{ 15871, 0},{ 16895,-1},{ 15872, 0},{ 16896,-1},{ 15359, 0},
+{ 17407,-1},{ 15360, 0},{ 17408,-1},{ 14335, 0},{ 18431,-1},{ 14336, 0},{ 18432,-1},{ 12287, 0},
+{ 20479,-1},{ 12288, 0},{ 20480,-1},{  8191, 0},{ 24575,-1},{  8192, 0},{ 24576,-1},{    -1,-1},
+{ 32767,-1},{     0, 0},{ 32768,-1},{-16385,-1},{ 49151,-1},{-16384,-1},{ 49152,-1},{-16386, 0},
+{-16384,-1},{-16385, 0},{-16383,-1},{-16387, 0},{-16383,-1},{-16386, 0},{-16382,-1},{-16389, 0},
+{-16381,-1},{-16388, 0},{-16380,-1},{-16393, 0},{-16377,-1},{-16392, 0},{-16376,-1},{-16401, 0},
+{-16369,-1},{-16400, 0},{-16368,-1},{-16417, 0},{-16353,-1},{-16416, 0},{-16352,-1},{-16449, 0},
+{-16321,-1},{-16448, 0},{-16320,-1},{-16513, 0},{-16257,-1},{-16512, 0},{-16256,-1},{-16641, 0},
+{-16129,-1},{-16640, 0},{-16128,-1},{-16897, 0},{-15873,-1},{-16896, 0},{-15872,-1},{-17409, 0},
+{-15361,-1},{-17408, 0},{-15360,-1},{-18433, 0},{-14337,-1},{-18432, 0},{-14336,-1},{-20481, 0},
+{-12289,-1},{-20480, 0},{-12288,-1},{-24577, 0},{ -8193,-1},{-24576, 0},{ -8192,-1},{-32769, 0},
+{    -1,-1},{-32768, 0},{     0, 0},{-49153, 0},{ 16383, 0},{-49152, 0},{ 16384, 0},{ 32767, 0},
+{ 32769,-1},{ 32768, 0},{ 32770,-1},{ 32766, 0},{ 32770,-1},{ 32767, 0},{ 32771,-1},{ 32764, 0},
+{ 32772,-1},{ 32765, 0},{ 32773,-1},{ 32760, 0},{ 32776,-1},{ 32761, 0},{ 32777,-1},{ 32752, 0},
+{ 32784,-1},{ 32753, 0},{ 32785,-1},{ 32736, 0},{ 32800,-1},{ 32737, 0},{ 32801,-1},{ 32704, 0},
+{ 32832,-1},{ 32705, 0},{ 32833,-1},{ 32640, 0},{ 32896,-1},{ 32641, 0},{ 32897,-1},{ 32512, 0},
+{ 33024,-1},{ 32513, 0},{ 33025,-1},{ 32256, 0},{ 33280,-1},{ 32257, 0},{ 33281,-1},{ 31744, 0},
+{ 33792,-1},{ 31745, 0},{ 33793,-1},{ 30720, 0},{ 34816,-1},{ 30721, 0},{ 34817,-1},{ 28672, 0},
+{ 36864,-1},{ 28673, 0},{ 36865,-1},{ 24576, 0},{ 40960,-1},{ 24577, 0},{ 40961,-1},{ 16384, 0},
+{ 49152,-1},{ 16385, 0},{ 49153,-1},{     0, 0},{ 65536,-1},{     1, 0},{ 65537,-1},{-32769, 0},
+{-32767,-1},{-32768, 0},{-32766,-1},{-32770, 0},{-32766,-1},{-32769, 0},{-32765,-1},{-32772, 0},
+{-32764,-1},{-32771, 0},{-32763,-1},{-32776, 0},{-32760,-1},{-32775, 0},{-32759,-1},{-32784, 0},
+{-32752,-1},{-32783, 0},{-32751,-1},{-32800, 0},{-32736,-1},{-32799, 0},{-32735,-1},{-32832, 0},
+{-32704,-1},{-32831, 0},{-32703,-1},{-32896, 0},{-32640,-1},{-32895, 0},{-32639,-1},{-33024, 0},
+{-32512,-1},{-33023, 0},{-32511,-1},{-33280, 0},{-32256,-1},{-33279, 0},{-32255,-1},{-33792, 0},
+{-31744,-1},{-33791, 0},{-31743,-1},{-34816, 0},{-30720,-1},{-34815, 0},{-30719,-1},{-36864, 0},
+{-28672,-1},{-36863, 0},{-28671,-1},{-40960, 0},{-24576,-1},{-40959, 0},{-24575,-1},{-49152, 0},
+{-16384,-1},{-49151, 0},{-16383,-1},{-65536, 0},{     0, 0},{-65535, 0},{     1, 0},{ 32766, 0},
+{ 32768,-1},{ 32767, 0},{ 32769,-1},{ 32765, 0},{ 32769,-1},{ 32766, 0},{ 32770,-1},{ 32763, 0},
+{ 32771,-1},{ 32764, 0},{ 32772,-1},{ 32759, 0},{ 32775,-1},{ 32760, 0},{ 32776,-1},{ 32751, 0},
+{ 32783,-1},{ 32752, 0},{ 32784,-1},{ 32735, 0},{ 32799,-1},{ 32736, 0},{ 32800,-1},{ 32703, 0},
+{ 32831,-1},{ 32704, 0},{ 32832,-1},{ 32639, 0},{ 32895,-1},{ 32640, 0},{ 32896,-1},{ 32511, 0},
+{ 33023,-1},{ 32512, 0},{ 33024,-1},{ 32255, 0},{ 33279,-1},{ 32256, 0},{ 33280,-1},{ 31743, 0},
+{ 33791,-1},{ 31744, 0},{ 33792,-1},{ 30719, 0},{ 34815,-1},{ 30720, 0},{ 34816,-1},{ 28671, 0},
+{ 36863,-1},{ 28672, 0},{ 36864,-1},{ 24575, 0},{ 40959,-1},{ 24576, 0},{ 40960,-1},{ 16383, 0},
+{ 49151,-1},{ 16384, 0},{ 49152,-1},{    -1,-1},{ 65535,-1},{     0, 0},{ 65536,-1},{-32770, 0},
+{-32768,-1},{-32769, 0},{-32767,-1},{-32771, 0},{-32767,-1},{-32770, 0},{-32766,-1},{-32773, 0},
+{-32765,-1},{-32772, 0},{-32764,-1},{-32777, 0},{-32761,-1},{-32776, 0},{-32760,-1},{-32785, 0},
+{-32753,-1},{-32784, 0},{-32752,-1},{-32801, 0},{-32737,-1},{-32800, 0},{-32736,-1},{-32833, 0},
+{-32705,-1},{-32832, 0},{-32704,-1},{-32897, 0},{-32641,-1},{-32896, 0},{-32640,-1},{-33025, 0},
+{-32513,-1},{-33024, 0},{-32512,-1},{-33281, 0},{-32257,-1},{-33280, 0},{-32256,-1},{-33793, 0},
+{-31745,-1},{-33792, 0},{-31744,-1},{-34817, 0},{-30721,-1},{-34816, 0},{-30720,-1},{-36865, 0},
+{-28673,-1},{-36864, 0},{-28672,-1},{-40961, 0},{-24577,-1},{-40960, 0},{-24576,-1},{-49153, 0},
+{-16385,-1},{-49152, 0},{-16384,-1},{-65537, 0},{    -1,-1},{-65536, 0},{     0, 0},};
+/* Test driver: invoke each of the 4096 generated functions and compare
+   its two-limb result against the precomputed reference table above.
+   Exit status is nonzero if any mismatch was found.  */
+int main ()
+{
+  mp_limb_t r1, r0;   /* high (r1) and low (r0) result limbs from the function under test */
+  int err = 0;        /* number of mismatches detected */
+  size_t ind = 0;     /* index into ref[]; advances in lockstep with i */
+  for (size_t i = 0; i < 4096; i++)  /* 4096 = 64*64 operand combinations */
+    {
+      int ii = i / 64, jj = i % 64;  /* decode the two operand indices for the error message */
+      funcs[i](&r1, &r0);
+      /* ref entries are stored as (low, high) ints; the mp_limb_signed_t
+         cast sign-extends each to full limb width before comparing.  */
+      if (r0 != (mp_limb_signed_t) ref[ind][0] || r1 != (mp_limb_signed_t) ref[ind][1]) {
+         printf ("error for f%zu(%d,%d): want (%d,%d) got (%d,%d)\n", i, (int) ops[ii], (int) ops[jj], ref[ind][1], ref[ind][0], (int) r1, (int) r0);
+         err++;
+       }
+      ind++;
+    }
+  return err != 0;
+}
diff --git a/tests/devel/try.c b/tests/devel/try.c
new file mode 100644
index 0000000..f8f4a1c
--- /dev/null
+++ b/tests/devel/try.c
@@ -0,0 +1,3658 @@
+/* Run some tests on various mpn routines.
+
+   THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT.  IT'S ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
+
+Copyright 2000-2006, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+/* Usage: try [options] <function>...
+
+   For example, "./try mpn_add_n" to run tests of that function.
+
+   Combinations of alignments and overlaps are tested, with redzones above
+   or below the destinations, and with the sources write-protected.
+
+   The number of tests performed becomes ridiculously large with all the
+   combinations, and for that reason this can't be a part of a "make check",
+   it's meant only for development.  The code isn't very pretty either.
+
+   During development it can help to disable the redzones, since seeing the
+   rest of the destination written can show where the wrong part is, or if
+   the dst pointers are off by 1 or whatever.  The magic DEADVAL initial
+   fill (see below) will show locations never written.
+
+   The -s option can be used to test only certain size operands, which is
+   useful if some new code doesn't yet support say sizes less than the
+   unrolling, or whatever.
+
+   When a problem occurs it'll of course be necessary to run the program
+   under gdb to find out quite where, how and why it's going wrong.  Disable
+   the spinner with the -W option when doing this, or single stepping won't
+   work.  Using the "-1" option to run with simple data can be useful.
+
+   New functions to test can be added in try_array[].  If a new TYPE is
+   required then add it to the existing constants, set up its parameters in
+   param_init(), and add it to the call() function.  Extra parameter fields
+   can be added if necessary, or further interpretations given to existing
+   fields.
+
+
+   Portability:
+
+   This program is not designed for use on Cray vector systems under Unicos,
+   it will fail to compile due to missing _SC_PAGE_SIZE.  Those systems
+   don't really have pages or mprotect.  We could arrange to run the tests
+   without the redzones, but we haven't bothered currently.
+
+
+   Enhancements:
+
+   umul_ppmm support is not very good, lots of source data is generated
+   whereas only two limbs are needed.
+
+   Make a little scheme for interpreting the "SIZE" selections uniformly.
+
+   Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
+   source limbs.  Possibly increase the default repetitions in that case.
+
+   Automatically detect gdb and disable the spinner (use -W for now).
+
+   Make a way to re-run a failing case in the debugger.  Have an option to
+   snapshot each test case before it's run so the data is available if a
+   segv occurs.  (This should be more reliable than the current print_all()
+   in the signal handler.)
+
+   When alignment means a dst isn't hard against the redzone, check the
+   space in between remains unchanged.
+
+   When a source overlaps a destination, don't run both s[i].high 0 and 1,
+   as s[i].high has no effect.  Maybe encode s[i].high into overlap->s[i].
+
+   When partial overlaps aren't done, don't loop over source alignments
+   during overlaps.
+
+   Try to make the looping code a bit less horrible.  Right now it's pretty
+   hard to see what iterations are actually done.
+
+   Perhaps specific setups and loops for each style of function under test
+   would be clearer than a parameterized general loop.  There's lots of
+   stuff common to all functions, but the exceptions get messy.
+
+   When there's no overlap, run with both src>dst and src<dst.  A subtle
+   calling-conventions violation occurred in a P6 copy which depended on the
+   relative location of src and dst.
+
+   multiplier_N is more or less a third source region for the addmul_N
+   routines, and could be done with the redzoned region scheme.
+
+*/
+
+
+/* always do assertion checking */
+#define WANT_ASSERT 1
+
+#include "config.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+#if ! HAVE_DECL_SYS_NERR
+extern int sys_nerr;
+#endif
+
+#if ! HAVE_DECL_SYS_ERRLIST
+extern char *sys_errlist[];
+#endif
+
+#if ! HAVE_STRERROR
+/* Minimal strerror replacement for systems lacking one: map errno value
+   n to its message via the global sys_errlist[], returning a fixed
+   string for values outside [0, sys_nerr).  */
+char *
+strerror (int n)
+{
+  if (n < 0 || n >= sys_nerr)
+    return "errno out of range";
+  else
+    return sys_errlist[n];
+}
+#endif
+
+/* Rumour has it some systems lack a define of PROT_NONE. */
+#ifndef PROT_NONE
+#define PROT_NONE   0
+#endif
+
+/* Dummy defines for when mprotect doesn't exist. */
+#ifndef PROT_READ
+#define PROT_READ   0
+#endif
+#ifndef PROT_WRITE
+#define PROT_WRITE  0
+#endif
+
+/* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
+   _SC_PAGE_SIZE instead. */
+#if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
+#define _SC_PAGESIZE  _SC_PAGE_SIZE
+#endif
+
+
+#ifdef EXTRA_PROTOS
+EXTRA_PROTOS
+#endif
+#ifdef EXTRA_PROTOS2
+EXTRA_PROTOS2
+#endif
+
+
+#define DEFAULT_REPETITIONS  10
+
+/* Command-line-adjustable knobs (see option parsing elsewhere in this file).  */
+int  option_repetitions = DEFAULT_REPETITIONS;
+int  option_spinner = 1;        /* progress spinner; disable (-W) under gdb */
+int  option_redzones = 1;       /* mprotect-ed guard pages around destinations */
+int  option_firstsize = 0;      /* first / last operand sizes to test */
+int  option_lastsize = 500;
+int  option_firstsize2 = 0;
+
+/* How many alignments, overlap arrangements and random parameter values
+   are cycled through for each test case.  */
+#define ALIGNMENTS          4
+#define OVERLAPS            4
+#define CARRY_RANDOMS       5
+#define MULTIPLIER_RANDOMS  5
+#define DIVISOR_RANDOMS     5
+#define FRACTION_COUNT      4
+
+int  option_print = 0;
+
+/* Styles of source-operand fill data, selected into option_data.  */
+#define DATA_TRAND  0
+#define DATA_ZEROS  1
+#define DATA_SEQ    2
+#define DATA_FFS    3
+#define DATA_2FD    4
+int  option_data = DATA_TRAND;
+
+
+mp_size_t  pagesize;            /* system page size, queried at startup */
+#define PAGESIZE_LIMBS  (pagesize / GMP_LIMB_BYTES)
+
+/* must be a multiple of the page size */
+#define REDZONE_BYTES   (pagesize * 16)
+#define REDZONE_LIMBS   (REDZONE_BYTES / GMP_LIMB_BYTES)
+
+
+#define MAX3(x,y,z)   (MAX (x, MAX (y, z)))
+
+/* Recognizable fill value for destinations, so locations never written
+   by the function under test stand out when printed.  */
+#if GMP_LIMB_BITS == 32
+#define DEADVAL  CNST_LIMB(0xDEADBEEF)
+#else
+#define DEADVAL  CNST_LIMB(0xDEADBEEFBADDCAFE)
+#endif
+
+
+/* A malloc'd block of limbs: base pointer and its size.  */
+struct region_t {
+  mp_ptr     ptr;
+  mp_size_t  size;
+};
+
+
+/* Where a deliberate trap/fault is expected to occur, used by the
+   signal handling to decide how to report it.  */
+#define TRAP_NOWHERE 0
+#define TRAP_REF     1
+#define TRAP_FUN     2
+#define TRAP_SETUPS  3
+int trap_location = TRAP_NOWHERE;
+
+
+#define NUM_SOURCES  5
+#define NUM_DESTS    2
+
+/* Per-source-operand placement: its backing region, whether the operand
+   sits at the high end of the region, and its alignment offset.  */
+struct source_t {
+  struct region_t  region;
+  int        high;
+  mp_size_t  align;
+  mp_ptr     p;
+};
+
+struct source_t  s[NUM_SOURCES];
+
+/* Per-destination placement: high/low end, alignment, and size.  */
+struct dest_t {
+  int        high;
+  mp_size_t  align;
+  mp_size_t  size;
+};
+
+struct dest_t  d[NUM_DESTS];
+
+struct source_each_t {
+  mp_ptr     p;
+};
+
+struct dest_each_t {
+  struct region_t  region;
+  mp_ptr     p;
+};
+
+/* Parameters for the current test case, shared between the reference
+   run and the function-under-test run.  */
+mp_size_t       size;
+mp_size_t       size2;
+unsigned long   shift;
+mp_limb_t       carry;
+mp_limb_t       divisor;
+mp_limb_t       multiplier;
+mp_limb_t       multiplier_N[8];
+
+/* One complete set of operands and the returned value, instantiated
+   once for the reference computation and once for the real function.  */
+struct each_t {
+  const char  *name;
+  struct dest_each_t    d[NUM_DESTS];
+  struct source_each_t  s[NUM_SOURCES];
+  mp_limb_t  retval;
+};
+
+struct each_t  ref = { "Ref" };
+struct each_t  fun = { "Fun" };
+
+/* Size of source operand n; source 1 may use the secondary size2.  */
+#define SRC_SIZE(n)  ((n) == 1 && tr->size2 ? size2 : size)
+
+void validate_fail (void);
+
+
+#if HAVE_TRY_NEW_C
+#include "try-new.c"
+#endif
+
+
+typedef mp_limb_t (*tryfun_t) (ANYARGS);
+
/* Per-operation descriptor: which operands an operation takes, how their
   sizes relate, what constraints the data must satisfy, what overlaps are
   permitted, and how to check the result (reference routine and/or a
   custom validate function).  One entry per TYPE_xxx in param[].  */
struct try_t {
  char  retval;               /* nonzero if the routine returns a value */

  char  src[NUM_SOURCES];     /* nonzero for each source operand used */
  char  dst[NUM_DESTS];       /* nonzero for each destination used */

/* Codes for the size and dst_size fields below.  */
#define SIZE_YES          1
#define SIZE_ALLOW_ZERO   2
#define SIZE_1            3  /* 1 limb  */
#define SIZE_2            4  /* 2 limbs */
#define SIZE_3            5  /* 3 limbs */
#define SIZE_4            6  /* 4 limbs */
#define SIZE_6            7  /* 6 limbs */
#define SIZE_FRACTION     8  /* size2 is fraction for divrem etc */
#define SIZE_SIZE2        9
#define SIZE_PLUS_1      10
#define SIZE_SUM         11
#define SIZE_DIFF        12
#define SIZE_DIFF_PLUS_1 13
#define SIZE_DIFF_PLUS_3 14
#define SIZE_RETVAL      15
#define SIZE_CEIL_HALF   16
#define SIZE_GET_STR     17
#define SIZE_PLUS_MSIZE_SUB_1 18  /* size+msize-1 */
#define SIZE_ODD         19
  char  size;                 /* constraint on the main size */
  char  size2;                /* nonzero if a second size is used */
  char  dst_size[NUM_DESTS];  /* size of each destination, SIZE_xxx code */

  /* multiplier_N size in limbs */
  mp_size_t  msize;

  char  dst_bytes[NUM_DESTS]; /* nonzero if dst size is in bytes not limbs */

  char  dst0_from_src1;       /* nonzero if dst[0] initialized from src[1] */

/* Codes for the carry field: range of carry-in values to try.  */
#define CARRY_BIT     1  /* single bit 0 or 1 */
#define CARRY_3       2  /* 0, 1, 2 */
#define CARRY_4       3  /* 0 to 3 */
#define CARRY_LIMB    4  /* any limb value */
#define CARRY_DIVISOR 5  /* carry<divisor */
  char  carry;

  /* a fudge to tell the output when to print negatives */
  char  carry_sign;

  char  multiplier;           /* nonzero if a multiplier limb is used */
  char  shift;                /* nonzero if a shift count is used */

/* Codes for the divisor field: constraint on divisor values tried.  */
#define DIVISOR_LIMB  1
#define DIVISOR_NORM  2
#define DIVISOR_ODD   3
  char  divisor;

/* Codes for the data field: constraint on the source data generated.  */
#define DATA_NON_ZERO         1
#define DATA_GCD              2
#define DATA_SRC0_ODD         3
#define DATA_SRC0_HIGHBIT     4
#define DATA_SRC1_ODD         5
#define DATA_SRC1_ODD_PRIME   6
#define DATA_SRC1_HIGHBIT     7
#define DATA_MULTIPLE_DIVISOR 8
#define DATA_UDIV_QRNND       9
#define DATA_DIV_QR_1        10
  char  data;

/* Default is allow full overlap. */
#define OVERLAP_NONE         1
#define OVERLAP_LOW_TO_HIGH  2
#define OVERLAP_HIGH_TO_LOW  3
#define OVERLAP_NOT_SRCS     4
#define OVERLAP_NOT_SRC2     8
#define OVERLAP_NOT_DST2     16
  char  overlap;

  tryfun_t    reference;      /* reference implementation, if any */
  const char  *reference_name;

  void        (*validate) (void);  /* custom result check, if any */
  const char  *validate_name;
};

/* Descriptor for the operation currently being tested.  */
struct try_t  *tr;
+
+
/* Check mpn_mod_34lsub1.  Its return value need not be fully reduced, so
   compare against the reference value modulo 2^(3*GMP_NUMB_BITS/4)-1.  */
void
validate_mod_34lsub1 (void)
{
#define CNST_34LSUB1   ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)

  mp_srcptr  ptr = s[0].p;
  int        error = 0;
  mp_limb_t  got, got_mod, want, want_mod;

  ASSERT (size >= 1);

  got = fun.retval;
  got_mod = got % CNST_34LSUB1;

  want = refmpn_mod_34lsub1 (ptr, size);
  want_mod = want % CNST_34LSUB1;

  /* Equal residues mean the (possibly unreduced) results agree.  */
  if (got_mod != want_mod)
    {
      gmp_printf ("got   0x%MX reduced from 0x%MX\n", got_mod, got);
      gmp_printf ("want  0x%MX reduced from 0x%MX\n", want_mod, want);
      error = 1;
    }

  if (error)
    validate_fail ();
}
+
/* Check mpn_divexact_1.  The input must be an exact multiple of the
   divisor (arranged via DATA_MULTIPLE_DIVISOR); recompute the quotient
   with refmpn_divrem_1 and require a zero remainder and equal quotient.  */
void
validate_divexact_1 (void)
{
  mp_srcptr  src = s[0].p;
  mp_srcptr  dst = fun.d[0].p;
  int  error = 0;

  ASSERT (size >= 1);

  {
    mp_ptr     tp = refmpn_malloc_limbs (size);
    mp_limb_t  rem;

    rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
    /* A non-zero remainder means the test data was not an exact multiple,
       in which case mpn_divexact_1's result is undefined.  */
    if (rem != 0)
      {
	gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
	error = 1;
      }
    if (! refmpn_equal_anynail (tp, dst, size))
      {
	printf ("Quotient a/d wrong\n");
	mpn_trace ("fun ", dst, size);
	mpn_trace ("want", tp, size);
	error = 1;
      }
    free (tp);
  }

  if (error)
    validate_fail ();
}
+
+void
+validate_bdiv_q_1
+ (void)
+{
+  mp_srcptr  src = s[0].p;
+  mp_srcptr  dst = fun.d[0].p;
+  int  error = 0;
+
+  ASSERT (size >= 1);
+
+  {
+    mp_ptr     tp = refmpn_malloc_limbs (size + 1);
+
+    refmpn_mul_1 (tp, dst, size, divisor);
+    /* Set ignored low bits */
+    tp[0] |= (src[0] & LOW_ZEROS_MASK (divisor));
+    if (! refmpn_equal_anynail (tp, src, size))
+      {
+	printf ("Bdiv wrong: res * divisor != src (mod B^size)\n");
+	mpn_trace ("res ", dst, size);
+	mpn_trace ("src ", src, size);
+	error = 1;
+      }
+    free (tp);
+  }
+
+  if (error)
+    validate_fail ();
+}
+
+
/* Check mpn_modexact_1c_odd.  The routine is allowed to return a remainder
   r effectively positioned at either b^(size-1) or b^size, so accept the
   result if r*b^k + a - c is divisible by the divisor for k = size-1 or
   k = size.  Also check the documented range of r relative to carry.  */
void
validate_modexact_1c_odd (void)
{
  mp_srcptr  ptr = s[0].p;
  mp_limb_t  r = fun.retval;
  int  error = 0;

  ASSERT (size >= 1);
  ASSERT (divisor & 1);

  if ((r & GMP_NAIL_MASK) != 0)
    printf ("r has non-zero nail\n");

  /* For carry < divisor the result satisfies r < divisor strictly;
     otherwise only r <= divisor is guaranteed.  */
  if (carry < divisor)
    {
      if (! (r < divisor))
	{
	  printf ("Don't have r < divisor\n");
	  error = 1;
	}
    }
  else /* carry >= divisor */
    {
      if (! (r <= divisor))
	{
	  printf ("Don't have r <= divisor\n");
	  error = 1;
	}
    }

  {
    mp_limb_t  c = carry % divisor;
    mp_ptr     tp = refmpn_malloc_limbs (size+1);
    mp_size_t  k;

    /* Try both permitted positions for the returned remainder.  */
    for (k = size-1; k <= size; k++)
      {
	/* set {tp,size+1} to r*b^k + a - c */
	refmpn_copyi (tp, ptr, size);
	tp[size] = 0;
	ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
	/* If subtracting c borrows, add back one multiple of the divisor
	   to keep the value non-negative; the borrow must be repaid.  */
	if (refmpn_sub_1 (tp, tp, size+1, c))
	  ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));

	if (refmpn_mod_1 (tp, size+1, divisor) == 0)
	  goto good_remainder;
      }
    printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
    error = 1;

  good_remainder:
    free (tp);
  }

  if (error)
    validate_fail ();
}
+
/* Check mpn_modexact_1_odd: same as the "1c" variant with a zero carry.  */
void
validate_modexact_1_odd (void)
{
  carry = 0;
  validate_modexact_1c_odd ();
}
+
+void
+validate_div_qr_1_pi1 (void)
+{
+  mp_srcptr up = ref.s[0].p;
+  mp_size_t un = size;
+  mp_size_t uh = ref.s[1].p[0];
+  mp_srcptr qp = fun.d[0].p;
+  mp_limb_t r = fun.retval;
+  mp_limb_t cy;
+  int cmp;
+  mp_ptr tp;
+  if (r >= divisor)
+    {
+      gmp_printf ("Bad remainder %Md, d = %Md\n", r, divisor);
+      validate_fail ();
+    }
+  tp = refmpn_malloc_limbs (un);
+  cy = refmpn_mul_1 (tp, qp, un, divisor);
+  cy += refmpn_add_1 (tp, tp, un, r);
+  if (cy != uh || refmpn_cmp (tp, up, un) != 0)
+    {
+      gmp_printf ("Incorrect result, size %ld.\n"
+		  "d = %Mx, u = %Mx, %Nx\n"
+		  "got: r = %Mx, q = %Nx\n"
+		  "q d + r = %Mx, %Nx",
+		  (long) un,
+		  divisor, uh, up, un,
+		  r, qp, un,
+		  cy, tp, un);
+      validate_fail ();
+    }
+  free (tp);
+}
+
+
/* Check mpn_sqrtrem: the remainder must be in range 0 <= rem <= 2*root,
   and root^2 + rem must reconstruct the original operand exactly.  */
void
validate_sqrtrem (void)
{
  mp_srcptr  orig_ptr = s[0].p;
  mp_size_t  orig_size = size;
  mp_size_t  root_size = (size+1)/2;   /* ceil(size/2) limbs in the root */
  mp_srcptr  root_ptr = fun.d[0].p;
  mp_size_t  rem_size = fun.retval;    /* retval is the remainder size */
  mp_srcptr  rem_ptr = fun.d[1].p;
  mp_size_t  prod_size = 2*root_size;
  mp_ptr     p;
  int  error = 0;

  if (rem_size < 0 || rem_size > size)
    {
      printf ("Bad remainder size retval %ld\n", (long) rem_size);
      validate_fail ();
    }

  p = refmpn_malloc_limbs (prod_size);

  /* {p,root_size+1} = 2*root; the remainder may not exceed it.  */
  p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
  if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
    {
      printf ("Remainder bigger than 2*root\n");
      error = 1;
    }

  /* root^2 + rem must equal the original operand.  */
  refmpn_sqr (p, root_ptr, root_size);
  if (rem_size != 0)
    refmpn_add (p, p, prod_size, rem_ptr, rem_size);
  if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
    {
      printf ("root^2+rem != original\n");
      mpn_trace ("prod", p, prod_size);
      error = 1;
    }
  free (p);

  if (error)
    validate_fail ();
}
+
/* Check mpn_sqrt (via mpn_sqrtrem with a NULL remainder): root^2 must not
   exceed the original (equal exactly when the return value flagged a
   perfect power), and (root+1)^2 must exceed it.  */
void
validate_sqrt (void)
{
  mp_srcptr  orig_ptr = s[0].p;
  mp_size_t  orig_size = size;
  mp_size_t  root_size = (size+1)/2;   /* ceil(size/2) limbs in the root */
  mp_srcptr  root_ptr = fun.d[0].p;
  int        perf_pow = (fun.retval == 0);  /* zero retval: perfect square */
  mp_size_t  prod_size = 2*root_size;
  mp_ptr     p;
  int  error = 0;

  p = refmpn_malloc_limbs (prod_size);

  /* root^2 compared with the original: expect -1 (strictly smaller)
     normally, 0 (equal) when a perfect power was reported.  */
  refmpn_sqr (p, root_ptr, root_size);
  MPN_NORMALIZE (p, prod_size);
  if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != - !perf_pow)
    {
      printf ("root^2 bigger than original, or wrong return value.\n");
      mpn_trace ("prod...", p, prod_size);
      error = 1;
    }

  /* p = original - root^2; then original - root^2 - root must not still
     exceed root, otherwise (root+1)^2 <= original and the root is too
     small.  (prod_size was renormalized above and is reused here.)  */
  refmpn_sub (p, orig_ptr,orig_size, p,prod_size);
  MPN_NORMALIZE (p, prod_size);
  if (prod_size >= root_size &&
      refmpn_sub (p, p,prod_size, root_ptr, root_size) == 0 &&
      refmpn_cmp_twosizes (p, prod_size, root_ptr, root_size) > 0)
    {
      printf ("(root+1)^2 smaller than original.\n");
      mpn_trace ("prod", p, prod_size);
      error = 1;
    }
  free (p);

  if (error)
    validate_fail ();
}
+
+
+/* These types are indexes into the param[] array and are arbitrary so long
+   as they're all distinct and within the size of param[].  Renumber
+   whenever necessary or desired.  */
+
/* One TYPE_xxx code per tested operation, used to index param[].  */
enum {
  TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC, TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC,

  TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N,
  TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N,

  TYPE_MUL_1, TYPE_MUL_1C,

  TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6,

  TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C,

  TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5, TYPE_ADDMUL_6,
  TYPE_ADDMUL_7, TYPE_ADDMUL_8,

  TYPE_ADDSUB_N, TYPE_ADDSUB_NC,

  TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC,

  TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM,

  TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N,
  TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1,
  TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2,
  TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N,
  TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1,
  TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N,
  TYPE_RSH1ADD_N, TYPE_RSH1SUB_N,

  TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC,
  TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC,
  TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC,

  TYPE_ADDCND_N, TYPE_SUBCND_N,

  TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C, TYPE_DIVREM_1,
  TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2, TYPE_PREINV_MOD_1,
  TYPE_DIV_QR_1N_PI1,
  TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R,

  TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C,
  TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD,

  TYPE_INVERT, TYPE_BINVERT,

  TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA, TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER,
  TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI, TYPE_MPZ_UI_KRONECKER,
  TYPE_MPZ_SI_KRONECKER, TYPE_MPZ_LEGENDRE,

  TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N, TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N,
  TYPE_XOR_N, TYPE_XNOR_N,

  TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R,
  TYPE_MULLO_N, TYPE_SQRLO, TYPE_MULMID_MN, TYPE_MULMID_N,

  TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR,

  TYPE_SQRTREM, TYPE_SQRT, TYPE_ZERO, TYPE_GET_STR, TYPE_POPCOUNT, TYPE_HAMDIST,

  TYPE_EXTRA   /* sentinel: number of entries needed in param[] */
};
+
+struct try_t  param[TYPE_EXTRA];
+
+
/* Fill in param[] with a descriptor for every tested operation: which
   operands it takes, size relationships, data and overlap constraints, and
   the reference and/or validate routine used to check its result.  Entries
   frequently start from a COPY of a similar operation and then adjust the
   differing fields.  */
void
param_init (void)
{
  struct try_t  *p;

/* Start this entry as a copy of an already-initialized one.  */
#define COPY(index)  memcpy (p, &param[index], sizeof (*p))

/* Record the reference implementation, keeping its name for messages.  */
#define REFERENCE(fun)                  \
  p->reference = (tryfun_t) fun;        \
  p->reference_name = #fun
/* Record a custom validate routine, keeping its name for messages.  */
#define VALIDATE(fun)           \
  p->validate = fun;            \
  p->validate_name = #fun


  /* ---- additions and subtractions ---- */

  p = &param[TYPE_ADD_N];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  REFERENCE (refmpn_add_n);

  p = &param[TYPE_ADD_NC];
  COPY (TYPE_ADD_N);
  p->carry = CARRY_BIT;
  REFERENCE (refmpn_add_nc);

  p = &param[TYPE_SUB_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_sub_n);

  p = &param[TYPE_SUB_NC];
  COPY (TYPE_ADD_NC);
  REFERENCE (refmpn_sub_nc);

  p = &param[TYPE_ADD];
  COPY (TYPE_ADD_N);
  p->size = SIZE_ALLOW_ZERO;
  p->size2 = 1;
  REFERENCE (refmpn_add);

  p = &param[TYPE_SUB];
  COPY (TYPE_ADD);
  REFERENCE (refmpn_sub);


  /* ---- add/sub with error terms ---- */

  p = &param[TYPE_ADD_ERR1_N];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->src[2] = 1;
  p->dst_size[1] = SIZE_2;
  p->carry = CARRY_BIT;
  p->overlap = OVERLAP_NOT_DST2;
  REFERENCE (refmpn_add_err1_n);

  p = &param[TYPE_SUB_ERR1_N];
  COPY (TYPE_ADD_ERR1_N);
  REFERENCE (refmpn_sub_err1_n);

  p = &param[TYPE_ADD_ERR2_N];
  COPY (TYPE_ADD_ERR1_N);
  p->src[3] = 1;
  p->dst_size[1] = SIZE_4;
  REFERENCE (refmpn_add_err2_n);

  p = &param[TYPE_SUB_ERR2_N];
  COPY (TYPE_ADD_ERR2_N);
  REFERENCE (refmpn_sub_err2_n);

  p = &param[TYPE_ADD_ERR3_N];
  COPY (TYPE_ADD_ERR2_N);
  p->src[4] = 1;
  p->dst_size[1] = SIZE_6;
  REFERENCE (refmpn_add_err3_n);

  p = &param[TYPE_SUB_ERR3_N];
  COPY (TYPE_ADD_ERR3_N);
  REFERENCE (refmpn_sub_err3_n);

  p = &param[TYPE_ADDCND_N];
  COPY (TYPE_ADD_N);
  p->carry = CARRY_BIT;
  REFERENCE (refmpn_cnd_add_n);

  p = &param[TYPE_SUBCND_N];
  COPY (TYPE_ADD_N);
  p->carry = CARRY_BIT;
  REFERENCE (refmpn_cnd_sub_n);


  /* ---- multiplications by a single limb or small vector ---- */

  p = &param[TYPE_MUL_1];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->multiplier = 1;
  p->overlap = OVERLAP_LOW_TO_HIGH;
  REFERENCE (refmpn_mul_1);

  p = &param[TYPE_MUL_1C];
  COPY (TYPE_MUL_1);
  p->carry = CARRY_LIMB;
  REFERENCE (refmpn_mul_1c);


  p = &param[TYPE_MUL_2];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->msize = 2;
  p->overlap = OVERLAP_NOT_SRC2;
  REFERENCE (refmpn_mul_2);

  p = &param[TYPE_MUL_3];
  COPY (TYPE_MUL_2);
  p->msize = 3;
  REFERENCE (refmpn_mul_3);

  p = &param[TYPE_MUL_4];
  COPY (TYPE_MUL_2);
  p->msize = 4;
  REFERENCE (refmpn_mul_4);

  p = &param[TYPE_MUL_5];
  COPY (TYPE_MUL_2);
  p->msize = 5;
  REFERENCE (refmpn_mul_5);

  p = &param[TYPE_MUL_6];
  COPY (TYPE_MUL_2);
  p->msize = 6;
  REFERENCE (refmpn_mul_6);


  /* ---- addmul/submul ---- */

  p = &param[TYPE_ADDMUL_1];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->multiplier = 1;
  p->dst0_from_src1 = 1;
  REFERENCE (refmpn_addmul_1);

  p = &param[TYPE_ADDMUL_1C];
  COPY (TYPE_ADDMUL_1);
  p->carry = CARRY_LIMB;
  REFERENCE (refmpn_addmul_1c);

  p = &param[TYPE_SUBMUL_1];
  COPY (TYPE_ADDMUL_1);
  REFERENCE (refmpn_submul_1);

  p = &param[TYPE_SUBMUL_1C];
  COPY (TYPE_ADDMUL_1C);
  REFERENCE (refmpn_submul_1c);


  p = &param[TYPE_ADDMUL_2];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->msize = 2;
  p->dst0_from_src1 = 1;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_addmul_2);

  p = &param[TYPE_ADDMUL_3];
  COPY (TYPE_ADDMUL_2);
  p->msize = 3;
  REFERENCE (refmpn_addmul_3);

  p = &param[TYPE_ADDMUL_4];
  COPY (TYPE_ADDMUL_2);
  p->msize = 4;
  REFERENCE (refmpn_addmul_4);

  p = &param[TYPE_ADDMUL_5];
  COPY (TYPE_ADDMUL_2);
  p->msize = 5;
  REFERENCE (refmpn_addmul_5);

  p = &param[TYPE_ADDMUL_6];
  COPY (TYPE_ADDMUL_2);
  p->msize = 6;
  REFERENCE (refmpn_addmul_6);

  p = &param[TYPE_ADDMUL_7];
  COPY (TYPE_ADDMUL_2);
  p->msize = 7;
  REFERENCE (refmpn_addmul_7);

  p = &param[TYPE_ADDMUL_8];
  COPY (TYPE_ADDMUL_2);
  p->msize = 8;
  REFERENCE (refmpn_addmul_8);


  /* ---- bitwise logical operations ---- */

  p = &param[TYPE_AND_N];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  REFERENCE (refmpn_and_n);

  p = &param[TYPE_ANDN_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_andn_n);

  p = &param[TYPE_NAND_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_nand_n);

  p = &param[TYPE_IOR_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_ior_n);

  p = &param[TYPE_IORN_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_iorn_n);

  p = &param[TYPE_NIOR_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_nior_n);

  p = &param[TYPE_XOR_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_xor_n);

  p = &param[TYPE_XNOR_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_xnor_n);


  /* ---- simultaneous add and subtract ---- */

  p = &param[TYPE_ADDSUB_N];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  REFERENCE (refmpn_add_n_sub_n);

  p = &param[TYPE_ADDSUB_NC];
  COPY (TYPE_ADDSUB_N);
  p->carry = CARRY_4;
  REFERENCE (refmpn_add_n_sub_nc);


  /* ---- copies and complement ---- */

  p = &param[TYPE_COPY];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->overlap = OVERLAP_NONE;
  p->size = SIZE_ALLOW_ZERO;
  REFERENCE (refmpn_copy);

  p = &param[TYPE_COPYI];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->overlap = OVERLAP_LOW_TO_HIGH;
  p->size = SIZE_ALLOW_ZERO;
  REFERENCE (refmpn_copyi);

  p = &param[TYPE_COPYD];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->overlap = OVERLAP_HIGH_TO_LOW;
  p->size = SIZE_ALLOW_ZERO;
  REFERENCE (refmpn_copyd);

  p = &param[TYPE_COM];
  p->dst[0] = 1;
  p->src[0] = 1;
  REFERENCE (refmpn_com);


  /* ---- combined shift-and-add/subtract ---- */

  p = &param[TYPE_ADDLSH1_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_addlsh1_n);

  p = &param[TYPE_ADDLSH2_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_addlsh2_n);

  p = &param[TYPE_ADDLSH_N];
  COPY (TYPE_ADD_N);
  p->shift = 1;
  REFERENCE (refmpn_addlsh_n);

  p = &param[TYPE_ADDLSH1_N_IP1];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->dst0_from_src1 = 1;
  REFERENCE (refmpn_addlsh1_n_ip1);

  p = &param[TYPE_ADDLSH2_N_IP1];
  COPY (TYPE_ADDLSH1_N_IP1);
  REFERENCE (refmpn_addlsh2_n_ip1);

  p = &param[TYPE_ADDLSH_N_IP1];
  COPY (TYPE_ADDLSH1_N_IP1);
  p->shift = 1;
  REFERENCE (refmpn_addlsh_n_ip1);

  p = &param[TYPE_ADDLSH1_N_IP2];
  COPY (TYPE_ADDLSH1_N_IP1);
  REFERENCE (refmpn_addlsh1_n_ip2);

  p = &param[TYPE_ADDLSH2_N_IP2];
  COPY (TYPE_ADDLSH1_N_IP1);
  REFERENCE (refmpn_addlsh2_n_ip2);

  p = &param[TYPE_ADDLSH_N_IP2];
  COPY (TYPE_ADDLSH_N_IP1);
  REFERENCE (refmpn_addlsh_n_ip2);

  p = &param[TYPE_SUBLSH1_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_sublsh1_n);

  p = &param[TYPE_SUBLSH2_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_sublsh2_n);

  p = &param[TYPE_SUBLSH_N];
  COPY (TYPE_ADDLSH_N);
  REFERENCE (refmpn_sublsh_n);

  p = &param[TYPE_SUBLSH1_N_IP1];
  COPY (TYPE_ADDLSH1_N_IP1);
  REFERENCE (refmpn_sublsh1_n_ip1);

  p = &param[TYPE_SUBLSH2_N_IP1];
  COPY (TYPE_ADDLSH1_N_IP1);
  REFERENCE (refmpn_sublsh2_n_ip1);

  p = &param[TYPE_SUBLSH_N_IP1];
  COPY (TYPE_ADDLSH_N_IP1);
  REFERENCE (refmpn_sublsh_n_ip1);

  p = &param[TYPE_RSBLSH1_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_rsblsh1_n);

  p = &param[TYPE_RSBLSH2_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_rsblsh2_n);

  p = &param[TYPE_RSBLSH_N];
  COPY (TYPE_ADDLSH_N);
  REFERENCE (refmpn_rsblsh_n);

  p = &param[TYPE_RSH1ADD_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_rsh1add_n);

  p = &param[TYPE_RSH1SUB_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_rsh1sub_n);


  p = &param[TYPE_ADDLSH1_NC];
  COPY (TYPE_ADDLSH1_N);
  p->carry = CARRY_3;
  REFERENCE (refmpn_addlsh1_nc);

  p = &param[TYPE_ADDLSH2_NC];
  COPY (TYPE_ADDLSH2_N);
  p->carry = CARRY_4; /* FIXME */
  REFERENCE (refmpn_addlsh2_nc);

  p = &param[TYPE_ADDLSH_NC];
  COPY (TYPE_ADDLSH_N);
  p->carry = CARRY_BIT; /* FIXME */
  REFERENCE (refmpn_addlsh_nc);

  p = &param[TYPE_SUBLSH1_NC];
  COPY (TYPE_ADDLSH1_NC);
  REFERENCE (refmpn_sublsh1_nc);

  p = &param[TYPE_SUBLSH2_NC];
  COPY (TYPE_ADDLSH2_NC);
  REFERENCE (refmpn_sublsh2_nc);

  p = &param[TYPE_SUBLSH_NC];
  COPY (TYPE_ADDLSH_NC);
  REFERENCE (refmpn_sublsh_nc);

  p = &param[TYPE_RSBLSH1_NC];
  COPY (TYPE_RSBLSH1_N);
  p->carry = CARRY_BIT; /* FIXME */
  REFERENCE (refmpn_rsblsh1_nc);

  p = &param[TYPE_RSBLSH2_NC];
  COPY (TYPE_RSBLSH2_N);
  p->carry = CARRY_4; /* FIXME */
  REFERENCE (refmpn_rsblsh2_nc);

  p = &param[TYPE_RSBLSH_NC];
  COPY (TYPE_RSBLSH_N);
  p->carry = CARRY_BIT; /* FIXME */
  REFERENCE (refmpn_rsblsh_nc);


  /* ---- division and remainder ---- */

  p = &param[TYPE_MOD_1];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->divisor = DIVISOR_LIMB;
  REFERENCE (refmpn_mod_1);

  p = &param[TYPE_MOD_1C];
  COPY (TYPE_MOD_1);
  p->carry = CARRY_DIVISOR;
  REFERENCE (refmpn_mod_1c);

  p = &param[TYPE_DIVMOD_1];
  COPY (TYPE_MOD_1);
  p->dst[0] = 1;
  REFERENCE (refmpn_divmod_1);

  p = &param[TYPE_DIVMOD_1C];
  COPY (TYPE_DIVMOD_1);
  p->carry = CARRY_DIVISOR;
  REFERENCE (refmpn_divmod_1c);

  p = &param[TYPE_DIVREM_1];
  COPY (TYPE_DIVMOD_1);
  p->size2 = SIZE_FRACTION;
  p->dst_size[0] = SIZE_SUM;
  REFERENCE (refmpn_divrem_1);

  p = &param[TYPE_DIVREM_1C];
  COPY (TYPE_DIVREM_1);
  p->carry = CARRY_DIVISOR;
  REFERENCE (refmpn_divrem_1c);

  p = &param[TYPE_PREINV_DIVREM_1];
  COPY (TYPE_DIVREM_1);
  p->size = SIZE_YES; /* ie. no size==0 */
  REFERENCE (refmpn_preinv_divrem_1);

  p = &param[TYPE_DIV_QR_1N_PI1];
  p->retval = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  /* SIZE_1 not supported. Always uses low limb only. */
  p->size2 = 1;
  p->dst[0] = 1;
  p->divisor = DIVISOR_NORM;
  p->data = DATA_DIV_QR_1;
  VALIDATE (validate_div_qr_1_pi1);

  p = &param[TYPE_PREINV_MOD_1];
  p->retval = 1;
  p->src[0] = 1;
  p->divisor = DIVISOR_NORM;
  REFERENCE (refmpn_preinv_mod_1);

  p = &param[TYPE_MOD_34LSUB1];
  p->retval = 1;
  p->src[0] = 1;
  VALIDATE (validate_mod_34lsub1);

  p = &param[TYPE_UDIV_QRNND];
  p->retval = 1;
  p->src[0] = 1;
  p->dst[0] = 1;
  p->dst_size[0] = SIZE_1;
  p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
  p->data = DATA_UDIV_QRNND;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_udiv_qrnnd);

  p = &param[TYPE_UDIV_QRNND_R];
  COPY (TYPE_UDIV_QRNND);
  REFERENCE (refmpn_udiv_qrnnd_r);


  /* ---- exact division and modexact ---- */

  p = &param[TYPE_DIVEXACT_1];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->divisor = DIVISOR_LIMB;
  p->data = DATA_MULTIPLE_DIVISOR;
  VALIDATE (validate_divexact_1);
  REFERENCE (refmpn_divmod_1);

  p = &param[TYPE_BDIV_Q_1];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->divisor = DIVISOR_LIMB;
  VALIDATE (validate_bdiv_q_1);

  p = &param[TYPE_DIVEXACT_BY3];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  REFERENCE (refmpn_divexact_by3);

  p = &param[TYPE_DIVEXACT_BY3C];
  COPY (TYPE_DIVEXACT_BY3);
  p->carry = CARRY_3;
  REFERENCE (refmpn_divexact_by3c);


  p = &param[TYPE_MODEXACT_1_ODD];
  p->retval = 1;
  p->src[0] = 1;
  p->divisor = DIVISOR_ODD;
  VALIDATE (validate_modexact_1_odd);

  p = &param[TYPE_MODEXACT_1C_ODD];
  COPY (TYPE_MODEXACT_1_ODD);
  p->carry = CARRY_LIMB;
  VALIDATE (validate_modexact_1c_odd);


  /* ---- gcd and number-theoretic functions ---- */

  p = &param[TYPE_GCD_1];
  p->retval = 1;
  p->src[0] = 1;
  p->data = DATA_NON_ZERO;
  p->divisor = DIVISOR_LIMB;
  REFERENCE (refmpn_gcd_1);

  p = &param[TYPE_GCD];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->size2 = 1;
  p->dst_size[0] = SIZE_RETVAL;
  p->overlap = OVERLAP_NOT_SRCS;
  p->data = DATA_GCD;
  REFERENCE (refmpn_gcd);


  p = &param[TYPE_MPZ_LEGENDRE];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->src[1] = 1;
  p->data = DATA_SRC1_ODD_PRIME;
  p->size2 = 1;
  p->carry = CARRY_BIT;
  p->carry_sign = 1;
  REFERENCE (refmpz_legendre);

  p = &param[TYPE_MPZ_JACOBI];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->src[1] = 1;
  p->data = DATA_SRC1_ODD;
  p->size2 = 1;
  p->carry = CARRY_BIT;
  p->carry_sign = 1;
  REFERENCE (refmpz_jacobi);

  p = &param[TYPE_MPZ_KRONECKER];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->src[1] = 1;
  p->data = 0;
  p->size2 = 1;
  p->carry = CARRY_4;
  p->carry_sign = 1;
  REFERENCE (refmpz_kronecker);


  p = &param[TYPE_MPZ_KRONECKER_UI];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->multiplier = 1;
  p->carry = CARRY_BIT;
  REFERENCE (refmpz_kronecker_ui);

  p = &param[TYPE_MPZ_KRONECKER_SI];
  COPY (TYPE_MPZ_KRONECKER_UI);
  REFERENCE (refmpz_kronecker_si);

  p = &param[TYPE_MPZ_UI_KRONECKER];
  COPY (TYPE_MPZ_KRONECKER_UI);
  REFERENCE (refmpz_ui_kronecker);

  p = &param[TYPE_MPZ_SI_KRONECKER];
  COPY (TYPE_MPZ_KRONECKER_UI);
  REFERENCE (refmpz_si_kronecker);


  /* ---- full multiplication and squaring ---- */

  p = &param[TYPE_SQR];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->dst_size[0] = SIZE_SUM;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_sqr);

  p = &param[TYPE_MUL_N];
  COPY (TYPE_SQR);
  p->src[1] = 1;
  REFERENCE (refmpn_mul_n);

  p = &param[TYPE_MULLO_N];
  COPY (TYPE_MUL_N);
  p->dst_size[0] = 0;
  REFERENCE (refmpn_mullo_n);

  p = &param[TYPE_SQRLO];
  COPY (TYPE_SQR);
  p->dst_size[0] = 0;
  REFERENCE (refmpn_sqrlo);

  p = &param[TYPE_MUL_MN];
  COPY (TYPE_MUL_N);
  p->size2 = 1;
  REFERENCE (refmpn_mul_basecase);

  p = &param[TYPE_MULMID_MN];
  COPY (TYPE_MUL_MN);
  p->dst_size[0] = SIZE_DIFF_PLUS_3;
  REFERENCE (refmpn_mulmid_basecase);

  p = &param[TYPE_MULMID_N];
  COPY (TYPE_MUL_N);
  p->size = SIZE_ODD;
  p->size2 = SIZE_CEIL_HALF;
  p->dst_size[0] = SIZE_DIFF_PLUS_3;
  REFERENCE (refmpn_mulmid_n);

  p = &param[TYPE_UMUL_PPMM];
  p->retval = 1;
  p->src[0] = 1;
  p->dst[0] = 1;
  p->dst_size[0] = SIZE_1;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_umul_ppmm);

  p = &param[TYPE_UMUL_PPMM_R];
  COPY (TYPE_UMUL_PPMM);
  REFERENCE (refmpn_umul_ppmm_r);


  /* ---- shifts ---- */

  p = &param[TYPE_RSHIFT];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->shift = 1;
  p->overlap = OVERLAP_LOW_TO_HIGH;
  REFERENCE (refmpn_rshift);

  p = &param[TYPE_LSHIFT];
  COPY (TYPE_RSHIFT);
  p->overlap = OVERLAP_HIGH_TO_LOW;
  REFERENCE (refmpn_lshift);

  p = &param[TYPE_LSHIFTC];
  COPY (TYPE_RSHIFT);
  p->overlap = OVERLAP_HIGH_TO_LOW;
  REFERENCE (refmpn_lshiftc);


  /* ---- population count and hamming distance ---- */

  p = &param[TYPE_POPCOUNT];
  p->retval = 1;
  p->src[0] = 1;
  REFERENCE (refmpn_popcount);

  p = &param[TYPE_HAMDIST];
  COPY (TYPE_POPCOUNT);
  p->src[1] = 1;
  REFERENCE (refmpn_hamdist);


  /* ---- multi-limb division ---- */

  p = &param[TYPE_SBPI1_DIV_QR];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->data = DATA_SRC1_HIGHBIT;
  p->size2 = 1;
  p->dst_size[0] = SIZE_DIFF;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_sb_div_qr);

  p = &param[TYPE_TDIV_QR];
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->size2 = 1;
  p->dst_size[0] = SIZE_DIFF_PLUS_1;
  p->dst_size[1] = SIZE_SIZE2;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_tdiv_qr);

  /* ---- square roots ---- */

  p = &param[TYPE_SQRTREM];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->dst_size[0] = SIZE_CEIL_HALF;
  p->dst_size[1] = SIZE_RETVAL;
  p->overlap = OVERLAP_NONE;
  VALIDATE (validate_sqrtrem);
  REFERENCE (refmpn_sqrtrem);

  p = &param[TYPE_SQRT];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst[1] = 0;
  p->src[0] = 1;
  p->dst_size[0] = SIZE_CEIL_HALF;
  p->overlap = OVERLAP_NONE;
  VALIDATE (validate_sqrt);

  /* ---- miscellaneous ---- */

  p = &param[TYPE_ZERO];
  p->dst[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  REFERENCE (refmpn_zero);

  p = &param[TYPE_GET_STR];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->dst_size[0] = SIZE_GET_STR;
  p->dst_bytes[0] = 1;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_get_str);

  p = &param[TYPE_BINVERT];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->data = DATA_SRC0_ODD;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_binvert);

  p = &param[TYPE_INVERT];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->data = DATA_SRC0_HIGHBIT;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_invert);

#ifdef EXTRA_PARAM_INIT
  EXTRA_PARAM_INIT
#endif
}
+
+
+/* The following are macros if there's no native versions, so wrap them in
+   functions that can be in try_array[]. */
+
/* Function wrappers for the copy macros, so they can appear in
   try_array[] as ordinary function pointers.  */
void
MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{ MPN_COPY (rp, sp, size); }

void
MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{ MPN_COPY_INCR (rp, sp, size); }

void
MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{ MPN_COPY_DECR (rp, sp, size); }

void
__GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{ __GMPN_COPY (rp, sp, size); }

/* __GMPN_COPY_INCR is only defined on some configurations.  */
#ifdef __GMPN_COPY_INCR
void
__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{ __GMPN_COPY_INCR (rp, sp, size); }
#endif
+
/* Wrappers for the complement and bitwise-logical mpn macros/inlines.  */
void
mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{ mpn_com (rp, sp, size); }

void
mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
{ mpn_and_n (rp, s1, s2, size); }

void
mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
{ mpn_andn_n (rp, s1, s2, size); }

void
mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
{ mpn_nand_n (rp, s1, s2, size); }

void
mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
{ mpn_ior_n (rp, s1, s2, size); }

void
mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
{ mpn_iorn_n (rp, s1, s2, size); }

void
mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
{ mpn_nior_n (rp, s1, s2, size); }

void
mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
{ mpn_xor_n (rp, s1, s2, size); }

void
mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
{ mpn_xnor_n (rp, s1, s2, size); }
+
/* Wrap the udiv_qrnnd macro: return the quotient, store the remainder
   through remptr.  The dividend is the two-limb value n1*b + n0.  */
mp_limb_t
udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
{
  mp_limb_t  q;
  udiv_qrnnd (q, *remptr, n1, n0, d);
  return q;
}

/* Wrap mpn_divexact_by3, which may itself be a macro.  */
mp_limb_t
mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return mpn_divexact_by3 (rp, sp, size);
}
+
/* Wrappers for the in-place shift-and-add/subtract entry points, which
   exist only when the CPU provides native versions.  */
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
mp_limb_t
mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return mpn_addlsh1_n_ip1 (rp, sp, size);
}
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
mp_limb_t
mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return mpn_addlsh2_n_ip1 (rp, sp, size);
}
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
mp_limb_t
mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  return mpn_addlsh_n_ip1 (rp, sp, size, sh);
}
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
mp_limb_t
mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return mpn_addlsh1_n_ip2 (rp, sp, size);
}
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
mp_limb_t
mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return mpn_addlsh2_n_ip2 (rp, sp, size);
}
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
mp_limb_t
mpn_addlsh_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  return mpn_addlsh_n_ip2 (rp, sp, size, sh);
}
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
mp_limb_t
mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return mpn_sublsh1_n_ip1 (rp, sp, size);
}
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
mp_limb_t
mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return mpn_sublsh2_n_ip1 (rp, sp, size);
}
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
mp_limb_t
mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
{
  return mpn_sublsh_n_ip1 (rp, sp, size, sh);
}
#endif

/* Wrap mpn_modexact_1_odd, which may be a macro.  */
mp_limb_t
mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
{
  return mpn_modexact_1_odd (ptr, size, divisor);
}
+
/* Wrappers for the Toom multiplication/squaring internals: allocate the
   scratch space each routine needs (per its _itch function) and free it
   again after the call.  */
void
mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
{
  mp_ptr  tspace;
  TMP_DECL;
  TMP_MARK;
  tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
  mpn_toom22_mul (dst, src1, size, src2, size, tspace);
  TMP_FREE;
}
void
mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  mp_ptr tspace;
  TMP_DECL;
  TMP_MARK;
  tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
  mpn_toom2_sqr (dst, src, size, tspace);
  TMP_FREE;
}
void
mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
{
  mp_ptr  tspace;
  TMP_DECL;
  TMP_MARK;
  tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
  mpn_toom33_mul (dst, src1, size, src2, size, tspace);
  TMP_FREE;
}
void
mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  mp_ptr tspace;
  TMP_DECL;
  TMP_MARK;
  tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
  mpn_toom3_sqr (dst, src, size, tspace);
  TMP_FREE;
}
void
mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
{
  mp_ptr  tspace;
  TMP_DECL;
  TMP_MARK;
  tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
  mpn_toom44_mul (dst, src1, size, src2, size, tspace);
  TMP_FREE;
}
void
mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  mp_ptr tspace;
  TMP_DECL;
  TMP_MARK;
  tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
  mpn_toom4_sqr (dst, src, size, tspace);
  TMP_FREE;
}
+
+void
+mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+		       mp_size_t size)
+{
+  mp_ptr  tspace;
+  mp_size_t n;
+  TMP_DECL;
+  TMP_MARK;
+  tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size));
+  mpn_toom42_mulmid (dst, src1, src2, size, tspace);
+  TMP_FREE;
+}
+
/* Wrap the umul_ppmm macro: return the high limb of m1*m2, store the low
   limb through lowptr.  */
mp_limb_t
umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
{
  mp_limb_t  high;
  umul_ppmm (high, *lowptr, m1, m2);
  return high;
}

/* Wrap the MPN_ZERO macro.  */
void
MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
{ MPN_ZERO (ptr, size); }

/* Square root without remainder: mpn_sqrtrem with a NULL remainder
   pointer; the return value distinguishes perfect squares (zero).  */
mp_size_t
mpn_sqrt_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
{ return mpn_sqrtrem (dst, NULL, src, size); }
+
/* One testable routine: its display name, entry point, the TYPE_xxx code
   selecting its param[] descriptor, and the minimum operand size it
   supports.  */
struct choice_t {
  const char  *name;
  tryfun_t    function;
  int         type;        /* TYPE_xxx index into param[] */
  mp_size_t   minsize;     /* smallest size the routine accepts */
};
+
/* Build the name/function pair for choice_array[]: TRY for routines
   callable directly, TRY_FUNFUN for macros wrapped in xxx_fun above.  */
#define TRY(fun)        #fun, (tryfun_t) fun
#define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
+
/* Table of every routine the harness knows how to exercise.  Entries for
   native-only routines are conditionalized on HAVE_NATIVE_xxx; a trailing
   number in an entry is the routine's minimum operand size (minsize),
   otherwise it defaults to 0.  */
const struct choice_t choice_array[] = {
  { TRY(mpn_add),       TYPE_ADD    },
  { TRY(mpn_sub),       TYPE_SUB    },

  { TRY(mpn_add_n),     TYPE_ADD_N  },
  { TRY(mpn_sub_n),     TYPE_SUB_N  },

#if HAVE_NATIVE_mpn_add_nc
  { TRY(mpn_add_nc),    TYPE_ADD_NC },
#endif
#if HAVE_NATIVE_mpn_sub_nc
  { TRY(mpn_sub_nc),    TYPE_SUB_NC },
#endif

#if HAVE_NATIVE_mpn_add_n_sub_n
  { TRY(mpn_add_n_sub_n),  TYPE_ADDSUB_N  },
#endif
#if HAVE_NATIVE_mpn_add_n_sub_nc
  { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
#endif

  { TRY(mpn_add_err1_n),  TYPE_ADD_ERR1_N  },
  { TRY(mpn_sub_err1_n),  TYPE_SUB_ERR1_N  },
  { TRY(mpn_add_err2_n),  TYPE_ADD_ERR2_N  },
  { TRY(mpn_sub_err2_n),  TYPE_SUB_ERR2_N  },
  { TRY(mpn_add_err3_n),  TYPE_ADD_ERR3_N  },
  { TRY(mpn_sub_err3_n),  TYPE_SUB_ERR3_N  },

  { TRY(mpn_addmul_1),  TYPE_ADDMUL_1  },
  { TRY(mpn_submul_1),  TYPE_SUBMUL_1  },
#if HAVE_NATIVE_mpn_addmul_1c
  { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
#endif
#if HAVE_NATIVE_mpn_submul_1c
  { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
#endif

#if HAVE_NATIVE_mpn_addmul_2
  { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
#endif

  { TRY_FUNFUN(mpn_com),  TYPE_COM },

  { TRY_FUNFUN(MPN_COPY),      TYPE_COPY },
  { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
  { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },

  { TRY_FUNFUN(__GMPN_COPY),      TYPE_COPY },
#ifdef __GMPN_COPY_INCR
  { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
#endif

#if HAVE_NATIVE_mpn_copyi
  { TRY(mpn_copyi), TYPE_COPYI },
#endif
#if HAVE_NATIVE_mpn_copyd
  { TRY(mpn_copyd), TYPE_COPYD },
#endif

  { TRY(mpn_cnd_add_n), TYPE_ADDCND_N },
  { TRY(mpn_cnd_sub_n), TYPE_SUBCND_N },
#if HAVE_NATIVE_mpn_addlsh1_n == 1
  { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n == 1
  { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
  { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
  { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
  { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
  { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
  { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
  { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
  { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n == 1
  { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n == 1
  { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N },
#endif
#if HAVE_NATIVE_mpn_sublsh_n
  { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
  { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
  { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
  { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n == 1
  { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n == 1
  { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
  { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
#endif

#if HAVE_NATIVE_mpn_addlsh1_nc == 1
  { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_addlsh2_nc == 1
  { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_addlsh_nc
  { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh1_nc == 1
  { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh2_nc == 1
  { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh_nc
  { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_nc
  { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_nc
  { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh_nc
  { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC },
#endif

  { TRY_FUNFUN(mpn_and_n),  TYPE_AND_N  },
  { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
  { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
  { TRY_FUNFUN(mpn_ior_n),  TYPE_IOR_N  },
  { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
  { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
  { TRY_FUNFUN(mpn_xor_n),  TYPE_XOR_N  },
  { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },

  { TRY(mpn_divrem_1),     TYPE_DIVREM_1 },
#if USE_PREINV_DIVREM_1
  { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
#endif
  { TRY(mpn_mod_1),        TYPE_MOD_1 },
#if USE_PREINV_MOD_1
  { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
#endif
#if HAVE_NATIVE_mpn_divrem_1c
  { TRY(mpn_divrem_1c),    TYPE_DIVREM_1C },
#endif
#if HAVE_NATIVE_mpn_mod_1c
  { TRY(mpn_mod_1c),       TYPE_MOD_1C },
#endif
  { TRY(mpn_div_qr_1n_pi1), TYPE_DIV_QR_1N_PI1 },
#if GMP_NUMB_BITS % 4 == 0
  { TRY(mpn_mod_34lsub1),  TYPE_MOD_34LSUB1 },
#endif

  { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { TRY(mpn_udiv_qrnnd),    TYPE_UDIV_QRNND, 2 },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { TRY(mpn_udiv_qrnnd_r),  TYPE_UDIV_QRNND_R, 2 },
#endif

  { TRY(mpn_divexact_1),          TYPE_DIVEXACT_1 },
  { TRY(mpn_bdiv_q_1),            TYPE_BDIV_Q_1 },
  { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
  { TRY(mpn_divexact_by3c),       TYPE_DIVEXACT_BY3C },

  { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
  { TRY(mpn_modexact_1c_odd),       TYPE_MODEXACT_1C_ODD },


  { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
  { TRY(mpn_tdiv_qr),      TYPE_TDIV_QR },

  { TRY(mpn_mul_1),      TYPE_MUL_1 },
#if HAVE_NATIVE_mpn_mul_1c
  { TRY(mpn_mul_1c),     TYPE_MUL_1C },
#endif
#if HAVE_NATIVE_mpn_mul_2
  { TRY(mpn_mul_2),      TYPE_MUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { TRY(mpn_mul_3),      TYPE_MUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { TRY(mpn_mul_4),      TYPE_MUL_4, 4 },
#endif
#if HAVE_NATIVE_mpn_mul_5
  { TRY(mpn_mul_5),      TYPE_MUL_5, 5 },
#endif
#if HAVE_NATIVE_mpn_mul_6
  { TRY(mpn_mul_6),      TYPE_MUL_6, 6 },
#endif

  { TRY(mpn_rshift),     TYPE_RSHIFT },
  { TRY(mpn_lshift),     TYPE_LSHIFT },
  { TRY(mpn_lshiftc),    TYPE_LSHIFTC },


  { TRY(mpn_mul_basecase), TYPE_MUL_MN },
  { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN },
  { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
  { TRY(mpn_sqrlo_basecase), TYPE_SQRLO },
  { TRY(mpn_sqrlo), TYPE_SQRLO },
#if SQR_TOOM2_THRESHOLD > 0
  { TRY(mpn_sqr_basecase), TYPE_SQR },
#endif

  { TRY(mpn_mul),    TYPE_MUL_MN },
  { TRY(mpn_mul_n),  TYPE_MUL_N },
  { TRY(mpn_sqr),    TYPE_SQR },

  { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
#if HAVE_NATIVE_mpn_umul_ppmm
  { TRY(mpn_umul_ppmm),    TYPE_UMUL_PPMM, 2 },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { TRY(mpn_umul_ppmm_r),  TYPE_UMUL_PPMM_R, 2 },
#endif

  { TRY_FUNFUN(mpn_toom22_mul),  TYPE_MUL_N,  MPN_TOOM22_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom2_sqr),   TYPE_SQR,    MPN_TOOM2_SQR_MINSIZE },
  { TRY_FUNFUN(mpn_toom33_mul),  TYPE_MUL_N,  MPN_TOOM33_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom3_sqr),   TYPE_SQR,    MPN_TOOM3_SQR_MINSIZE },
  { TRY_FUNFUN(mpn_toom44_mul),  TYPE_MUL_N,  MPN_TOOM44_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom4_sqr),   TYPE_SQR,    MPN_TOOM4_SQR_MINSIZE },

  { TRY(mpn_mulmid_n),  TYPE_MULMID_N, 1 },
  { TRY(mpn_mulmid),  TYPE_MULMID_MN, 1 },
  { TRY_FUNFUN(mpn_toom42_mulmid),  TYPE_MULMID_N,
    (2 * MPN_TOOM42_MULMID_MINSIZE - 1) },

  { TRY(mpn_gcd_1),        TYPE_GCD_1            },
  { TRY(mpn_gcd),          TYPE_GCD              },
  { TRY(mpz_legendre),     TYPE_MPZ_LEGENDRE     },
  { TRY(mpz_jacobi),       TYPE_MPZ_JACOBI       },
  { TRY(mpz_kronecker),    TYPE_MPZ_KRONECKER    },
  { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
  { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
  { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
  { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },

  { TRY(mpn_popcount),   TYPE_POPCOUNT },
  { TRY(mpn_hamdist),    TYPE_HAMDIST },

  { TRY(mpn_sqrtrem),     TYPE_SQRTREM },
  { TRY_FUNFUN(mpn_sqrt), TYPE_SQRT },

  { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },

  { TRY(mpn_get_str),    TYPE_GET_STR },

  { TRY(mpn_binvert),    TYPE_BINVERT },
  { TRY(mpn_invert),     TYPE_INVERT  },

#ifdef EXTRA_ROUTINES
  EXTRA_ROUTINES
#endif
};

/* The routine currently selected for testing, or NULL before selection.  */
const struct choice_t *choice = NULL;
+
+
/* mprotect ADDR/LEN to PROT, but only when redzone checking is enabled
   (option_redzones).  On mprotect failure, report and exit.  Where
   mprotect is not available, print a warning once and continue without
   bounds testing.  */
void
mprotect_maybe (void *addr, size_t len, int prot)
{
  if (!option_redzones)
    return;

#if HAVE_MPROTECT
  if (mprotect (addr, len, prot) != 0)
    {
      fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
	       addr, (unsigned) len, prot, strerror (errno));
      exit (1);
    }
#else
  {
    static int  warned = 0;  /* warn only on the first call */
    if (!warned)
      {
	fprintf (stderr,
		 "mprotect not available, bounds testing not performed\n");
	warned = 1;
      }
  }
#endif
}
+
/* Round A up to the next multiple of M (A itself if already a multiple).
   The intermediate remainder is kept in a size_t: the previous
   "unsigned long" could truncate on systems where size_t is wider than
   unsigned long (e.g. 64-bit Windows / LLP64).  M must be non-zero.  */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  r;

  r = a % m;
  if (r == 0)
    return a;
  else
    return a + (m - r);
}
+
+
+/* On some systems it seems that only an mmap'ed region can be mprotect'ed,
+   for instance HP-UX 10.
+
+   mmap will almost certainly return a pointer already aligned to a page
+   boundary, but it's easy enough to share the alignment handling with the
+   malloc case. */
+
/* Allocate a test region of at least N limbs into R, rounded up to whole
   pages, with a write-protected redzone page region below and above the
   usable area (when redzones are enabled).  R->ptr points at the usable
   limbs, R->size is the rounded-up limb count.  The allocation is never
   freed; regions live for the whole test run.  */
void
malloc_region (struct region_t *r, mp_size_t n)
{
  mp_ptr  p;
  size_t  nbytes;

  ASSERT ((pagesize % GMP_LIMB_BYTES) == 0);

  /* round up to whole pages so redzones can sit on page boundaries */
  n = round_up_multiple (n, PAGESIZE_LIMBS);
  r->size = n;

  /* data + two redzones + slack for aligning the base to a page */
  nbytes = n*GMP_LIMB_BYTES + 2*REDZONE_BYTES + pagesize;

#if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
#define MAP_ANON  MAP_ANONYMOUS
#endif

#if HAVE_MMAP && defined (MAP_ANON)
  /* note must pass fd=-1 for MAP_ANON on BSD */
  p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
  if (p == (void *) -1)
    {
      fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
	       (unsigned) nbytes, strerror (errno));
      exit (1);
    }
#else
  p = (mp_ptr) malloc (nbytes);
  ASSERT_ALWAYS (p != NULL);
#endif

  p = (mp_ptr) align_pointer (p, pagesize);

  /* protect the lower redzone, then point r->ptr just past it */
  mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
  p += REDZONE_LIMBS;
  r->ptr = p;

  /* protect the upper redzone, just past the usable limbs */
  mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
}
+
/* Apply protection PROT to the whole usable area of region R (a no-op
   unless redzone checking is enabled, see mprotect_maybe).  */
void
mprotect_region (const struct region_t *r, int prot)
{
  mprotect_maybe (r->ptr, r->size, prot);
}
+
+
/* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
   and CARRY_4 */
mp_limb_t  carry_array[] = {
  0, 1, 2, 3,
  4,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  GMP_NUMB_MAX
};
int        carry_index;		/* current position in carry_array */
+
/* Number of carry values to try for the routine's carry style: a plain
   carry bit gives {0,1}, CARRY_3/CARRY_4 the small sets, and limb-sized
   or divisor-reduced carries the whole table plus random extras.  */
#define CARRY_COUNT                                             \
  ((tr->carry == CARRY_BIT) ? 2                                 \
   : tr->carry == CARRY_3   ? 3                                 \
   : tr->carry == CARRY_4   ? 4                                 \
   : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR)    \
     ? numberof(carry_array) + CARRY_RANDOMS                    \
   : 1)

/* Alternate between the two reference random fills based on INDEX.  */
#define MPN_RANDOM_ALT(index,dst,size) \
  (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))

/* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define CARRY_ITERATION                                                 \
  for (carry_index = 0;                                                 \
       (carry_index < numberof (carry_array)                            \
	? (carry = carry_array[carry_index])                            \
	: (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \
	 (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0),           \
	 carry_index < CARRY_COUNT;                                     \
       carry_index++)
+
+
/* Multiplier values tried for the *mul_1 style routines: small values,
   powers of two, and values near the limb maximum.  */
mp_limb_t  multiplier_array[] = {
  0, 1, 2, 3,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  GMP_NUMB_MAX - 2,
  GMP_NUMB_MAX - 1,
  GMP_NUMB_MAX
};
int        multiplier_index;

/* Divisor values tried for the division routines; zero is excluded, and
   both normalized (high bit set) and unnormalized values appear.  */
mp_limb_t  divisor_array[] = {
  1, 2, 3,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
  GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
  GMP_NUMB_HIGHBIT,
  GMP_NUMB_HIGHBIT + 1,
  GMP_NUMB_MAX - 2,
  GMP_NUMB_MAX - 1,
  GMP_NUMB_MAX
};

int        divisor_index;
+
/* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
  for (index = 0;                                                       \
       (index < numberof (array)                                        \
	? (var = array[index])                                          \
	: (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
       index < limit;                                                   \
       index++)

/* Full table plus random multipliers when the routine takes one,
   otherwise a single dummy pass.  */
#define MULTIPLIER_COUNT                                \
  (tr->multiplier                                       \
    ? numberof (multiplier_array) + MULTIPLIER_RANDOMS  \
    : 1)

#define MULTIPLIER_ITERATION                                            \
  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \
		  multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)

/* Likewise for routines taking a divisor.  */
#define DIVISOR_COUNT                           \
  (tr->divisor                                  \
   ? numberof (divisor_array) + DIVISOR_RANDOMS \
   : 1)

#define DIVISOR_ITERATION                                               \
  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
		  DIVISOR_RANDOMS, TRY_DIVISOR)
+
+
/* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
   d[0] or d[1] respectively, -1 means a separate (write-protected)
   location. */

struct overlap_t {
  int  s[NUM_SOURCES];  /* placement code per source operand, see above */
} overlap_array[] = {
  { { -1, -1, -1, -1, -1 } },
  { {  0, -1, -1, -1, -1 } },
  { { -1,  0, -1, -1, -1 } },
  { {  0,  0, -1, -1, -1 } },
  { {  1, -1, -1, -1, -1 } },
  { { -1,  1, -1, -1, -1 } },
  { {  1,  1, -1, -1, -1 } },
  { {  0,  1, -1, -1, -1 } },
  { {  1,  0, -1, -1, -1 } },
};

/* current arrangement being tried, and one-past-the-last to try */
struct overlap_t  *overlap, *overlap_limit;
+
/* How many leading entries of overlap_array apply, given the routine's
   overlap restrictions and how many sources/dests it actually has.  */
#define OVERLAP_COUNT                   \
  (tr->overlap & OVERLAP_NONE       ? 1 \
   : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
   : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
   : tr->overlap & OVERLAP_NOT_DST2 ? 4	\
   : tr->dst[1]                     ? 9 \
   : tr->src[1]                     ? 4 \
   : tr->dst[0]                     ? 2 \
   : 1)

#define OVERLAP_ITERATION                               \
  for (overlap = &overlap_array[0],                     \
    overlap_limit = &overlap_array[OVERLAP_COUNT];      \
    overlap < overlap_limit;                            \
    overlap++)
+
+
int  base = 10;  /* number base used for the get_str tests */

#define T_RAND_COUNT  2
int  t_rand;     /* which t_random style is in force, 0 or 1 */
+
/* Fill PTR,N with operand data in the style selected by option_data:
   the two reference random fills (alternating via t_rand), sequential
   counter values, all zeros, all ones, or the 0x2FF..FD special value.
   Does nothing for N == 0.  */
void
t_random (mp_ptr ptr, mp_size_t n)
{
  if (n == 0)
    return;

  switch (option_data) {
  case DATA_TRAND:
    switch (t_rand) {
    case 0: refmpn_random (ptr, n); break;
    case 1: refmpn_random2 (ptr, n); break;
    default: abort();
    }
    break;
  case DATA_SEQ:
    {
      /* successive limb values from a counter persisting across calls */
      static mp_limb_t  counter = 0;
      mp_size_t  i;
      for (i = 0; i < n; i++)
	ptr[i] = ++counter;
    }
    break;
  case DATA_ZEROS:
    refmpn_zero (ptr, n);
    break;
  case DATA_FFS:
    refmpn_fill (ptr, n, GMP_NUMB_MAX);
    break;
  case DATA_2FD:
    /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
       inducing the q1_ff special case in the mul-by-inverse part of some
       versions of divrem_1 and mod_1. */
    refmpn_fill (ptr, n, (mp_limb_t) -1);
    ptr[n-1] = 2;
    ptr[0] -= 2;
    break;

  default:
    abort();
  }
}
/* Run the loop body once for each t_random style.  */
#define T_RAND_ITERATION \
  for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
+
+
/* Print one side of a failing case (E is either &ref or &fun): the
   routine name, the return value if the routine has one, and each active
   destination's data and address, plus each active source's address.  */
void
print_each (const struct each_t *e)
{
  int  i;

  printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
  if (tr->retval)
    mpn_trace ("   retval", &e->retval, 1);

  for (i = 0; i < NUM_DESTS; i++)
    {
      if (tr->dst[i])
	{
	  /* byte-oriented dests (e.g. get_str output) trace as bytes */
	  if (tr->dst_bytes[i])
	    byte_tracen ("   d[%d]", i, e->d[i].p, d[i].size);
	  else
	    mpn_tracen ("   d[%d]", i, e->d[i].p, d[i].size);
	  printf ("        located %p\n", (void *) (e->d[i].p));
	}
    }

  for (i = 0; i < NUM_SOURCES; i++)
    if (tr->src[i])
      printf ("   s[%d] located %p\n", i, (void *)  (e->s[i].p));
}
+
+
/* Dump the complete state of the current failing case: operand sizes,
   scalar operands (multiplier, divisor, shift, carry), each dest and
   source placement/alignment/overlap, and the reference and function
   results via print_each.  */
void
print_all (void)
{
  int  i;

  printf ("\n");
  printf ("size  %ld\n", (long) size);
  if (tr->size2)
    printf ("size2 %ld\n", (long) size2);

  for (i = 0; i < NUM_DESTS; i++)
    if (d[i].size != size)
      printf ("d[%d].size %ld\n", i, (long) d[i].size);

  if (tr->multiplier)
    mpn_trace ("   multiplier", &multiplier, 1);
  if (tr->divisor)
    mpn_trace ("   divisor", &divisor, 1);
  if (tr->shift)
    printf ("   shift %lu\n", shift);
  if (tr->carry)
    mpn_trace ("   carry", &carry, 1);
  if (tr->msize)
    mpn_trace ("   multiplier_N", multiplier_N, tr->msize);

  for (i = 0; i < NUM_DESTS; i++)
    if (tr->dst[i])
      printf ("   d[%d] %s, align %ld, size %ld\n",
	      i, d[i].high ? "high" : "low",
	      (long) d[i].align, (long) d[i].size);

  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (tr->src[i])
	{
	  printf ("   s[%d] %s, align %ld, ",
		  i, s[i].high ? "high" : "low", (long) s[i].align);
	  switch (overlap->s[i]) {
	  case -1:
	    printf ("no overlap\n");
	    break;
	  default:
	    /* overlapping d[overlap->s[i]], possibly offset by the
	       alignment when a directional overlap style is in force */
	    printf ("==d[%d]%s\n",
		    overlap->s[i],
		    tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
		    : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
		    : "");
	    break;
	  }
	  printf ("   s[%d]=", i);
	  if (tr->carry_sign && (carry & (1 << i)))
	    printf ("-");
	  mpn_trace (NULL, s[i].p, SRC_SIZE(i));
	}
    }

  if (tr->dst0_from_src1)
    mpn_trace ("   d[0]", s[1].region.ptr, size);

  if (tr->reference)
    print_each (&ref);
  print_each (&fun);
}
+
/* Compare the reference result (ref) against the function-under-test
   result (fun): return value and each active destination's data.  On any
   mismatch, dump the whole case with print_all and abort.  */
void
compare (void)
{
  int  error = 0;
  int  i;

  if (tr->retval && ref.retval != fun.retval)
    {
      gmp_printf ("Different return values (%Mu, %Mu)\n",
		  ref.retval, fun.retval);
      error = 1;
    }

  /* For routines whose output length is determined by their return value
     (e.g. get_str), adopt the reference's returned size before comparing.  */
  for (i = 0; i < NUM_DESTS; i++)
    {
      switch (tr->dst_size[i]) {
      case SIZE_RETVAL:
      case SIZE_GET_STR:
	d[i].size = ref.retval;
	break;
      }
    }

  for (i = 0; i < NUM_DESTS; i++)
    {
      if (! tr->dst[i])
	continue;

      if (tr->dst_bytes[i])
	{
	  /* byte-oriented destination: plain memcmp */
	  if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
	    {
	      printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
		      i,
		      (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
		      (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
	      error = 1;
	    }
	}
      else
	{
	  /* limb-oriented destination: nail-tolerant comparison */
	  if (d[i].size != 0
	      && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
	    {
	      printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
		      i,
		      (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
		      (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
	      error = 1;
	    }
	}
    }

  if (error)
    {
      print_all();
      abort();
    }
}
+
+
+/* The functions are cast if the return value should be a long rather than
+   the default mp_limb_t.  This is necessary under _LONG_LONG_LIMB.  This
+   might not be enough if some actual calling conventions checking is
+   implemented on a long long limb system.  */
+
/* Invoke FUNCTION with the operands prepared in E, marshalling the
   arguments according to the current routine's TYPE_xxx category, and
   capture the return value in e->retval where the routine has one.
   Called once for the reference routine and once for the one under
   test; compare() then checks the two.  */
void
call (struct each_t *e, tryfun_t function)
{
  switch (choice->type) {
  case TYPE_ADD:
  case TYPE_SUB:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
    break;

  case TYPE_ADD_N:
  case TYPE_SUB_N:
  case TYPE_ADDLSH1_N:
  case TYPE_ADDLSH2_N:
  case TYPE_SUBLSH1_N:
  case TYPE_SUBLSH2_N:
  case TYPE_RSBLSH1_N:
  case TYPE_RSBLSH2_N:
  case TYPE_RSH1ADD_N:
  case TYPE_RSH1SUB_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_ADDLSH_N:
  case TYPE_SUBLSH_N:
  case TYPE_RSBLSH_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
    break;
  case TYPE_ADDLSH_NC:
  case TYPE_SUBLSH_NC:
  case TYPE_RSBLSH_NC:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry);
    break;
  case TYPE_ADDLSH1_NC:
  case TYPE_ADDLSH2_NC:
  case TYPE_SUBLSH1_NC:
  case TYPE_SUBLSH2_NC:
  case TYPE_RSBLSH1_NC:
  case TYPE_RSBLSH2_NC:
  case TYPE_ADD_NC:
  case TYPE_SUB_NC:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
    break;
  case TYPE_ADDCND_N:
  case TYPE_SUBCND_N:
    e->retval = CALLING_CONVENTIONS (function)
      (carry, e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_ADD_ERR1_N:
  case TYPE_SUB_ERR1_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry);
    break;
  case TYPE_ADD_ERR2_N:
  case TYPE_SUB_ERR2_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry);
    break;
  case TYPE_ADD_ERR3_N:
  case TYPE_SUB_ERR3_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry);
    break;

  case TYPE_MUL_1:
  case TYPE_ADDMUL_1:
  case TYPE_SUBMUL_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier);
    break;
  case TYPE_MUL_1C:
  case TYPE_ADDMUL_1C:
  case TYPE_SUBMUL_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier, carry);
    break;

  case TYPE_MUL_2:
  case TYPE_MUL_3:
  case TYPE_MUL_4:
  case TYPE_MUL_5:
  case TYPE_MUL_6:
    if (size == 1)
      abort ();  /* these routines have minsize >= 2 */
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier_N);
    break;

  case TYPE_ADDMUL_2:
  case TYPE_ADDMUL_3:
  case TYPE_ADDMUL_4:
  case TYPE_ADDMUL_5:
  case TYPE_ADDMUL_6:
  case TYPE_ADDMUL_7:
  case TYPE_ADDMUL_8:
    if (size == 1)
      abort ();  /* these routines have minsize >= 2 */
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier_N);
    break;

  case TYPE_AND_N:
  case TYPE_ANDN_N:
  case TYPE_NAND_N:
  case TYPE_IOR_N:
  case TYPE_IORN_N:
  case TYPE_NIOR_N:
  case TYPE_XOR_N:
  case TYPE_XNOR_N:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;

  case TYPE_ADDSUB_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_ADDSUB_NC:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
    break;

  case TYPE_COPY:
  case TYPE_COPYI:
  case TYPE_COPYD:
  case TYPE_COM:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;

  case TYPE_ADDLSH1_N_IP1:
  case TYPE_ADDLSH2_N_IP1:
  case TYPE_ADDLSH1_N_IP2:
  case TYPE_ADDLSH2_N_IP2:
  case TYPE_SUBLSH1_N_IP1:
  case TYPE_SUBLSH2_N_IP1:
  case TYPE_DIVEXACT_BY3:
    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;
  case TYPE_DIVEXACT_BY3C:
    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
						carry);
    break;


  case TYPE_DIVMOD_1:
  case TYPE_DIVEXACT_1:
  case TYPE_BDIV_Q_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, divisor);
    break;
  case TYPE_DIVMOD_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, divisor, carry);
    break;
  case TYPE_DIVREM_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, size2, e->s[0].p, size, divisor);
    break;
  case TYPE_DIVREM_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
    break;
  case TYPE_PREINV_DIVREM_1:
    {
      mp_limb_t  dinv;
      unsigned   shift;  /* note: shadows the file-global "shift" */
      shift = refmpn_count_leading_zeros (divisor);
      dinv = refmpn_invert_limb (divisor << shift);
      e->retval = CALLING_CONVENTIONS (function)
	(e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
    }
    break;
  case TYPE_MOD_1:
  case TYPE_MODEXACT_1_ODD:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor);
    break;
  case TYPE_MOD_1C:
  case TYPE_MODEXACT_1C_ODD:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor, carry);
    break;
  case TYPE_PREINV_MOD_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
    break;
  case TYPE_DIV_QR_1N_PI1:
    {
      mp_limb_t dinv = refmpn_invert_limb (divisor);
      e->retval = CALLING_CONVENTIONS (function)
	(e->d[0].p, e->s[0].p, size, e->s[1].p[0], divisor, dinv);
      break;
    }

  case TYPE_MOD_34LSUB1:
    e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
    break;

  case TYPE_UDIV_QRNND:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
    break;
  case TYPE_UDIV_QRNND_R:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
    break;

  case TYPE_SBPI1_DIV_QR:
    {
      gmp_pi1_t dinv;
      invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
      refmpn_copyi (e->d[1].p, e->s[0].p, size);        /* dividend */
      refmpn_fill (e->d[0].p, size-size2, 0x98765432);  /* quotient */
      e->retval = CALLING_CONVENTIONS (function)
	(e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
      refmpn_zero (e->d[1].p+size2, size-size2);    /* excess over remainder */
    }
    break;

  case TYPE_TDIV_QR:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
				    e->s[0].p, size, e->s[1].p, size2);
    break;

  case TYPE_GCD_1:
    /* Must have a non-zero src, but this probably isn't the best way to do
       it. */
    if (refmpn_zero_p (e->s[0].p, size))
      e->retval = 0;
    else
      e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
    break;

  case TYPE_GCD:
    /* Sources are destroyed, so they're saved and replaced, but a general
       approach to this might be better.  Note that it's still e->s[0].p and
       e->s[1].p that are passed, to get the desired alignments. */
    {
      mp_ptr  s0 = refmpn_malloc_limbs (size);
      mp_ptr  s1 = refmpn_malloc_limbs (size2);
      refmpn_copyi (s0, e->s[0].p, size);
      refmpn_copyi (s1, e->s[1].p, size2);

      /* sources must be writable while the gcd routine clobbers them */
      mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
      mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
      e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
						  e->s[0].p, size,
						  e->s[1].p, size2);
      refmpn_copyi (e->s[0].p, s0, size);
      refmpn_copyi (e->s[1].p, s1, size2);
      free (s0);
      free (s1);
    }
    break;

  case TYPE_GCD_FINDA:
    {
      /* FIXME: do this with a flag */
      mp_limb_t  c[2];
      c[0] = e->s[0].p[0];
      c[0] += (c[0] == 0);  /* ensure non-zero */
      c[1] = e->s[0].p[0];
      c[1] += (c[1] == 0);
      e->retval = CALLING_CONVENTIONS (function) (c);
    }
    break;

  case TYPE_MPZ_LEGENDRE:
  case TYPE_MPZ_JACOBI:
    {
      /* build mpz views over the mpn operands; carry selects the sign */
      mpz_t  a, b;
      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
      PTR(b) = e->s[1].p; SIZ(b) = size2;
      e->retval = CALLING_CONVENTIONS (function) (a, b);
    }
    break;
  case TYPE_MPZ_KRONECKER:
    {
      mpz_t  a, b;
      PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
      PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
      e->retval = CALLING_CONVENTIONS (function) (a, b);
    }
    break;
  case TYPE_MPZ_KRONECKER_UI:
    {
      mpz_t  a;
      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
    }
    break;
  case TYPE_MPZ_KRONECKER_SI:
    {
      mpz_t  a;
      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
    }
    break;
  case TYPE_MPZ_UI_KRONECKER:
    {
      mpz_t  b;
      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
    }
    break;
  case TYPE_MPZ_SI_KRONECKER:
    {
      mpz_t  b;
      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
    }
    break;

  case TYPE_MUL_MN:
  case TYPE_MULMID_MN:
    CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
    break;
  case TYPE_MUL_N:
  case TYPE_MULLO_N:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_MULMID_N:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p,
				    (size + 1) / 2);
    break;
  case TYPE_SQR:
  case TYPE_SQRLO:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;

  case TYPE_UMUL_PPMM:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
    break;
  case TYPE_UMUL_PPMM_R:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
    break;

  case TYPE_ADDLSH_N_IP1:
  case TYPE_ADDLSH_N_IP2:
  case TYPE_SUBLSH_N_IP1:
  case TYPE_LSHIFT:
  case TYPE_LSHIFTC:
  case TYPE_RSHIFT:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, shift);
    break;

  case TYPE_POPCOUNT:
    e->retval = (* (unsigned long (*)(ANYARGS))
		 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
    break;
  case TYPE_HAMDIST:
    e->retval = (* (unsigned long (*)(ANYARGS))
		 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
    break;

  case TYPE_SQRTREM:
    e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
      (e->d[0].p, e->d[1].p, e->s[0].p, size);
    break;

  case TYPE_SQRT:
    e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
      (e->d[0].p, e->s[0].p, size);
    break;

  case TYPE_ZERO:
    CALLING_CONVENTIONS (function) (e->d[0].p, size);
    break;

  case TYPE_GET_STR:
    {
      /* pad the unused part of d[0] with a sentinel byte, placing the
	 string at the high or low end as the placement style requires */
      size_t  sizeinbase, fill;
      char    *dst;
      MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
      ASSERT_ALWAYS (sizeinbase <= d[0].size);
      fill = d[0].size - sizeinbase;
      if (d[0].high)
	{
	  memset (e->d[0].p, 0xBA, fill);
	  dst = (char *) e->d[0].p + fill;
	}
      else
	{
	  dst = (char *) e->d[0].p;
	  memset (dst + sizeinbase, 0xBA, fill);
	}
      if (POW2_P (base))
	{
	  e->retval = CALLING_CONVENTIONS (function) (dst, base,
						      e->s[0].p, size);
	}
      else
	{
	  /* non-power-of-2 bases clobber the operand, so use a copy */
	  refmpn_copy (e->d[1].p, e->s[0].p, size);
	  e->retval = CALLING_CONVENTIONS (function) (dst, base,
						      e->d[1].p, size);
	}
      refmpn_zero (e->d[1].p, size);  /* clobbered or unused */
    }
    break;

 case TYPE_INVERT:
    {
      mp_ptr scratch;
      TMP_DECL;
      TMP_MARK;
      scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
      TMP_FREE;
    }
    break;
  case TYPE_BINVERT:
    {
      mp_ptr scratch;
      TMP_DECL;
      TMP_MARK;
      scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
      TMP_FREE;
    }
    break;

#ifdef EXTRA_CALL
    EXTRA_CALL
#endif

  default:
    printf ("Unknown routine type %d\n", choice->type);
    abort ();
    break;
  }
}
+
+
void
pointer_setup (struct each_t *e)
{
  int  i, j;

  /* Translate each destination's tr->dst_size[] code into a concrete
     limb count (or byte count, for byte destinations) in d[i].size,
     based on the current size/size2 iteration variables.  */
  for (i = 0; i < NUM_DESTS; i++)
    {
      switch (tr->dst_size[i]) {
      case 0:
      case SIZE_RETVAL: /* will be adjusted later */
	d[i].size = size;
	break;

      case SIZE_1:
	d[i].size = 1;
	break;
      case SIZE_2:
	d[i].size = 2;
	break;
      case SIZE_3:
	d[i].size = 3;
	break;
      case SIZE_4:
	d[i].size = 4;
	break;
      case SIZE_6:
	d[i].size = 6;
	break;

      case SIZE_PLUS_1:
	d[i].size = size+1;
	break;
      case SIZE_PLUS_MSIZE_SUB_1:
	d[i].size = size + tr->msize - 1;
	break;

      case SIZE_SUM:
	if (tr->size2)
	  d[i].size = size + size2;
	else
	  d[i].size = 2*size;
	break;

      case SIZE_SIZE2:
	d[i].size = size2;
	break;

      case SIZE_DIFF:
	d[i].size = size - size2;
	break;

      case SIZE_DIFF_PLUS_1:
	d[i].size = size - size2 + 1;
	break;

      case SIZE_DIFF_PLUS_3:
	d[i].size = size - size2 + 3;
	break;

      case SIZE_CEIL_HALF:
	d[i].size = (size+1)/2;
	break;

      case SIZE_GET_STR:
	{
	  /* Worst-case string length: measure an operand whose most
	     significant limb is all ones.  NOTE(review): the &ff-(size-1)
	     pointer is only valid if MPN_SIZEINBASE reads just the top
	     limb of its operand -- confirm against gmp-impl.h.  */
	  mp_limb_t ff = GMP_NUMB_MAX;
	  MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
	}
	break;

      default:
	printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
	abort ();
      }
    }

  /* establish e->d[].p destinations */
  for (i = 0; i < NUM_DESTS; i++)
    {
      mp_size_t  offset = 0;

      /* possible room for overlapping sources */
      for (j = 0; j < numberof (overlap->s); j++)
	if (overlap->s[j] == i)
	  offset = MAX (offset, s[j].align);

      if (d[i].high)
	{
	  /* place the destination at the top end of its region, shifted
	     down by its size and alignment offset */
	  if (tr->dst_bytes[i])
	    {
	      /* byte destination: size/align are in bytes */
	      e->d[i].p = (mp_ptr)
		((char *) (e->d[i].region.ptr + e->d[i].region.size)
		 - d[i].size - d[i].align);
	    }
	  else
	    {
	      e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
		- d[i].size - d[i].align;
	      if (tr->overlap == OVERLAP_LOW_TO_HIGH)
		e->d[i].p -= offset;
	    }
	}
      else
	{
	  /* place the destination at the bottom end of its region */
	  if (tr->dst_bytes[i])
	    {
	      e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
	    }
	  else
	    {
	      e->d[i].p = e->d[i].region.ptr + d[i].align;
	      if (tr->overlap == OVERLAP_HIGH_TO_LOW)
		e->d[i].p += offset;
	    }
	}
    }

  /* establish e->s[].p sources */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      int  o = overlap->s[i];
      switch (o) {
      case -1:
	/* no overlap */
	e->s[i].p = s[i].p;
	break;
      case 0:
      case 1:
	/* overlap with d[o] */
	if (tr->overlap == OVERLAP_HIGH_TO_LOW)
	  e->s[i].p = e->d[o].p - s[i].align;
	else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
	  e->s[i].p = e->d[o].p + s[i].align;
	else if (tr->size2 == SIZE_FRACTION)
	  e->s[i].p = e->d[o].p + size2;
	else
	  e->s[i].p = e->d[o].p;
	break;
      default:
	abort();
	break;
      }
    }
}
+
+
+void
+validate_fail (void)
+{
+  if (tr->reference)
+    {
+      trap_location = TRAP_REF;
+      call (&ref, tr->reference);
+      trap_location = TRAP_NOWHERE;
+    }
+
+  print_all();
+  abort();
+}
+
+
/* Run one test case: for the current global parameter combination (size,
   size2, shift, divisor, multiplier, carry, alignments, high/low placement,
   overlap), randomise the sources, pre-fill the destinations with sentinel
   values, run the reference and the function under test, and compare or
   validate the results.  */
void
try_one (void)
{
  int  i;

  if (option_spinner)
    spinner();
  spinner_count++;

  trap_location = TRAP_SETUPS;

  /* force the divisor into the range the routine type demands */
  if (tr->divisor == DIVISOR_NORM)
    divisor |= GMP_NUMB_HIGHBIT;
  if (tr->divisor == DIVISOR_ODD)
    divisor |= 1;

  /* place each source at the top or bottom of its region, per the
     current high/align iteration variables */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (s[i].high)
	s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
      else
	s[i].p = s[i].region.ptr + s[i].align;
    }

  pointer_setup (&ref);
  pointer_setup (&fun);

  /* distinct sentinel return values, so a routine that fails to set its
     return is detectable when comparing */
  ref.retval = 0x04152637;
  fun.retval = 0x8C9DAEBF;

  t_random (multiplier_N, tr->msize);

  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (! tr->src[i])
	continue;

      /* source regions are kept read-only between uses; unprotect while
	 filling in fresh random data */
      mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
      t_random (s[i].p, SRC_SIZE(i));

      /* massage the random data into whatever special form this routine
	 type requires */
      switch (tr->data) {
      case DATA_NON_ZERO:
	if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
	  s[i].p[0] = 1;
	break;

      case DATA_MULTIPLE_DIVISOR:
	/* same number of low zero bits as divisor */
	s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
	refmpn_sub_1 (s[i].p, s[i].p, size,
		      refmpn_mod_1 (s[i].p, size, divisor));
	break;

      case DATA_GCD:
	/* s[1] no more bits than s[0] */
	if (i == 1 && size2 == size)
	  s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);

	/* high limb non-zero */
	s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);

	/* odd */
	s[i].p[0] |= 1;
	break;

      case DATA_SRC0_ODD:
	if (i == 0)
	  s[i].p[0] |= 1;
	break;

      case DATA_SRC1_ODD:
	if (i == 1)
	  s[i].p[0] |= 1;
	break;

      case DATA_SRC1_ODD_PRIME:
	/* replace src1 by the next prime at or above it that still fits;
	   re-randomise and retry if nextprime overflows the operand size */
	if (i == 1)
	  {
	    if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1)
		&& s[i].p[0] <=3)
	      s[i].p[0] = 3;
	    else
	      {
		mpz_t p;
		mpz_init (p);
		for (;;)
		  {
		    _mpz_realloc (p, SRC_SIZE(i));
		    MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i));
		    SIZ(p) = SRC_SIZE(i);
		    MPN_NORMALIZE (PTR(p), SIZ(p));
		    mpz_nextprime (p, p);
		    if (mpz_size (p) <= SRC_SIZE(i))
		      break;

		    t_random (s[i].p, SRC_SIZE(i));
		  }
		MPN_COPY (s[i].p, PTR(p), SIZ(p));
		if (SIZ(p) < SRC_SIZE(i))
		  MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p));
		mpz_clear (p);
	      }
	  }
	break;

      case DATA_SRC1_HIGHBIT:
	if (i == 1)
	  {
	    if (tr->size2)
	      s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
	    else
	      s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
	  }
	break;

      case DATA_SRC0_HIGHBIT:
       if (i == 0)
	 {
	   s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
	 }
       break;

      case DATA_UDIV_QRNND:
	/* high limb must be less than the divisor for udiv_qrnnd */
	s[i].p[1] %= divisor;
	break;
      case DATA_DIV_QR_1:
	if (i == 1)
	  s[i].p[0] %= divisor;
	break;
      }

      mprotect_region (&s[i].region, PROT_READ);
    }

  /* pre-fill destinations: either copy src1 in (for in-place routines),
     or fill with sentinel bytes/limbs so unwritten areas are detectable */
  for (i = 0; i < NUM_DESTS; i++)
    {
      if (! tr->dst[i])
	continue;

      if (tr->dst0_from_src1 && i==0)
	{
	  mp_size_t  copy = MIN (d[0].size, SRC_SIZE(1));
	  mp_size_t  fill = MAX (0, d[0].size - copy);
	  MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
	  MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
	  refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
	  refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
	}
      else if (tr->dst_bytes[i])
	{
	  memset (ref.d[i].p, 0xBA, d[i].size);
	  memset (fun.d[i].p, 0xBA, d[i].size);
	}
      else
	{
	  refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
	  refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
	}
    }

  /* when sources overlap destinations, the operands live inside the
     destination regions; copy the data there for both ref and fun */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (! tr->src[i])
	continue;

      if (ref.s[i].p != s[i].p)
	{
	  refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
	  refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
	}
    }

  if (option_print)
    print_all();

  /* routines with a custom validator run only the function under test;
     otherwise run both and compare results limb by limb */
  if (tr->validate != NULL)
    {
      trap_location = TRAP_FUN;
      call (&fun, choice->function);
      trap_location = TRAP_NOWHERE;

      if (! CALLING_CONVENTIONS_CHECK ())
	{
	  print_all();
	  abort();
	}

      (*tr->validate) ();
    }
  else
    {
      trap_location = TRAP_REF;
      call (&ref, tr->reference);
      trap_location = TRAP_FUN;
      call (&fun, choice->function);
      trap_location = TRAP_NOWHERE;

      if (! CALLING_CONVENTIONS_CHECK ())
	{
	  print_all();
	  abort();
	}

      compare ();
    }
}
+
+
/* Iterate the main operand size from the largest of option_firstsize, the
   routine's minimum, and 0 or 1 (0 only for SIZE_ALLOW_ZERO routines), up
   to option_lastsize.  SIZE_ODD routines start at an odd size and step by
   2 so only odd sizes are tried.  */
#define SIZE_ITERATION                                          \
  for (size = MAX3 (option_firstsize,                           \
		    choice->minsize,                            \
		    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1),	\
	 size += (tr->size == SIZE_ODD) && !(size & 1);		\
       size <= option_lastsize;                                 \
       size += (tr->size == SIZE_ODD) ? 2 : 1)

/* First and last values for the second operand size, according to the
   routine's tr->size2 code; both are 0 when there is no second size, so
   SIZE2_ITERATION below runs exactly once in that case.  */
#define SIZE2_FIRST                                     \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2     \
   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)	\
   : tr->size2 ?                                        \
   MAX (choice->minsize, (option_firstsize2 != 0        \
			  ? option_firstsize2 : 1))     \
   : 0)

#define SIZE2_LAST                                      \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \
   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)	\
   : tr->size2 ? size                                   \
   : 0)

#define SIZE2_ITERATION \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)

/* Alignment offsets are only iterated for operands actually in use;
   unused operands get a single iteration at alignment 0.  */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
#define ALIGN_ITERATION(w,n,cond) \
  for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)

/* High (top-of-region) placement is tried in addition to low placement
   only for operands in use.  */
#define HIGH_LIMIT(cond)  ((cond) != 0)
#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
#define HIGH_ITERATION(w,n,cond) \
  for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)

/* Shift counts 1..GMP_NUMB_BITS-1 for shift routines, just 1 otherwise. */
#define SHIFT_LIMIT                                     \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

#define SHIFT_ITERATION                                 \
  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
+
+
/* Run try_one() over every combination of the iteration parameters for the
   currently selected routine ("choice"/"tr"), first printing the routine
   name and the total number of combinations as a progress indication.  */
void
try_many (void)
{
  int   i;

  /* compute and print the total combination count up front */
  {
    unsigned long  total = 1;

    total *= option_repetitions;
    total *= option_lastsize;
    if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
    else if (tr->size2)             total *= (option_lastsize+1)/2;

    total *= SHIFT_LIMIT;
    total *= MULTIPLIER_COUNT;
    total *= DIVISOR_COUNT;
    total *= CARRY_COUNT;
    total *= T_RAND_COUNT;

    total *= HIGH_COUNT (tr->dst[0]);
    total *= HIGH_COUNT (tr->dst[1]);
    total *= HIGH_COUNT (tr->src[0]);
    total *= HIGH_COUNT (tr->src[1]);

    total *= ALIGN_COUNT (tr->dst[0]);
    total *= ALIGN_COUNT (tr->dst[1]);
    total *= ALIGN_COUNT (tr->src[0]);
    total *= ALIGN_COUNT (tr->src[1]);

    total *= OVERLAP_COUNT;

    printf ("%s %lu\n", choice->name, total);
  }

  spinner_count = 0;

  /* each *_ITERATION macro expands to a for(...) header, so the whole
     stack below forms one deeply nested loop around try_one() */
  for (i = 0; i < option_repetitions; i++)
    SIZE_ITERATION
      SIZE2_ITERATION

      SHIFT_ITERATION
      MULTIPLIER_ITERATION
      DIVISOR_ITERATION
      CARRY_ITERATION /* must be after divisor */
      T_RAND_ITERATION

      HIGH_ITERATION(d,0, tr->dst[0])
      HIGH_ITERATION(d,1, tr->dst[1])
      HIGH_ITERATION(s,0, tr->src[0])
      HIGH_ITERATION(s,1, tr->src[1])

      ALIGN_ITERATION(d,0, tr->dst[0])
      ALIGN_ITERATION(d,1, tr->dst[1])
      ALIGN_ITERATION(s,0, tr->src[0])
      ALIGN_ITERATION(s,1, tr->src[1])

      OVERLAP_ITERATION
      try_one();

  printf("\n");
}
+
+
+/* Usually print_all() doesn't show much, but it might give a hint as to
+   where the function was up to when it died. */
/* Signal handler: report which signal fired and, via trap_location, what
   the harness was doing at the time, then exit with failure.
   NOTE(review): printf/print_all/exit are not async-signal-safe, but
   that's acceptable here since the handler terminates the run anyway.  */
void
trap (int sig)
{
  const char *name = "noname";

  /* translate the signal number for the report */
  switch (sig) {
  case SIGILL:  name = "SIGILL";  break;
#ifdef SIGBUS
  case SIGBUS:  name = "SIGBUS";  break;
#endif
  case SIGSEGV: name = "SIGSEGV"; break;
  case SIGFPE:  name = "SIGFPE";  break;
  }

  printf ("\n\nSIGNAL TRAP: %s\n", name);

  /* trap_location is maintained by try_one()/validate_fail() around each
     reference and test call */
  switch (trap_location) {
  case TRAP_REF:
    printf ("  in reference function: %s\n", tr->reference_name);
    break;
  case TRAP_FUN:
    printf ("  in test function: %s\n", choice->name);
    print_all ();
    break;
  case TRAP_SETUPS:
    printf ("  in parameter setups\n");
    print_all ();
    break;
  default:
    printf ("  somewhere unknown\n");
    break;
  }
  exit (1);
}
+
+
/* One-time harness initialization: determine the page size (needed for the
   mprotect redzones), install the signal trap handler, and allocate the
   source and destination memory regions.  */
void
try_init (void)
{
#if HAVE_GETPAGESIZE
  /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
     know _SC_PAGESIZE. */
  pagesize = getpagesize ();
#else
#if HAVE_SYSCONF
  if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
    {
      /* According to the linux man page, sysconf doesn't set errno */
      fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
      exit (1);
    }
#else
Error, error, cannot get page size
#endif
#endif

  printf ("pagesize is 0x%lX bytes\n", pagesize);

  /* catch crashes in the routines under test, so they can be reported
     with their parameters via trap() */
  signal (SIGILL,  trap);
#ifdef SIGBUS
  signal (SIGBUS,  trap);
#endif
  signal (SIGSEGV, trap);
  signal (SIGFPE,  trap);

  {
    int  i;

    /* region size allows for the largest operand at any alignment offset,
       at either end of the region */
    for (i = 0; i < NUM_SOURCES; i++)
      {
	malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
	printf ("s[%d] %p to %p (0x%lX bytes)\n",
		i, (void *) (s[i].region.ptr),
		(void *) (s[i].region.ptr + s[i].region.size),
		(long) s[i].region.size * GMP_LIMB_BYTES);
      }

/* allocate and report the destination regions for one of the two
   "each" structures (ref or fun) */
#define INIT_EACH(e,es)                                                 \
    for (i = 0; i < NUM_DESTS; i++)                                     \
      {                                                                 \
	malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
	printf ("%s d[%d] %p to %p (0x%lX bytes)\n",                    \
		es, i, (void *) (e.d[i].region.ptr),			\
		(void *)  (e.d[i].region.ptr + e.d[i].region.size),	\
		(long) e.d[i].region.size * GMP_LIMB_BYTES);         \
      }

    INIT_EACH(ref, "ref");
    INIT_EACH(fun, "fun");
  }
}
+
/* Match str against pattern, where pattern may carry a single '*' wildcard
   at its start (suffix match) or at its end (prefix match); otherwise the
   comparison is exact.  Returns non-zero on a match.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen = strlen (pattern);
  size_t  slen = strlen (str);

  /* leading wildcard: the rest of the pattern must be a suffix of str */
  if (plen != 0 && pattern[0] == '*')
    {
      size_t  taillen = plen - 1;
      if (taillen == 0)
	return 1;   /* bare "*" matches anything */
      return (slen >= taillen
	      && memcmp (pattern + 1, str + slen - taillen, taillen) == 0);
    }

  /* trailing wildcard: everything before the '*' must be a prefix of str */
  if (plen != 0 && pattern[plen - 1] == '*')
    return (memcmp (pattern, str, plen - 1) == 0);

  /* no wildcard: exact match */
  return (strcmp (pattern, str) == 0);
}
+
+void
+try_name (const char *name)
+{
+  int  found = 0;
+  int  i;
+
+  for (i = 0; i < numberof (choice_array); i++)
+    {
+      if (strmatch_wild (name, choice_array[i].name))
+	{
+	  choice = &choice_array[i];
+	  tr = &param[choice->type];
+	  try_many ();
+	  found = 1;
+	}
+    }
+
+  if (!found)
+    {
+      printf ("%s unknown\n", name);
+      /* exit (1); */
+    }
+}
+
+
/* Print the command line usage summary and the list of testable function
   names (wrapped to fit an 80-column terminal), then exit with failure.  */
void
usage (const char *prog)
{
  int  col = 0;
  int  i;

  printf ("Usage: %s [options] function...\n", prog);
  printf ("    -1        use limb data 1,2,3,etc\n");
  printf ("    -9        use limb data all 0xFF..FFs\n");
  printf ("    -a zeros  use limb data all zeros\n");
  printf ("    -a ffs    use limb data all 0xFF..FFs (same as -9)\n");
  printf ("    -a 2fd    use data 0x2FFF...FFFD\n");
  printf ("    -p        print each case tried (try this if seg faulting)\n");
  printf ("    -R        seed random numbers from time()\n");
  printf ("    -r reps   set repetitions (default %d)\n", DEFAULT_REPETITIONS);
  printf ("    -s size   starting size to test\n");
  printf ("    -S size2  starting size2 to test\n");
  printf ("    -s s1-s2  range of sizes to test\n");
  printf ("    -W        don't show the spinner (use this in gdb)\n");
  printf ("    -z        disable mprotect() redzones\n");
  printf ("Default data is refmpn_random() and refmpn_random2().\n");
  printf ("\n");
  printf ("Functions that can be tested:\n");

  /* wrap the function names at column 79 */
  for (i = 0; i < numberof (choice_array); i++)
    {
      if (col + 1 + strlen (choice_array[i].name) > 79)
	{
	  printf ("\n");
	  col = 0;
	}
      printf (" %s", choice_array[i].name);
      col += 1 + strlen (choice_array[i].name);
    }
  printf ("\n");

  exit(1);
}
+
+
/* Entry point: parse command line options, seed the global random state,
   allocate the test memory regions, then run every function named (or
   wildcard-matched) on the command line.  */
int
main (int argc, char *argv[])
{
  int  i;

  /* unbuffered output */
  setbuf (stdout, NULL);
  setbuf (stderr, NULL);

  /* default trace in hex, and in upper-case so can paste into bc */
  mp_trace_base = -16;

  param_init ();

  {
    unsigned long  seed = 123;  /* fixed default seed, for reproducible runs */
    int   opt;

    while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
      {
	switch (opt) {
	case '1':
	  /* use limb data values 1, 2, 3, ... etc */
	  option_data = DATA_SEQ;
	  break;
	case '9':
	  /* use limb data values 0xFFF...FFF always */
	  option_data = DATA_FFS;
	  break;
	case 'a':
	  if (strcmp (optarg, "zeros") == 0)     option_data = DATA_ZEROS;
	  else if (strcmp (optarg, "seq") == 0)  option_data = DATA_SEQ;
	  else if (strcmp (optarg, "ffs") == 0)  option_data = DATA_FFS;
	  else if (strcmp (optarg, "2fd") == 0)  option_data = DATA_2FD;
	  else
	    {
	      fprintf (stderr, "unrecognised data option: %s\n", optarg);
	      exit (1);
	    }
	  break;
	case 'b':
	  /* trace print base (not listed in usage()) */
	  mp_trace_base = atoi (optarg);
	  break;
	case 'E':
	  /* re-seed (not listed in usage(); printed by -R for re-runs) */
	  sscanf (optarg, "%lu", &seed);
	  printf ("Re-seeding with %lu\n", seed);
	  break;
	case 'p':
	  option_print = 1;
	  break;
	case 'R':
	  /* randomize */
	  seed = time (NULL);
	  printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
	  break;
	case 'r':
	  option_repetitions = atoi (optarg);
	  break;
	case 's':
	  {
	    /* either a single size or a "first-last" range */
	    char  *p;
	    option_firstsize = strtol (optarg, 0, 0);
	    if ((p = strchr (optarg, '-')) != NULL)
	      option_lastsize = strtol (p+1, 0, 0);
	  }
	  break;
	case 'S':
	  /* -S <size> sets the starting size for the second of a two size
	     routine (like mpn_mul_basecase) */
	  option_firstsize2 = strtol (optarg, 0, 0);
	  break;
	case 'W':
	  /* use this when running in the debugger */
	  option_spinner = 0;
	  break;
	case 'z':
	  /* disable redzones */
	  option_redzones = 0;
	  break;
	case '?':
	  usage (argv[0]);
	  break;
	}
      }

    gmp_randinit_default (__gmp_rands);
    __gmp_rands_initialized = 1;
    gmp_randseed_ui (__gmp_rands, seed);
  }

  try_init();

  /* at least one function name is required */
  if (argc <= optind)
    usage (argv[0]);

  for (i = optind; i < argc; i++)
    try_name (argv[i]);

  return 0;
}
diff --git a/tests/memory.c b/tests/memory.c
new file mode 100644
index 0000000..4ab54b7
--- /dev/null
+++ b/tests/memory.c
@@ -0,0 +1,246 @@
+/* Memory allocation used during tests.
+
+Copyright 2001, 2002, 2007, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>		/* for abort */
+#include <string.h>		/* for memcpy, memcmp */
+#include "gmp-impl.h"
+#include "tests.h"
+
/* Redzone sentinel patterns, each sized to fill exactly one limb. */
#if GMP_LIMB_BITS == 64
#define PATTERN1 CNST_LIMB(0xcafebabedeadbeef)
#define PATTERN2 CNST_LIMB(0xabacadabaedeedab)
#else
#define PATTERN1 CNST_LIMB(0xcafebabe)
#define PATTERN2 CNST_LIMB(0xdeadbeef)
#endif

/* Convert a pointer to a limb value; the block address is mixed into the
   stored redzone patterns (see tests_allocate). */
#if HAVE_INTPTR_T
#define PTRLIMB(p)  ((mp_limb_t) (intptr_t) p)
#else
#define PTRLIMB(p)  ((mp_limb_t) (size_t) p)
#endif

/* Each block allocated is a separate malloc, for the benefit of a redzoning
   malloc debugger during development or when bug hunting.

   Sizes passed when reallocating or freeing are checked (the default
   routines don't care about these).

   Memory leaks are checked by requiring that all blocks have been freed
   when tests_memory_end() is called.  Test programs must be sure to have
   "clear"s for all temporary variables used.  */


/* Bookkeeping record for one live allocation, kept in a linked list. */
struct header {
  void           *ptr;   /* user pointer, just past the leading redzone limb */
  size_t         size;   /* user-visible size in bytes */
  struct header  *next;
};

/* All currently outstanding allocations. */
struct header  *tests_memory_list = NULL;
+
+/* Return a pointer to a pointer to the found block (so it can be updated
+   when unlinking). */
+struct header **
+tests_memory_find (void *ptr)
+{
+  struct header  **hp;
+
+  for (hp = &tests_memory_list; *hp != NULL; hp = &((*hp)->next))
+    if ((*hp)->ptr == ptr)
+      return hp;
+
+  return NULL;
+}
+
+int
+tests_memory_valid (void *ptr)
+{
+  return (tests_memory_find (ptr) != NULL);
+}
+
/* Allocate "size" bytes with a one-limb redzone before and after the
   returned block, and record the block in tests_memory_list so leaks can
   be detected by tests_memory_end().  Aborts on a zero-size request.  */
void *
tests_allocate (size_t size)
{
  struct header  *h;
  void *rptr, *ptr;
  mp_limb_t PATTERN2_var;

  if (size == 0)
    {
      fprintf (stderr, "tests_allocate(): attempt to allocate 0 bytes\n");
      abort ();
    }

  /* push a new bookkeeping record onto the list */
  h = (struct header *) __gmp_default_allocate (sizeof (*h));
  h->next = tests_memory_list;
  tests_memory_list = h;

  /* real block, with room for a leading and a trailing redzone limb */
  rptr = __gmp_default_allocate (size + 2 * sizeof (mp_limb_t));
  ptr = (void *) ((gmp_intptr_t) rptr + sizeof (mp_limb_t));

  /* the stored redzone values are the patterns offset by the block's own
     address, so each block carries address-specific sentinels */
  *((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
    = PATTERN1 - PTRLIMB (ptr);
  PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
  /* ptr+size may be unaligned, so the trailing redzone goes in by memcpy */
  memcpy ((void *) ((gmp_intptr_t) ptr + size), &PATTERN2_var, sizeof (mp_limb_t));

  h->size = size;
  h->ptr = ptr;
  return h->ptr;
}
+
/* Reallocate a block obtained from tests_allocate().  Checks that ptr is a
   live block, that old_size matches the recorded size, and that neither
   redzone has been clobbered, then resizes and re-establishes the redzones
   at the (possibly moved) new location.  */
void *
tests_reallocate (void *ptr, size_t old_size, size_t new_size)
{
  struct header  **hp, *h;
  void *rptr;
  mp_limb_t PATTERN2_var;

  if (new_size == 0)
    {
      fprintf (stderr, "tests_reallocate(): attempt to reallocate %p to 0 bytes\n",
	       ptr);
      abort ();
    }

  hp = tests_memory_find (ptr);
  if (hp == NULL)
    {
      fprintf (stderr, "tests_reallocate(): attempt to reallocate bad pointer %p\n",
	       ptr);
      abort ();
    }
  h = *hp;

  /* the GMP reallocate interface passes the old size; verify it */
  if (h->size != old_size)
    {
      fprintf (stderr, "tests_reallocate(): bad old size %lu, should be %lu\n",
	       (unsigned long) old_size, (unsigned long) h->size);
      abort ();
    }

  /* verify both redzones before letting the block move */
  if (*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
      != PATTERN1 - PTRLIMB (ptr))
    {
      fprintf (stderr, "in realloc: redzone clobbered before block\n");
      abort ();
    }
  PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
  if (memcmp ((void *) ((gmp_intptr_t) ptr + h->size), &PATTERN2_var, sizeof (mp_limb_t)))
    {
      fprintf (stderr, "in realloc: redzone clobbered after block\n");
      abort ();
    }

  /* resize the real block, redzones included */
  rptr = __gmp_default_reallocate ((void *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)),
				 old_size + 2 * sizeof (mp_limb_t),
				 new_size + 2 * sizeof (mp_limb_t));
  ptr = (void *) ((gmp_intptr_t) rptr + sizeof (mp_limb_t));

  /* re-establish the redzones, offset by the possibly new address */
  *((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
    = PATTERN1 - PTRLIMB (ptr);
  PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
  memcpy ((void *) ((gmp_intptr_t) ptr + new_size), &PATTERN2_var, sizeof (mp_limb_t));

  h->size = new_size;
  h->ptr = ptr;
  return h->ptr;
}
+
/* Like tests_memory_find(), but aborting with a diagnostic when ptr isn't
   a live allocation; used by the free routines below.  */
struct header **
tests_free_find (void *ptr)
{
  struct header  **found = tests_memory_find (ptr);

  if (found != NULL)
    return found;

  fprintf (stderr, "tests_free(): attempt to free bad pointer %p\n",
	   ptr);
  abort ();
}
+
/* Free a block obtained from tests_allocate(), without a size check.
   Verifies both redzones are intact, unlinks the bookkeeping record and
   releases both the block and the record.  */
void
tests_free_nosize (void *ptr)
{
  struct header  **hp = tests_free_find (ptr);
  struct header  *h = *hp;
  mp_limb_t PATTERN2_var;

  *hp = h->next;  /* unlink */

  /* check the leading redzone limb */
  if (*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
      != PATTERN1 - PTRLIMB (ptr))
    {
      fprintf (stderr, "in free: redzone clobbered before block\n");
      abort ();
    }
  /* check the trailing redzone (memcmp since ptr+size may be unaligned) */
  PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
  if (memcmp ((void *) ((gmp_intptr_t) ptr + h->size), &PATTERN2_var, sizeof (mp_limb_t)))
    {
      fprintf (stderr, "in free: redzone clobbered after block\n");
      abort ();
    }

  /* free the real block (redzones included) and the record */
  __gmp_default_free ((void *) ((gmp_intptr_t) ptr - sizeof(mp_limb_t)),
		      h->size + 2 * sizeof (mp_limb_t));
  __gmp_default_free (h, sizeof (*h));
}
+
+void
+tests_free (void *ptr, size_t size)
+{
+  struct header  **hp = tests_free_find (ptr);
+  struct header  *h = *hp;
+
+  if (h->size != size)
+    {
+      fprintf (stderr, "tests_free(): bad size %lu, should be %lu\n",
+	       (unsigned long) size, (unsigned long) h->size);
+      abort ();
+    }
+
+  tests_free_nosize (ptr);
+}
+
/* Install the checking allocation routines as GMP's memory functions. */
void
tests_memory_start (void)
{
  mp_set_memory_functions (tests_allocate, tests_reallocate, tests_free);
}
+
+void
+tests_memory_end (void)
+{
+  if (tests_memory_list != NULL)
+    {
+      struct header  *h;
+      unsigned  count;
+
+      fprintf (stderr, "tests_memory_end(): not all memory freed\n");
+
+      count = 0;
+      for (h = tests_memory_list; h != NULL; h = h->next)
+	count++;
+
+      fprintf (stderr, "    %u blocks remaining\n", count);
+      abort ();
+    }
+}
diff --git a/tests/misc.c b/tests/misc.c
new file mode 100644
index 0000000..6c40d78
--- /dev/null
+++ b/tests/misc.c
@@ -0,0 +1,608 @@
+/* Miscellaneous test program support routines.
+
+Copyright 2000-2003, 2005, 2013, 2015, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <ctype.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>     /* for getenv */
+#include <string.h>
+
+#if HAVE_FLOAT_H
+#include <float.h>      /* for DBL_MANT_DIG */
+#endif
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>  /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* The various tests setups and final checks, collected up together. */
/* Common test program startup: verify the linked library version, switch
   to unbuffered output, and initialize the checking memory routines and
   the global random state.  Call before any other GMP use in a test.  */
void
tests_start (void)
{
  /* string long enough for any "major.minor.patch" of single-digit parts;
     built from the compile-time version macros */
  char version[10];
  snprintf (version, 10, "%u.%u.%u",
	    __GNU_MP_VERSION,
	    __GNU_MP_VERSION_MINOR,
	    __GNU_MP_VERSION_PATCHLEVEL);

  /* catch a test binary accidentally linked against an installed gmp
     rather than the freshly built one */
  if (strcmp (gmp_version, version) != 0)
    {
      fprintf (stderr, "tests are not linked to the newly compiled library\n");
      fprintf (stderr, "  local version is: %s\n", version);
      fprintf (stderr, "  linked version is: %s\n", gmp_version);
      abort ();
    }

  /* don't buffer, so output is not lost if a test causes a segv etc */
  setbuf (stdout, NULL);
  setbuf (stderr, NULL);

  tests_memory_start ();
  tests_rand_start ();
}
/* Common test program teardown: release the random state and run the
   memory leak check (which aborts if anything is still allocated).  */
void
tests_end (void)
{
  tests_rand_end ();
  tests_memory_end ();
}
+
/* Seed "rands" from the time of day, and print the seed so a failing run
   can be reproduced with GMP_CHECK_RANDOMIZE=<seed>.  */
static void
seed_from_tod (gmp_randstate_ptr  rands)
{
  unsigned long seed;
#if HAVE_GETTIMEOFDAY
  struct timeval  tv;
  gettimeofday (&tv, NULL);
  /* mix the microseconds into the seconds, and keep the seed printable
     within 32 bits */
  seed = tv.tv_sec ^ ((unsigned long) tv.tv_usec << 12);
  seed &= 0xffffffff;
#else
  /* fallback: whole seconds only */
  time_t  tv;
  time (&tv);
  seed = tv;
#endif
  gmp_randseed_ui (rands, seed);
  printf ("Seed GMP_CHECK_RANDOMIZE=%lu (include this in bug reports)\n", seed);
}
+
+static void
+seed_from_urandom (gmp_randstate_ptr rands, FILE *fs)
+{
+  mpz_t seed;
+  unsigned char buf[6];
+  fread (buf, 1, 6, fs);
+  mpz_init (seed);
+  mpz_import (seed, 6, 1, 1, 0, 0, buf);
+  gmp_randseed (rands, seed);
+  gmp_printf ("Seed GMP_CHECK_RANDOMIZE=%Zd (include this in bug reports)\n", seed);
+  mpz_clear (seed);
+}
+
/* Initialize the global __gmp_rands state.  By default the state keeps its
   deterministic default seed; setting GMP_CHECK_RANDOMIZE in the
   environment overrides that: the values "0" and "1" request a fresh
   random seed (from /dev/urandom, or the time of day as fallback), any
   other value is used directly as the seed for reproducing a past run.  */
void
tests_rand_start (void)
{
  gmp_randstate_ptr  rands;
  char           *seed_string;

  /* this function must be the one that initializes __gmp_rands */
  if (__gmp_rands_initialized)
    {
      printf ("Please let tests_start() initialize the global __gmp_rands.\n");
      printf ("ie. ensure that function is called before the first use of RANDS.\n");
      abort ();
    }

  gmp_randinit_default (__gmp_rands);
  __gmp_rands_initialized = 1;
  rands = __gmp_rands;

  seed_string = getenv ("GMP_CHECK_RANDOMIZE");
  if (seed_string != NULL)
    {
      if (strcmp (seed_string, "0") != 0 &&
	  strcmp (seed_string, "1") != 0)
        {
	  /* explicit seed given: reproduce a previous run */
	  mpz_t seed;
	  mpz_init_set_str (seed, seed_string, 0);
          gmp_printf ("Re-seeding with GMP_CHECK_RANDOMIZE=%Zd\n", seed);
          gmp_randseed (rands, seed);
	  mpz_clear (seed);
        }
      else
        {
	  /* "0" or "1": pick a fresh random seed */
	  FILE *fs = fopen ("/dev/urandom", "r");
	  if (fs != NULL)
	    {
	      seed_from_urandom (rands, fs);
	      fclose (fs);
	    }
	  else
	    seed_from_tod (rands);
        }
      fflush (stdout);
    }
}
/* Release the global random state (RANDS_CLEAR presumably clears
   __gmp_rands -- see tests.h).  */
void
tests_rand_end (void)
{
  RANDS_CLEAR ();
}
+
+
+/* Only used if CPU calling conventions checking is available. */
+mp_limb_t (*calling_conventions_function) (ANYARGS);
+
+
+/* Return p advanced to the next multiple of "align" bytes.  "align" must be
+   a power of 2.  Care is taken not to assume sizeof(int)==sizeof(pointer).
+   Using "unsigned long" avoids a warning on hpux.  */
+void *
+align_pointer (void *p, size_t align)
+{
+  gmp_intptr_t d;
+  d = ((gmp_intptr_t) p) & (align-1);
+  d = (d != 0 ? align-d : 0);
+  return (void *) (((char *) p) + d);
+}
+
+
/* Note that memory allocated with this function can never be freed, because
   the start address of the block allocated is lost. */
/* Allocate "bytes" bytes aligned to an "align"-byte boundary (align must
   be a power of 2); over-allocates by align-1 to guarantee room.  */
void *
__gmp_allocate_func_aligned (size_t bytes, size_t align)
{
  return align_pointer ((*__gmp_allocate_func) (bytes + align-1), align);
}
+
+
+void *
+__gmp_allocate_or_reallocate (void *ptr, size_t oldsize, size_t newsize)
+{
+  if (ptr == NULL)
+    return (*__gmp_allocate_func) (newsize);
+  else
+    return (*__gmp_reallocate_func) (ptr, oldsize, newsize);
+}
+
/* strdup() equivalent using GMP's allocation function; the result must be
   freed with (*__gmp_free_func) on len+1 bytes.  */
char *
__gmp_allocate_strdup (const char *s)
{
  size_t  len;
  char    *t;
  len = strlen (s);
  t = (char *) (*__gmp_allocate_func) (len+1);
  memcpy (t, s, len+1);  /* include the terminating NUL */
  return t;
}
+
+
/* Convert s_orig to upper case, in place, and return s_orig.
   Characters are passed to the <ctype.h> classifiers as unsigned char:
   calling islower()/toupper() with a plain char is undefined behaviour
   when char is signed and the byte's value is negative (non-ASCII text).  */
char *
strtoupper (char *s_orig)
{
  char  *s;
  for (s = s_orig; *s != '\0'; s++)
    if (islower ((unsigned char) *s))
      *s = (char) toupper ((unsigned char) *s);
  return s_orig;
}
+
+
/* Set z from the limb array p,size (non-negative result); high zero limbs
   are stripped first so SIZ(z) ends up normalized.  */
void
mpz_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)
{
  ASSERT (size >= 0);
  MPN_NORMALIZE (p, size);
  MPZ_REALLOC (z, size);
  MPN_COPY (PTR(z), p, size);
  SIZ(z) = size;
}
+
/* Initialize z and set it from the limb array p,size (non-negative
   result); high zero limbs are stripped first.  Allocates at least one
   limb even for a zero value, as mpz invariants require.  */
void
mpz_init_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)
{
  ASSERT (size >= 0);

  MPN_NORMALIZE (p, size);
  ALLOC(z) = MAX (size, 1);
  PTR(z) = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC(z));
  SIZ(z) = size;
  MPN_COPY (PTR(z), p, size);
}
+
+
+/* Find least significant limb position where p1,size and p2,size differ.  */
+mp_size_t
+mpn_diff_lowest (mp_srcptr p1, mp_srcptr p2, mp_size_t size)
+{
+  mp_size_t  i;
+
+  for (i = 0; i < size; i++)
+    if (p1[i] != p2[i])
+      return i;
+
+  /* no differences */
+  return -1;
+}
+
+
+/* Find most significant limb position where p1,size and p2,size differ.  */
+mp_size_t
+mpn_diff_highest (mp_srcptr p1, mp_srcptr p2, mp_size_t size)
+{
+  mp_size_t  i;
+
+  for (i = size-1; i >= 0; i--)
+    if (p1[i] != p2[i])
+      return i;
+
+  /* no differences */
+  return -1;
+}
+
+
+/* Find least significant byte position where p1,size and p2,size differ.  */
+mp_size_t
+byte_diff_lowest (const void *p1, const void *p2, mp_size_t size)
+{
+  mp_size_t  i;
+
+  for (i = 0; i < size; i++)
+    if (((const char *) p1)[i] != ((const char *) p2)[i])
+      return i;
+
+  /* no differences */
+  return -1;
+}
+
+
+/* Find most significant byte position where p1,size and p2,size differ.  */
+mp_size_t
+byte_diff_highest (const void *p1, const void *p2, mp_size_t size)
+{
+  mp_size_t  i;
+
+  for (i = size-1; i >= 0; i--)
+    if (((const char *) p1)[i] != ((const char *) p2)[i])
+      return i;
+
+  /* no differences */
+  return -1;
+}
+
+
+void
+mpz_set_str_or_abort (mpz_ptr z, const char *str, int base)
+{
+  if (mpz_set_str (z, str, base) != 0)
+    {
+      fprintf (stderr, "ERROR: mpz_set_str failed\n");
+      fprintf (stderr, "   str  = \"%s\"\n", str);
+      fprintf (stderr, "   base = %d\n", base);
+      abort();
+    }
+}
+
+void
+mpq_set_str_or_abort (mpq_ptr q, const char *str, int base)
+{
+  if (mpq_set_str (q, str, base) != 0)
+    {
+      fprintf (stderr, "ERROR: mpq_set_str failed\n");
+      fprintf (stderr, "   str  = \"%s\"\n", str);
+      fprintf (stderr, "   base = %d\n", base);
+      abort();
+    }
+}
+
+void
+mpf_set_str_or_abort (mpf_ptr f, const char *str, int base)
+{
+  if (mpf_set_str (f, str, base) != 0)
+    {
+      fprintf (stderr, "ERROR mpf_set_str failed\n");
+      fprintf (stderr, "   str  = \"%s\"\n", str);
+      fprintf (stderr, "   base = %d\n", base);
+      abort();
+    }
+}
+
+
+/* Whether the absolute value of z is a power of 2. */
+int
+mpz_pow2abs_p (mpz_srcptr z)
+{
+  mp_size_t  size, i;
+  mp_srcptr  ptr;
+
+  size = SIZ (z);
+  if (size == 0)
+    return 0;  /* zero is not a power of 2 */
+  size = ABS (size);
+
+  ptr = PTR (z);
+  for (i = 0; i < size-1; i++)
+    if (ptr[i] != 0)
+      return 0;  /* non-zero low limb means not a power of 2 */
+
+  return POW2_P (ptr[i]);  /* high limb power of 2 */
+}
+
+
+/* Exponentially distributed between 0 and 2^nbits-1, meaning the number of
+   bits in the result is uniformly distributed between 0 and nbits-1.
+
+   FIXME: This is not a proper exponential distribution, since the
+   probability function will have a stepped shape due to using a uniform
+   distribution after choosing how many bits.  */
+
+void
+mpz_erandomb (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)
+{
+  mpz_urandomb (rop, rstate, gmp_urandomm_ui (rstate, nbits));
+}
+
+void
+mpz_erandomb_nonzero (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)
+{
+  mpz_erandomb (rop, rstate, nbits);
+  if (mpz_sgn (rop) == 0)
+    mpz_set_ui (rop, 1L);
+}
+
+void
+mpz_errandomb (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)
+{
+  mpz_rrandomb (rop, rstate, gmp_urandomm_ui (rstate, nbits));
+}
+
+void
+mpz_errandomb_nonzero (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)
+{
+  mpz_errandomb (rop, rstate, nbits);
+  if (mpz_sgn (rop) == 0)
+    mpz_set_ui (rop, 1L);
+}
+
+void
+mpz_negrandom (mpz_ptr rop, gmp_randstate_t rstate)
+{
+  mp_limb_t  n;
+  _gmp_rand (&n, rstate, 1);
+  if (n != 0)
+    mpz_neg (rop, rop);
+}
+
+void
+mpz_clobber(mpz_ptr rop)
+{
+  MPN_ZERO(PTR(rop), ALLOC(rop));
+  PTR(rop)[0] = 0xDEADBEEF;
+  SIZ(rop) = 0xDEFACE;
+}
+
+mp_limb_t
+urandom (void)
+{
+#if GMP_NAIL_BITS == 0
+  mp_limb_t  n;
+  _gmp_rand (&n, RANDS, GMP_LIMB_BITS);
+  return n;
+#else
+  mp_limb_t n[2];
+  _gmp_rand (n, RANDS, GMP_LIMB_BITS);
+  return n[0] + (n[1] << GMP_NUMB_BITS);
+#endif
+}
+
+
+/* Call (*func)() with various random number generators. */
+void
+call_rand_algs (void (*func) (const char *, gmp_randstate_ptr))
+{
+  gmp_randstate_t  rstate;
+  mpz_t            a;
+
+  mpz_init (a);
+
+  gmp_randinit_default (rstate);
+  (*func) ("gmp_randinit_default", rstate);
+  gmp_randclear (rstate);
+
+  gmp_randinit_mt (rstate);
+  (*func) ("gmp_randinit_mt", rstate);
+  gmp_randclear (rstate);
+
+  gmp_randinit_lc_2exp_size (rstate, 8L);
+  (*func) ("gmp_randinit_lc_2exp_size 8", rstate);
+  gmp_randclear (rstate);
+
+  gmp_randinit_lc_2exp_size (rstate, 16L);
+  (*func) ("gmp_randinit_lc_2exp_size 16", rstate);
+  gmp_randclear (rstate);
+
+  gmp_randinit_lc_2exp_size (rstate, 128L);
+  (*func) ("gmp_randinit_lc_2exp_size 128", rstate);
+  gmp_randclear (rstate);
+
+  /* degenerate always zeros */
+  mpz_set_ui (a, 0L);
+  gmp_randinit_lc_2exp (rstate, a, 0L, 8L);
+  (*func) ("gmp_randinit_lc_2exp a=0 c=0 m=8", rstate);
+  gmp_randclear (rstate);
+
+  /* degenerate always FFs */
+  mpz_set_ui (a, 0L);
+  gmp_randinit_lc_2exp (rstate, a, 0xFFL, 8L);
+  (*func) ("gmp_randinit_lc_2exp a=0 c=0xFF m=8", rstate);
+  gmp_randclear (rstate);
+
+  mpz_clear (a);
+}
+
+
+/* Return +infinity if available, or 0 if not.
+   We don't want to use libm, so INFINITY or other system values are not
+   used here.  */
+double
+tests_infinity_d (void)
+{
+#if _GMP_IEEE_FLOATS
+  union ieee_double_extract x;
+  x.s.exp = 2047;
+  x.s.manl = 0;
+  x.s.manh = 0;
+  x.s.sig = 0;
+  return x.d;
+#else
+  return 0;
+#endif
+}
+
+
+/* Return non-zero if d is an infinity (either positive or negative).
+   Don't want libm, so don't use isinf() or other system tests.  */
+int
+tests_isinf (double d)
+{
+#if _GMP_IEEE_FLOATS
+  union ieee_double_extract x;
+  x.d = d;
+  return (x.s.exp == 2047 && x.s.manl == 0 && x.s.manh == 0);
+#else
+  return 0;
+#endif
+}
+
+
+/* Set the hardware floating point rounding mode.  Same mode values as mpfr,
+   namely 0=nearest, 1=tozero, 2=up, 3=down.  Return 1 if successful, 0 if
+   not.  */
+int
+tests_hardware_setround (int mode)
+{
+#if ! defined NO_ASM && HAVE_HOST_CPU_FAMILY_x86
+  int  rc;
+  switch (mode) {
+  case 0: rc = 0; break;  /* nearest */
+  case 1: rc = 3; break;  /* tozero  */
+  case 2: rc = 2; break;  /* up      */
+  case 3: rc = 1; break;  /* down    */
+  default:
+    return 0;
+  }
+  x86_fldcw ((x86_fstcw () & ~0xC00) | (rc << 10));
+  return 1;
+#endif
+
+  return 0;
+}
+
+/* Return the hardware floating point rounding mode, or -1 if unknown. */
+int
+tests_hardware_getround (void)
+{
+#if ! defined NO_ASM && HAVE_HOST_CPU_FAMILY_x86
+  switch ((x86_fstcw () & 0xC00) >> 10) {
+  case 0: return 0; break;  /* nearest */
+  case 1: return 3; break;  /* down    */
+  case 2: return 2; break;  /* up      */
+  case 3: return 1; break;  /* tozero  */
+  }
+#endif
+
+  return -1;
+}
+
+
+/* tests_dbl_mant_bits() determines by experiment the number of bits in the
+   mantissa of a "double".  If it's not possible to find a value (perhaps
+   due to the compiler optimizing too aggressively), then return 0.
+
+   This code is used rather than DBL_MANT_DIG from <float.h> since ancient
+   systems like SunOS don't have that file, and since one GNU/Linux ARM
+   system was seen where the float emulation seemed to have only 32 working
+   bits, not the 53 float.h claimed.  */
+
+int
+tests_dbl_mant_bits (void)
+{
+  static int n = -1;
+  volatile double x, y, d;
+
+  if (n != -1)
+    return n;
+
+  n = 1;
+  x = 2.0;
+  for (;;)
+    {
+      /* see if 2^(n+1)+1 can be formed without rounding, if so then
+         continue, if not then "n" is the answer */
+      y = x + 1.0;
+      d = y - x;
+      if (d != 1.0)
+        {
+#if defined (DBL_MANT_DIG) && DBL_RADIX == 2
+          if (n != DBL_MANT_DIG)
+            printf ("Warning, tests_dbl_mant_bits got %d but DBL_MANT_DIG says %d\n", n, DBL_MANT_DIG);
+#endif
+          break;
+        }
+
+      x *= 2;
+      n++;
+
+      if (n > 1000)
+        {
+          printf ("Oops, tests_dbl_mant_bits can't determine mantissa size\n");
+          n = 0;
+          break;
+        }
+    }
+  return n;
+}
+
+
+/* See tests_setjmp_sigfpe in tests.h. */
+
+jmp_buf    tests_sigfpe_target;
+
+RETSIGTYPE
+tests_sigfpe_handler (int sig)
+{
+  longjmp (tests_sigfpe_target, 1);
+}
+
+void
+tests_sigfpe_done (void)
+{
+  signal (SIGFPE, SIG_DFL);
+}
diff --git a/tests/misc/t-locale.c b/tests/misc/t-locale.c
new file mode 100644
index 0000000..c5b0b3c
--- /dev/null
+++ b/tests/misc/t-locale.c
@@ -0,0 +1,197 @@
+/* Test locale support, or attempt to do so.
+
+Copyright 2001, 2002, 2011, 2014, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in glibc langinfo.h */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_NL_TYPES_H
+#include <nl_types.h>  /* for nl_item (on netbsd 1.4.1 at least) */
+#endif
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for lconv */
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+const char *decimal_point;
+
+/* Replace the libc localeconv with one we can manipulate. */
+#if HAVE_LOCALECONV && ! defined __MINGW32__
+struct lconv *
+localeconv (void)
+#if defined __cplusplus && defined __GLIBC__
+  throw()
+#endif
+{
+  static struct lconv  l;
+  l.decimal_point = (char *) decimal_point;
+  return &l;
+}
+#endif
+
+/* Replace the libc nl_langinfo with one we can manipulate. */
+#if HAVE_NL_LANGINFO && ! defined __TERMUX__
+char *
+nl_langinfo (nl_item n)
+#if defined __cplusplus && defined __GLIBC__
+  throw()
+#endif
+{
+#if defined (DECIMAL_POINT)
+  if (n == DECIMAL_POINT)
+    return (char *) decimal_point;
+#endif
+#if defined (RADIXCHAR)
+  if (n == RADIXCHAR)
+    return (char *) decimal_point;
+#endif
+  return (char *) "";
+}
+#endif
+
+void
+check_input (void)
+{
+  static const char *point[] = {
+    ".", ",", "WU", "STR", "ZTV***"
+  };
+
+  static const struct {
+    const char  *str;
+    double      d;
+  } data[] = {
+
+    { "1%s",   1.0 },
+    { "1%s0",  1.0 },
+    { "1%s00", 1.0 },
+
+    { "%s5",    0.5 },
+    { "0%s5",   0.5 },
+    { "00%s5",  0.5 },
+    { "00%s50", 0.5 },
+
+    { "1%s5",    1.5 },
+    { "1%s5e1", 15.0 },
+  };
+
+  int     i, j, neg, ret;
+  char    str[128];
+  mpf_t   f;
+  double  d;
+
+  mpf_init (f);
+
+  for (i = 0; i < numberof (point); i++)
+    {
+      decimal_point = (const char *) point[i];
+
+      for (neg = 0; neg <= 1; neg++)
+        {
+          for (j = 0; j < numberof (data); j++)
+            {
+              strcpy (str, neg ? "-" : "");
+              sprintf (str+strlen(str), data[j].str, decimal_point);
+
+              d = data[j].d;
+              if (neg)
+                d = -d;
+
+              mpf_set_d (f, 123.0);
+              if (mpf_set_str (f, str, 10) != 0)
+                {
+                  printf ("mpf_set_str error\n");
+                  printf ("  point  %s\n", decimal_point);
+                  printf ("  str    %s\n", str);
+                  abort ();
+                }
+              if (mpf_cmp_d (f, d) != 0)
+                {
+                  printf    ("mpf_set_str wrong result\n");
+                  printf    ("  point  %s\n", decimal_point);
+                  printf    ("  str    %s\n", str);
+                  mpf_trace ("  f", f);
+                  printf    ("  d=%g\n", d);
+                  abort ();
+                }
+
+              mpf_set_d (f, 123.0);
+              ret = gmp_sscanf (str, "%Ff", f);
+              if (ret != 1)
+                {
+                  printf ("gmp_sscanf wrong return value\n");
+                  printf ("  point  %s\n", decimal_point);
+                  printf ("  str    %s\n", str);
+                  printf ("  ret    %d\n", ret);
+                  abort ();
+                }
+              if (mpf_cmp_d (f, d) != 0)
+                {
+                  printf    ("gmp_sscanf wrong result\n");
+                  printf    ("  point  %s\n", decimal_point);
+                  printf    ("  str    %s\n", str);
+                  mpf_trace ("  f", f);
+                  printf    ("  d=%g\n", d);
+                  abort ();
+                }
+            }
+        }
+    }
+  mpf_clear (f);
+}
+
+int
+main (void)
+{
+  /* The localeconv replacement breaks printf "%lu" on SunOS 4, so we can't
+     print the seed in tests_rand_start().  Nothing random is used in this
+     program though, so just use the memory tests alone.  */
+  tests_memory_start ();
+
+  {
+    mpf_t  f;
+    char   buf[128];
+    mpf_init (f);
+    decimal_point = ",";
+    mpf_set_d (f, 1.5);
+    gmp_snprintf (buf, sizeof(buf), "%.1Ff", f);
+    mpf_clear (f);
+    if (strcmp (buf, "1,5") != 0)
+      {
+        printf ("Test skipped, replacing localeconv/nl_langinfo doesn't work\n");
+        goto done;
+      }
+  }
+
+  check_input ();
+
+ done:
+  tests_memory_end ();
+  exit (0);
+}
diff --git a/tests/misc/t-printf.c b/tests/misc/t-printf.c
new file mode 100644
index 0000000..e13a23e
--- /dev/null
+++ b/tests/misc/t-printf.c
@@ -0,0 +1,955 @@
+/* Test gmp_printf and related functions.
+
+Copyright 2001-2003, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+/* Usage: t-printf [-s]
+
+   -s  Check the data against the system printf, where possible.  This is
+       only an option since we don't want to fail if the system printf is
+       faulty or strange.  */
+
+
+#include "config.h"	/* needed for the HAVE_, could also move gmp incls */
+
+#include <stdarg.h>
+#include <stddef.h>    /* for ptrdiff_t */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_OBSTACK_VPRINTF
+#define obstack_chunk_alloc tests_allocate
+#define obstack_chunk_free  tests_free_nosize
+#include <obstack.h>
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#endif
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>  /* for unlink */
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+int   option_check_printf = 0;
+
+
+#define CHECK_VFPRINTF_FILENAME  "/tmp/t-printf.tmp"
+FILE  *check_vfprintf_fp;
+
+
+/* From any of the tests run here. */
+#define MAX_OUTPUT  1024
+
+
+void
+check_plain (const char *want, const char *fmt_orig, ...)
+{
+  char        got[MAX_OUTPUT];
+  int         got_len, want_len;
+  size_t      fmtsize;
+  char        *fmt, *q;
+  const char  *p;
+  va_list     ap;
+
+  if (! option_check_printf)
+    return;
+  va_start (ap, fmt_orig);
+
+  fmtsize = strlen (fmt_orig) + 1;
+  fmt = (char *) (*__gmp_allocate_func) (fmtsize);
+
+  for (p = fmt_orig, q = fmt; *p != '\0'; p++)
+    {
+      switch (*p) {
+      case 'a':
+      case 'A':
+	/* The exact value of the exponent isn't guaranteed in glibc, and it
+	   and gmp_printf do slightly different things, so don't compare
+	   directly. */
+	goto done;
+      case 'F':
+	if (p > fmt_orig && *(p-1) == '.')
+	  goto done;  /* don't test the "all digits" cases */
+	/* discard 'F' type */
+	break;
+      case 'Z':
+	/* transmute */
+	*q++ = 'l';
+	break;
+      default:
+	*q++ = *p;
+	break;
+      }
+    }
+  *q = '\0';
+
+  want_len = strlen (want);
+  ASSERT_ALWAYS (want_len < sizeof(got));
+
+  got_len = vsprintf (got, fmt, ap);
+
+  if (got_len != want_len || strcmp (got, want) != 0)
+    {
+      printf ("wanted data doesn't match plain vsprintf\n");
+      printf ("  fmt      |%s|\n", fmt);
+      printf ("  got      |%s|\n", got);
+      printf ("  want     |%s|\n", want);
+      printf ("  got_len  %d\n", got_len);
+      printf ("  want_len %d\n", want_len);
+      abort ();
+    }
+
+ done:
+  (*__gmp_free_func) (fmt, fmtsize);
+}
+
+void
+check_vsprintf (const char *want, const char *fmt, va_list ap)
+{
+  char  got[MAX_OUTPUT];
+  int   got_len, want_len;
+
+  want_len = strlen (want);
+  got_len = gmp_vsprintf (got, fmt, ap);
+
+  if (got_len != want_len || strcmp (got, want) != 0)
+    {
+      printf ("gmp_vsprintf wrong\n");
+      printf ("  fmt      |%s|\n", fmt);
+      printf ("  got      |%s|\n", got);
+      printf ("  want     |%s|\n", want);
+      printf ("  got_len  %d\n", got_len);
+      printf ("  want_len %d\n", want_len);
+      abort ();
+    }
+}
+
+void
+check_vfprintf (const char *want, const char *fmt, va_list ap)
+{
+  char  got[MAX_OUTPUT];
+  int   got_len, want_len, fread_len;
+  long  ftell_len;
+
+  want_len = strlen (want);
+
+  rewind (check_vfprintf_fp);
+  got_len = gmp_vfprintf (check_vfprintf_fp, fmt, ap);
+  ASSERT_ALWAYS (got_len != -1);
+  ASSERT_ALWAYS (fflush (check_vfprintf_fp) == 0);
+
+  ftell_len = ftell (check_vfprintf_fp);
+  ASSERT_ALWAYS (ftell_len != -1);
+
+  rewind (check_vfprintf_fp);
+  ASSERT_ALWAYS (ftell_len <= sizeof(got));
+  fread_len = fread (got, 1, ftell_len, check_vfprintf_fp);
+
+  if (got_len != want_len
+      || ftell_len != want_len
+      || fread_len != want_len
+      || memcmp (got, want, want_len) != 0)
+    {
+      printf ("gmp_vfprintf wrong\n");
+      printf ("  fmt       |%s|\n", fmt);
+      printf ("  got       |%.*s|\n", fread_len, got);
+      printf ("  want      |%s|\n", want);
+      printf ("  got_len   %d\n", got_len);
+      printf ("  ftell_len %ld\n", ftell_len);
+      printf ("  fread_len %d\n", fread_len);
+      printf ("  want_len  %d\n", want_len);
+      abort ();
+    }
+}
+
+void
+check_vsnprintf (const char *want, const char *fmt, va_list ap)
+{
+  char    got[MAX_OUTPUT+1];
+  int     ret, got_len, want_len;
+  size_t  bufsize;
+
+  want_len = strlen (want);
+
+  bufsize = -1;
+  for (;;)
+    {
+      /* do 0 to 5, then want-5 to want+5 */
+      bufsize++;
+      if (bufsize > 5 && bufsize < want_len-5)
+	bufsize = want_len-5;
+      if (bufsize > want_len + 5)
+	break;
+      ASSERT_ALWAYS (bufsize+1 <= sizeof (got));
+
+      got[bufsize] = '!';
+      ret = gmp_vsnprintf (got, bufsize, fmt, ap);
+
+      got_len = MIN (MAX(1,bufsize)-1, want_len);
+
+      if (got[bufsize] != '!')
+	{
+	  printf ("gmp_vsnprintf overwrote bufsize sentinel\n");
+	  goto error;
+	}
+
+      if (ret != want_len)
+	{
+	  printf ("gmp_vsnprintf return value wrong\n");
+	  goto error;
+	}
+
+      if (bufsize > 0)
+	{
+	  if (memcmp (got, want, got_len) != 0 || got[got_len] != '\0')
+	    {
+	      printf ("gmp_vsnprintf wrong result string\n");
+	    error:
+	      printf ("  fmt       |%s|\n", fmt);
+	      printf ("  bufsize   %lu\n", (unsigned long) bufsize);
+	      printf ("  got       |%s|\n", got);
+	      printf ("  want      |%.*s|\n", got_len, want);
+	      printf ("  want full |%s|\n", want);
+	      printf ("  ret       %d\n", ret);
+	      printf ("  want_len  %d\n", want_len);
+	      abort ();
+	    }
+	}
+    }
+}
+
+void
+check_vasprintf (const char *want, const char *fmt, va_list ap)
+{
+  char  *got;
+  int   got_len, want_len;
+
+  want_len = strlen (want);
+  got_len = gmp_vasprintf (&got, fmt, ap);
+
+  if (got_len != want_len || strcmp (got, want) != 0)
+    {
+      printf ("gmp_vasprintf wrong\n");
+      printf ("  fmt      |%s|\n", fmt);
+      printf ("  got      |%s|\n", got);
+      printf ("  want     |%s|\n", want);
+      printf ("  got_len  %d\n", got_len);
+      printf ("  want_len %d\n", want_len);
+      abort ();
+    }
+  (*__gmp_free_func) (got, strlen(got)+1);
+}
+
+void
+check_obstack_vprintf (const char *want, const char *fmt, va_list ap)
+{
+#if HAVE_OBSTACK_VPRINTF
+  struct obstack  ob;
+  int   got_len, want_len, ob_len;
+  char  *got;
+
+  want_len = strlen (want);
+
+  obstack_init (&ob);
+  got_len = gmp_obstack_vprintf (&ob, fmt, ap);
+  got = (char *) obstack_base (&ob);
+  ob_len = obstack_object_size (&ob);
+
+  if (got_len != want_len
+      || ob_len != want_len
+      || memcmp (got, want, want_len) != 0)
+    {
+      printf ("gmp_obstack_vprintf wrong\n");
+      printf ("  fmt      |%s|\n", fmt);
+      printf ("  got      |%s|\n", got);
+      printf ("  want     |%s|\n", want);
+      printf ("  got_len  %d\n", got_len);
+      printf ("  ob_len   %d\n", ob_len);
+      printf ("  want_len %d\n", want_len);
+      abort ();
+    }
+  obstack_free (&ob, NULL);
+#endif
+}
+
+
+void
+check_one (const char *want, const char *fmt, ...)
+{
+  va_list ap;
+  va_start (ap, fmt);
+
+  /* simplest first */
+  check_vsprintf (want, fmt, ap);
+  check_vfprintf (want, fmt, ap);
+  check_vsnprintf (want, fmt, ap);
+  check_vasprintf (want, fmt, ap);
+  check_obstack_vprintf (want, fmt, ap);
+}
+
+
+#define hex_or_octal_p(fmt)             \
+  (strchr (fmt, 'x') != NULL            \
+   || strchr (fmt, 'X') != NULL         \
+   || strchr (fmt, 'o') != NULL)
+
+void
+check_z (void)
+{
+  static const struct {
+    const char  *fmt;
+    const char  *z;
+    const char  *want;
+  } data[] = {
+    { "%Zd", "0",    "0" },
+    { "%Zd", "1",    "1" },
+    { "%Zd", "123",  "123" },
+    { "%Zd", "-1",   "-1" },
+    { "%Zd", "-123", "-123" },
+
+    { "%+Zd", "0",      "+0" },
+    { "%+Zd", "123",  "+123" },
+    { "%+Zd", "-123", "-123" },
+
+    { "%Zx",  "123",   "7b" },
+    { "%ZX",  "123",   "7B" },
+    { "%Zx", "-123",  "-7b" },
+    { "%ZX", "-123",  "-7B" },
+    { "%Zo",  "123",  "173" },
+    { "%Zo", "-123", "-173" },
+
+    { "%#Zx",    "0",     "0" },
+    { "%#ZX",    "0",     "0" },
+    { "%#Zx",  "123",  "0x7b" },
+    { "%#ZX",  "123",  "0X7B" },
+    { "%#Zx", "-123", "-0x7b" },
+    { "%#ZX", "-123", "-0X7B" },
+
+    { "%#Zo",    "0",     "0" },
+    { "%#Zo",  "123",  "0173" },
+    { "%#Zo", "-123", "-0173" },
+
+    { "%10Zd",      "0", "         0" },
+    { "%10Zd",    "123", "       123" },
+    { "%10Zd",   "-123", "      -123" },
+
+    { "%-10Zd",     "0", "0         " },
+    { "%-10Zd",   "123", "123       " },
+    { "%-10Zd",  "-123", "-123      " },
+
+    { "%+10Zd",   "123", "      +123" },
+    { "%+-10Zd",  "123", "+123      " },
+    { "%+10Zd",  "-123", "      -123" },
+    { "%+-10Zd", "-123", "-123      " },
+
+    { "%08Zd",    "0", "00000000" },
+    { "%08Zd",  "123", "00000123" },
+    { "%08Zd", "-123", "-0000123" },
+
+    { "%+08Zd",    "0", "+0000000" },
+    { "%+08Zd",  "123", "+0000123" },
+    { "%+08Zd", "-123", "-0000123" },
+
+    { "%#08Zx",    "0", "00000000" },
+    { "%#08Zx",  "123", "0x00007b" },
+    { "%#08Zx", "-123", "-0x0007b" },
+
+    { "%+#08Zx",    "0", "+0000000" },
+    { "%+#08Zx",  "123", "+0x0007b" },
+    { "%+#08Zx", "-123", "-0x0007b" },
+
+    { "%.0Zd", "0", "" },
+    { "%.1Zd", "0", "0" },
+    { "%.2Zd", "0", "00" },
+    { "%.3Zd", "0", "000" },
+  };
+
+  int        i, j;
+  mpz_t      z;
+  char       *nfmt;
+  mp_size_t  nsize, zeros;
+
+  mpz_init (z);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (z, data[i].z, 0);
+
+      /* don't try negatives or forced sign in hex or octal */
+      if (mpz_fits_slong_p (z)
+	  && ! (hex_or_octal_p (data[i].fmt)
+		&& (strchr (data[i].fmt, '+') != NULL || mpz_sgn(z) < 0)))
+	{
+	  check_plain (data[i].want, data[i].fmt, mpz_get_si (z));
+	}
+
+      check_one (data[i].want, data[i].fmt, z);
+
+      /* Same again, with %N and possibly some high zero limbs */
+      nfmt = __gmp_allocate_strdup (data[i].fmt);
+      for (j = 0; nfmt[j] != '\0'; j++)
+	if (nfmt[j] == 'Z')
+	  nfmt[j] = 'N';
+      for (zeros = 0; zeros <= 3; zeros++)
+	{
+	  nsize = ABSIZ(z)+zeros;
+	  MPZ_REALLOC (z, nsize);
+	  nsize = (SIZ(z) >= 0 ? nsize : -nsize);
+	  refmpn_zero (PTR(z)+ABSIZ(z), zeros);
+	  check_one (data[i].want, nfmt, PTR(z), nsize);
+	}
+      __gmp_free_func (nfmt, strlen(nfmt)+1);
+    }
+
+  mpz_clear (z);
+}
+
+void
+check_q (void)
+{
+  static const struct {
+    const char  *fmt;
+    const char  *q;
+    const char  *want;
+  } data[] = {
+    { "%Qd",    "0",    "0" },
+    { "%Qd",    "1",    "1" },
+    { "%Qd",  "123",  "123" },
+    { "%Qd",   "-1",   "-1" },
+    { "%Qd", "-123", "-123" },
+    { "%Qd",  "3/2",  "3/2" },
+    { "%Qd", "-3/2", "-3/2" },
+
+    { "%+Qd", "0",      "+0" },
+    { "%+Qd", "123",  "+123" },
+    { "%+Qd", "-123", "-123" },
+    { "%+Qd", "5/8",  "+5/8" },
+    { "%+Qd", "-5/8", "-5/8" },
+
+    { "%Qx",  "123",   "7b" },
+    { "%QX",  "123",   "7B" },
+    { "%Qx",  "15/16", "f/10" },
+    { "%QX",  "15/16", "F/10" },
+    { "%Qx", "-123",  "-7b" },
+    { "%QX", "-123",  "-7B" },
+    { "%Qx", "-15/16", "-f/10" },
+    { "%QX", "-15/16", "-F/10" },
+    { "%Qo",  "123",  "173" },
+    { "%Qo", "-123", "-173" },
+    { "%Qo",  "16/17",  "20/21" },
+    { "%Qo", "-16/17", "-20/21" },
+
+    { "%#Qx",    "0",     "0" },
+    { "%#QX",    "0",     "0" },
+    { "%#Qx",  "123",  "0x7b" },
+    { "%#QX",  "123",  "0X7B" },
+    { "%#Qx",  "5/8",  "0x5/0x8" },
+    { "%#QX",  "5/8",  "0X5/0X8" },
+    { "%#Qx", "-123", "-0x7b" },
+    { "%#QX", "-123", "-0X7B" },
+    { "%#Qx", "-5/8", "-0x5/0x8" },
+    { "%#QX", "-5/8", "-0X5/0X8" },
+    { "%#Qo",    "0",     "0" },
+    { "%#Qo",  "123",  "0173" },
+    { "%#Qo", "-123", "-0173" },
+    { "%#Qo",  "5/7",  "05/07" },
+    { "%#Qo", "-5/7", "-05/07" },
+
+    /* zero denominator and showbase */
+    { "%#10Qo", "0/0",     "       0/0" },
+    { "%#10Qd", "0/0",     "       0/0" },
+    { "%#10Qx", "0/0",     "       0/0" },
+    { "%#10Qo", "123/0",   "    0173/0" },
+    { "%#10Qd", "123/0",   "     123/0" },
+    { "%#10Qx", "123/0",   "    0x7b/0" },
+    { "%#10QX", "123/0",   "    0X7B/0" },
+    { "%#10Qo", "-123/0",  "   -0173/0" },
+    { "%#10Qd", "-123/0",  "    -123/0" },
+    { "%#10Qx", "-123/0",  "   -0x7b/0" },
+    { "%#10QX", "-123/0",  "   -0X7B/0" },
+
+    { "%10Qd",      "0", "         0" },
+    { "%-10Qd",     "0", "0         " },
+    { "%10Qd",    "123", "       123" },
+    { "%-10Qd",   "123", "123       " },
+    { "%10Qd",   "-123", "      -123" },
+    { "%-10Qd",  "-123", "-123      " },
+
+    { "%+10Qd",   "123", "      +123" },
+    { "%+-10Qd",  "123", "+123      " },
+    { "%+10Qd",  "-123", "      -123" },
+    { "%+-10Qd", "-123", "-123      " },
+
+    { "%08Qd",    "0", "00000000" },
+    { "%08Qd",  "123", "00000123" },
+    { "%08Qd", "-123", "-0000123" },
+
+    { "%+08Qd",    "0", "+0000000" },
+    { "%+08Qd",  "123", "+0000123" },
+    { "%+08Qd", "-123", "-0000123" },
+
+    { "%#08Qx",    "0", "00000000" },
+    { "%#08Qx",  "123", "0x00007b" },
+    { "%#08Qx", "-123", "-0x0007b" },
+
+    { "%+#08Qx",    "0", "+0000000" },
+    { "%+#08Qx",  "123", "+0x0007b" },
+    { "%+#08Qx", "-123", "-0x0007b" },
+  };
+
+  int    i;
+  mpq_t  q;
+
+  mpq_init (q);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpq_set_str_or_abort (q, data[i].q, 0);
+      check_one (data[i].want, data[i].fmt, q);
+    }
+
+  mpq_clear (q);
+}
+
+void
+check_f (void)
+{
+  static const struct {
+    const char  *fmt;
+    const char  *f;
+    const char  *want;
+
+  } data[] = {
+
+    { "%Ff",    "0",    "0.000000" },
+    { "%Ff",  "123",  "123.000000" },
+    { "%Ff", "-123", "-123.000000" },
+
+    { "%+Ff",    "0",   "+0.000000" },
+    { "%+Ff",  "123", "+123.000000" },
+    { "%+Ff", "-123", "-123.000000" },
+
+    { "%.0Ff",    "0",    "0" },
+    { "%.0Ff",  "123",  "123" },
+    { "%.0Ff", "-123", "-123" },
+
+    { "%8.0Ff",    "0", "       0" },
+    { "%8.0Ff",  "123", "     123" },
+    { "%8.0Ff", "-123", "    -123" },
+
+    { "%08.0Ff",    "0", "00000000" },
+    { "%08.0Ff",  "123", "00000123" },
+    { "%08.0Ff", "-123", "-0000123" },
+
+    { "%10.2Ff",       "0", "      0.00" },
+    { "%10.2Ff",    "0.25", "      0.25" },
+    { "%10.2Ff",  "123.25", "    123.25" },
+    { "%10.2Ff", "-123.25", "   -123.25" },
+
+    { "%-10.2Ff",       "0", "0.00      " },
+    { "%-10.2Ff",    "0.25", "0.25      " },
+    { "%-10.2Ff",  "123.25", "123.25    " },
+    { "%-10.2Ff", "-123.25", "-123.25   " },
+
+    { "%.2Ff", "0.00000000000001", "0.00" },
+    { "%.2Ff", "0.002",            "0.00" },
+    { "%.2Ff", "0.008",            "0.01" },
+
+    { "%.0Ff", "123.00000000000001", "123" },
+    { "%.0Ff", "123.2",              "123" },
+    { "%.0Ff", "123.8",              "124" },
+
+    { "%.0Ff",  "999999.9", "1000000" },
+    { "%.0Ff", "3999999.9", "4000000" },
+
+    { "%Fe",    "0",  "0.000000e+00" },
+    { "%Fe",    "1",  "1.000000e+00" },
+    { "%Fe",  "123",  "1.230000e+02" },
+
+    { "%FE",    "0",  "0.000000E+00" },
+    { "%FE",    "1",  "1.000000E+00" },
+    { "%FE",  "123",  "1.230000E+02" },
+
+    { "%Fe",    "0",  "0.000000e+00" },
+    { "%Fe",    "1",  "1.000000e+00" },
+
+    { "%.0Fe",     "10000000000",    "1e+10" },
+    { "%.0Fe",    "-10000000000",   "-1e+10" },
+
+    { "%.2Fe",     "10000000000",  "1.00e+10" },
+    { "%.2Fe",    "-10000000000", "-1.00e+10" },
+
+    { "%8.0Fe",    "10000000000", "   1e+10" },
+    { "%8.0Fe",   "-10000000000", "  -1e+10" },
+
+    { "%-8.0Fe",   "10000000000", "1e+10   " },
+    { "%-8.0Fe",  "-10000000000", "-1e+10  " },
+
+    { "%12.2Fe",   "10000000000", "    1.00e+10" },
+    { "%12.2Fe",  "-10000000000", "   -1.00e+10" },
+
+    { "%012.2Fe",  "10000000000", "00001.00e+10" },
+    { "%012.2Fe", "-10000000000", "-0001.00e+10" },
+
+    { "%Fg",   "0", "0" },
+    { "%Fg",   "1", "1" },
+    { "%Fg",   "-1", "-1" },
+
+    { "%.0Fg", "0", "0" },
+    { "%.0Fg", "1", "1" },
+    { "%.0Fg", "-1", "-1" },
+
+    { "%.1Fg", "100", "1e+02" },
+    { "%.2Fg", "100", "1e+02" },
+    { "%.3Fg", "100", "100" },
+    { "%.4Fg", "100", "100" },
+
+    { "%Fg", "0.001",    "0.001" },
+    { "%Fg", "0.0001",   "0.0001" },
+    { "%Fg", "0.00001",  "1e-05" },
+    { "%Fg", "0.000001", "1e-06" },
+
+    { "%.4Fg", "1.00000000000001", "1" },
+    { "%.4Fg", "100000000000001",  "1e+14" },
+
+    { "%.4Fg", "12345678", "1.235e+07" },
+
+    { "%Fa", "0","0x0p+0" },
+    { "%FA", "0","0X0P+0" },
+
+    { "%Fa", "1","0x1p+0" },
+    { "%Fa", "65535","0xf.fffp+12" },
+    { "%Fa", "65536","0x1p+16" },
+    { "%F.10a", "65536","0x1.0000000000p+16" },
+    { "%F.1a", "65535","0x1.0p+16" },
+    { "%F.0a", "65535","0x1p+16" },
+
+    { "%.2Ff", "0.99609375", "1.00" },
+    { "%.Ff",  "0.99609375", "0.99609375" },
+    { "%.Fe",  "0.99609375", "9.9609375e-01" },
+    { "%.Fg",  "0.99609375", "0.99609375" },
+    { "%.20Fg",  "1000000", "1000000" },
+    { "%.Fg",  "1000000", "1000000" },
+
+    { "%#.0Ff", "1", "1." },
+    { "%#.0Fe", "1", "1.e+00" },
+    { "%#.0Fg", "1", "1." },
+
+    { "%#.1Ff", "1", "1.0" },
+    { "%#.1Fe", "1", "1.0e+00" },
+    { "%#.1Fg", "1", "1." },
+
+    { "%#.4Ff", "1234", "1234.0000" },
+    { "%#.4Fe", "1234", "1.2340e+03" },
+    { "%#.4Fg", "1234", "1234." },
+
+    { "%#.8Ff", "1234", "1234.00000000" },
+    { "%#.8Fe", "1234", "1.23400000e+03" },
+    { "%#.8Fg", "1234", "1234.0000" },
+
+  };
+
+  int     i;
+  mpf_t   f;
+  double  d;
+
+  mpf_init2 (f, 256L);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      if (data[i].f[0] == '0' && data[i].f[1] == 'x')
+	mpf_set_str_or_abort (f, data[i].f, 16);
+      else
+	mpf_set_str_or_abort (f, data[i].f, 10);
+
+      /* if mpf->double doesn't truncate, then expect same result */
+      d = mpf_get_d (f);
+      if (mpf_cmp_d (f, d) == 0)
+	check_plain (data[i].want, data[i].fmt, d);
+
+      check_one (data[i].want, data[i].fmt, f);
+    }
+
+  mpf_clear (f);
+}
+
+
+void
+check_limb (void)
+{
+  int        i;
+  mp_limb_t  limb;
+  mpz_t      z;
+  char       *s;
+
+  check_one ("0", "%Md", CNST_LIMB(0));
+  check_one ("1", "%Md", CNST_LIMB(1));
+
+  /* "i" many 1 bits, tested against mpz_get_str in decimal and hex */
+  limb = 1;
+  mpz_init_set_ui (z, 1L);
+  for (i = 1; i <= GMP_LIMB_BITS; i++)
+    {
+      s = mpz_get_str (NULL, 10, z);
+      check_one (s, "%Mu", limb);
+      (*__gmp_free_func) (s, strlen (s) + 1);
+
+      s = mpz_get_str (NULL, 16, z);
+      check_one (s, "%Mx", limb);
+      (*__gmp_free_func) (s, strlen (s) + 1);
+
+      s = mpz_get_str (NULL, -16, z);
+      check_one (s, "%MX", limb);
+      (*__gmp_free_func) (s, strlen (s) + 1);
+
+      limb = 2*limb + 1;
+      mpz_mul_2exp (z, z, 1L);
+      mpz_add_ui (z, z, 1L);
+    }
+
+  mpz_clear (z);
+}
+
+
+void
+check_n (void)
+{
+  {
+    int  n = -1;
+    check_one ("blah", "%nblah", &n);
+    ASSERT_ALWAYS (n == 0);
+  }
+
+  {
+    int  n = -1;
+    check_one ("hello ", "hello %n", &n);
+    ASSERT_ALWAYS (n == 6);
+  }
+
+  {
+    int  n = -1;
+    check_one ("hello  world", "hello %n world", &n);
+    ASSERT_ALWAYS (n == 6);
+  }
+
+#define CHECK_N(type, string)                           \
+  do {                                                  \
+    type  x[2];                                         \
+    char  fmt[128];                                     \
+							\
+    x[0] = ~ (type) 0;                                  \
+    x[1] = ~ (type) 0;                                  \
+    sprintf (fmt, "%%d%%%sn%%d", string);               \
+    check_one ("123456", fmt, 123, &x[0], 456);         \
+							\
+    /* should write whole of x[0] and none of x[1] */   \
+    ASSERT_ALWAYS (x[0] == 3);                          \
+    ASSERT_ALWAYS (x[1] == (type) ~ (type) 0);		\
+							\
+  } while (0)
+
+  CHECK_N (mp_limb_t, "M");
+  CHECK_N (char,      "hh");
+  CHECK_N (long,      "l");
+#if HAVE_LONG_LONG
+  CHECK_N (long long, "L");
+#endif
+#if HAVE_INTMAX_T
+  CHECK_N (intmax_t,  "j");
+#endif
+#if HAVE_PTRDIFF_T
+  CHECK_N (ptrdiff_t, "t");
+#endif
+  CHECK_N (short,     "h");
+  CHECK_N (size_t,    "z");
+
+  {
+    mpz_t  x[2];
+    mpz_init_set_si (x[0], -987L);
+    mpz_init_set_si (x[1],  654L);
+    check_one ("123456", "%d%Zn%d", 123, x[0], 456);
+    MPZ_CHECK_FORMAT (x[0]);
+    MPZ_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (mpz_cmp_ui (x[0], 3L) == 0);
+    ASSERT_ALWAYS (mpz_cmp_ui (x[1], 654L) == 0);
+    mpz_clear (x[0]);
+    mpz_clear (x[1]);
+  }
+
+  {
+    mpq_t  x[2];
+    mpq_init (x[0]);
+    mpq_init (x[1]);
+    mpq_set_ui (x[0], 987L, 654L);
+    mpq_set_ui (x[1], 4115L, 226L);
+    check_one ("123456", "%d%Qn%d", 123, x[0], 456);
+    MPQ_CHECK_FORMAT (x[0]);
+    MPQ_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (mpq_cmp_ui (x[0], 3L, 1L) == 0);
+    ASSERT_ALWAYS (mpq_cmp_ui (x[1], 4115L, 226L) == 0);
+    mpq_clear (x[0]);
+    mpq_clear (x[1]);
+  }
+
+  {
+    mpf_t  x[2];
+    mpf_init (x[0]);
+    mpf_init (x[1]);
+    mpf_set_ui (x[0], 987L);
+    mpf_set_ui (x[1], 654L);
+    check_one ("123456", "%d%Fn%d", 123, x[0], 456);
+    MPF_CHECK_FORMAT (x[0]);
+    MPF_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (mpf_cmp_ui (x[0], 3L) == 0);
+    ASSERT_ALWAYS (mpf_cmp_ui (x[1], 654L) == 0);
+    mpf_clear (x[0]);
+    mpf_clear (x[1]);
+  }
+
+  {
+    mp_limb_t  a[5];
+    mp_limb_t  a_want[numberof(a)];
+    mp_size_t  i;
+
+    a[0] = 123;
+    check_one ("blah", "bl%Nnah", a, (mp_size_t) 0);
+    ASSERT_ALWAYS (a[0] == 123);
+
+    MPN_ZERO (a_want, numberof (a_want));
+    for (i = 1; i < numberof (a); i++)
+      {
+	check_one ("blah", "bl%Nnah", a, i);
+	a_want[0] = 2;
+	ASSERT_ALWAYS (mpn_cmp (a, a_want, i) == 0);
+      }
+  }
+}
+
+
+void
+check_misc (void)
+{
+  mpz_t  z;
+  mpf_t  f;
+
+  mpz_init (z);
+  mpf_init2 (f, 128L);
+
+  check_one ("!", "%c", '!');
+
+  check_one ("hello world", "hello %s", "world");
+  check_one ("hello:", "%s:", "hello");
+  mpz_set_ui (z, 0L);
+  check_one ("hello0", "%s%Zd", "hello", z, z);
+
+  {
+    static char  xs[801];
+    memset (xs, 'x', sizeof(xs)-1);
+    check_one (xs, "%s", xs);
+  }
+  {
+    char  *xs;
+    xs = (char *) (*__gmp_allocate_func) (MAX_OUTPUT * 2 - 12);
+    memset (xs, '%', MAX_OUTPUT * 2 - 14);
+    xs [MAX_OUTPUT * 2 - 13] = '\0';
+    xs [MAX_OUTPUT * 2 - 14] = 'x';
+    check_one (xs + MAX_OUTPUT - 7, xs, NULL);
+    (*__gmp_free_func) (xs, MAX_OUTPUT * 2 - 12);
+  }
+
+  mpz_set_ui (z, 12345L);
+  check_one ("     12345", "%*Zd", 10, z);
+  check_one ("0000012345", "%0*Zd", 10, z);
+  check_one ("12345     ", "%*Zd", -10, z);
+  check_one ("12345 and 678", "%Zd and %d", z, 678);
+  check_one ("12345,1,12345,2,12345", "%Zd,%d,%Zd,%d,%Zd", z, 1, z, 2, z);
+
+  /* from the glibc info docs */
+  mpz_set_si (z, 0L);
+  check_one ("|    0|0    |   +0|+0   |    0|00000|     |   00|0|",
+	     "|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|",
+	     /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);
+  mpz_set_si (z, 1L);
+  check_one ("|    1|1    |   +1|+1   |    1|00001|    1|   01|1|",
+	     "|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|",
+	     /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);
+  mpz_set_si (z, -1L);
+  check_one ("|   -1|-1   |   -1|-1   |   -1|-0001|   -1|  -01|-1|",
+	     "|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|",
+	     /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);
+  mpz_set_si (z, 100000L);
+  check_one ("|100000|100000|+100000|+100000| 100000|100000|100000|100000|100000|",
+	     "|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|",
+	     /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);
+  mpz_set_si (z, 0L);
+  check_one ("|    0|    0|    0|    0|    0|    0|  00000000|",
+	     "|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|",
+	     /**/ z,   z,   z,    z,    z,    z,       z);
+  mpz_set_si (z, 1L);
+  check_one ("|    1|    1|    1|   01|  0x1|  0X1|0x00000001|",
+	     "|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|",
+	     /**/ z,   z,   z,    z,    z,    z,       z);
+  mpz_set_si (z, 100000L);
+  check_one ("|303240|186a0|186A0|0303240|0x186a0|0X186A0|0x000186a0|",
+	     "|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|",
+	     /**/ z,   z,   z,    z,    z,    z,       z);
+
+  /* %zd for size_t won't be available on old systems, and running something
+     to see if it works might be bad, so only try it on glibc, and only on a
+     new enough version (glibc 2.0 doesn't have %zd) */
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 0)
+  mpz_set_ui (z, 789L);
+  check_one ("456 789 blah", "%zd %Zd blah", (size_t) 456, z);
+#endif
+
+  mpz_clear (z);
+  mpf_clear (f);
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  if (argc > 1 && strcmp (argv[1], "-s") == 0)
+    option_check_printf = 1;
+
+  tests_start ();
+  check_vfprintf_fp = fopen (CHECK_VFPRINTF_FILENAME, "w+");
+  ASSERT_ALWAYS (check_vfprintf_fp != NULL);
+
+  check_z ();
+  check_q ();
+  check_f ();
+  check_limb ();
+  check_n ();
+  check_misc ();
+
+  ASSERT_ALWAYS (fclose (check_vfprintf_fp) == 0);
+  unlink (CHECK_VFPRINTF_FILENAME);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/misc/t-scanf.c b/tests/misc/t-scanf.c
new file mode 100644
index 0000000..8c25533
--- /dev/null
+++ b/tests/misc/t-scanf.c
@@ -0,0 +1,1615 @@
+/* Test gmp_scanf and related functions.
+
+Copyright 2001-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+/* Usage: t-scanf [-s]
+
+   -s  Check the data against the system scanf, where possible.  This is
+       only an option since we don't want to fail if the system scanf is
+       faulty or strange.
+
+   There's some fairly unattractive repetition between check_z, check_q and
+   check_f, but enough differences to make a common loop or a set of macros
+   seem like too much trouble. */
+
+#include "config.h"	/* needed for the HAVE_, could also move gmp incls */
+
+#include <stdarg.h>
+
+#include <stddef.h>    /* for ptrdiff_t */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#endif
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>  /* for unlink */
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define TEMPFILE  "/tmp/t-scanf.tmp"
+
+int   option_libc_scanf = 0;
+
+typedef int (*fun_t) (const char *, const char *, void *, void *);
+
+
+/* This problem was seen on powerpc7450-apple-darwin7.0.0, sscanf returns 0
+   where it should return EOF.  A workaround in gmp_sscanf would be a bit
+   tedious, and since this is a rather obvious libc bug, quite likely
+   affecting other programs, we'll just suppress affected tests for now.  */
+int
+test_sscanf_eof_ok (void)
+{
+  static int  result = -1;
+
+  if (result == -1)
+    {
+      int  x;
+      if (sscanf ("", "%d", &x) == EOF)
+        {
+          result = 1;
+        }
+      else
+        {
+          printf ("Warning, sscanf(\"\",\"%%d\",&x) doesn't return EOF.\n");
+          printf ("This affects gmp_sscanf, tests involving it will be suppressed.\n");
+          printf ("You should try to get a fix for your libc.\n");
+          result = 0;
+        }
+    }
+  return result;
+}
+
+
+/* Convert fmt from a GMP scanf format string to an equivalent for a plain
+   libc scanf, for example "%Zd" becomes "%ld".  Return 1 if this succeeds,
+   0 if it cannot (or should not) be done.  */
+int
+libc_scanf_convert (char *fmt)
+{
+  char  *p = fmt;
+
+  if (! option_libc_scanf)
+    return 0;
+
+  for ( ; *fmt != '\0'; fmt++)
+    {
+      switch (*fmt) {
+      case 'F':
+      case 'Q':
+      case 'Z':
+        /* transmute */
+        *p++ = 'l';
+        break;
+      default:
+        *p++ = *fmt;
+        break;
+      }
+    }
+  *p = '\0';
+  return 1;
+}
+
+
+long  got_ftell;
+int   fromstring_next_c;
+
+/* Call gmp_fscanf, reading the "input" string data provided. */
+int
+fromstring_gmp_fscanf (const char *input, const char *fmt, ...)
+{
+  va_list  ap;
+  FILE     *fp;
+  int      ret;
+  va_start (ap, fmt);
+
+  fp = fopen (TEMPFILE, "w+");
+  ASSERT_ALWAYS (fp != NULL);
+  ASSERT_ALWAYS (fputs (input, fp) != EOF);
+  ASSERT_ALWAYS (fflush (fp) == 0);
+  rewind (fp);
+
+  ret = gmp_vfscanf (fp, fmt, ap);
+  got_ftell = ftell (fp);
+  ASSERT_ALWAYS (got_ftell != -1L);
+
+  fromstring_next_c = getc (fp);
+
+  ASSERT_ALWAYS (fclose (fp) == 0);
+  va_end (ap);
+  return ret;
+}
+
+
+int
+fun_gmp_sscanf (const char *input, const char *fmt, void *a1, void *a2)
+{
+  if (a2 == NULL)
+    return gmp_sscanf (input, fmt, a1);
+  else
+    return gmp_sscanf (input, fmt, a1, a2);
+}
+
+int
+fun_gmp_fscanf (const char *input, const char *fmt, void *a1, void *a2)
+{
+  if (a2 == NULL)
+    return fromstring_gmp_fscanf (input, fmt, a1);
+  else
+    return fromstring_gmp_fscanf (input, fmt, a1, a2);
+}
+
+
+int
+fun_fscanf (const char *input, const char *fmt, void *a1, void *a2)
+{
+  FILE  *fp;
+  int   ret;
+
+  fp = fopen (TEMPFILE, "w+");
+  ASSERT_ALWAYS (fp != NULL);
+  ASSERT_ALWAYS (fputs (input, fp) != EOF);
+  ASSERT_ALWAYS (fflush (fp) == 0);
+  rewind (fp);
+
+  if (a2 == NULL)
+    ret = fscanf (fp, fmt, a1);
+  else
+    ret = fscanf (fp, fmt, a1, a2);
+
+  got_ftell = ftell (fp);
+  ASSERT_ALWAYS (got_ftell != -1L);
+
+  fromstring_next_c = getc (fp);
+
+  ASSERT_ALWAYS (fclose (fp) == 0);
+  return ret;
+}
+
+
+/* On various old systems, for instance HP-UX 9, the C library sscanf needs
+   to be able to write into the input string.  Ensure that this is possible,
+   when gcc is putting the test data into a read-only section.
+
+   Actually we ought to only need this under SSCANF_WRITABLE_INPUT from
+   configure, but it's just as easy to do it unconditionally, and in any
+   case this code is only executed under the -s option.  */
+
+int
+fun_sscanf (const char *input, const char *fmt, void *a1, void *a2)
+{
+  char    *input_writable;
+  size_t  size;
+  int     ret;
+
+  size = strlen (input) + 1;
+  input_writable = (char *) (*__gmp_allocate_func) (size);
+  memcpy (input_writable, input, size);
+
+  if (a2 == NULL)
+    ret = sscanf (input_writable, fmt, a1);
+  else
+    ret = sscanf (input_writable, fmt, a1, a2);
+
+  (*__gmp_free_func) (input_writable, size);
+  return ret;
+}
+
+
+/* whether the format string consists entirely of ignored fields */
+int
+fmt_allignore (const char *fmt)
+{
+  int  saw_star = 1;
+  for ( ; *fmt != '\0'; fmt++)
+    {
+      switch (*fmt) {
+      case '%':
+        if (! saw_star)
+          return 0;
+        saw_star = 0;
+        break;
+      case '*':
+        saw_star = 1;
+        break;
+      }
+    }
+  return 1;
+}
+
+void
+check_z (void)
+{
+  static const struct {
+    const char  *fmt;
+    const char  *input;
+    const char  *want;
+    int         want_ret;
+    long        want_ftell;
+    int         want_upto;
+    int         not_glibc;
+
+  } data[] = {
+
+    { "%Zd",    "0",    "0", 1, -1, -1 },
+    { "%Zd",    "1",    "1", 1, -1, -1 },
+    { "%Zd",  "123",  "123", 1, -1, -1 },
+    { "%Zd",   "+0",    "0", 1, -1, -1 },
+    { "%Zd",   "+1",    "1", 1, -1, -1 },
+    { "%Zd", "+123",  "123", 1, -1, -1 },
+    { "%Zd",   "-0",    "0", 1, -1, -1 },
+    { "%Zd",   "-1",   "-1", 1, -1, -1 },
+    { "%Zd", "-123", "-123", 1, -1, -1 },
+
+    { "%Zo",    "0",    "0", 1, -1, -1 },
+    { "%Zo",  "173",  "123", 1, -1, -1 },
+    { "%Zo",   "+0",    "0", 1, -1, -1 },
+    { "%Zo", "+173",  "123", 1, -1, -1 },
+    { "%Zo",   "-0",    "0", 1, -1, -1 },
+    { "%Zo", "-173", "-123", 1, -1, -1 },
+
+    { "%Zx",    "0",    "0", 1, -1, -1 },
+    { "%Zx",   "7b",  "123", 1, -1, -1 },
+    { "%Zx",   "7b",  "123", 1, -1, -1 },
+    { "%Zx",   "+0",    "0", 1, -1, -1 },
+    { "%Zx",  "+7b",  "123", 1, -1, -1 },
+    { "%Zx",  "+7b",  "123", 1, -1, -1 },
+    { "%Zx",   "-0",   "-0", 1, -1, -1 },
+    { "%Zx",  "-7b", "-123", 1, -1, -1 },
+    { "%Zx",  "-7b", "-123", 1, -1, -1 },
+    { "%ZX",    "0",    "0", 1, -1, -1 },
+    { "%ZX",   "7b",  "123", 1, -1, -1 },
+    { "%ZX",   "7b",  "123", 1, -1, -1 },
+    { "%ZX",   "+0",    "0", 1, -1, -1 },
+    { "%ZX",  "+7b",  "123", 1, -1, -1 },
+    { "%ZX",  "+7b",  "123", 1, -1, -1 },
+    { "%ZX",   "-0",   "-0", 1, -1, -1 },
+    { "%ZX",  "-7b", "-123", 1, -1, -1 },
+    { "%ZX",  "-7b", "-123", 1, -1, -1 },
+    { "%Zx",    "0",    "0", 1, -1, -1 },
+    { "%Zx",   "7B",  "123", 1, -1, -1 },
+    { "%Zx",   "7B",  "123", 1, -1, -1 },
+    { "%Zx",   "+0",    "0", 1, -1, -1 },
+    { "%Zx",  "+7B",  "123", 1, -1, -1 },
+    { "%Zx",  "+7B",  "123", 1, -1, -1 },
+    { "%Zx",   "-0",   "-0", 1, -1, -1 },
+    { "%Zx",  "-7B", "-123", 1, -1, -1 },
+    { "%Zx",  "-7B", "-123", 1, -1, -1 },
+    { "%ZX",    "0",    "0", 1, -1, -1 },
+    { "%ZX",   "7B",  "123", 1, -1, -1 },
+    { "%ZX",   "7B",  "123", 1, -1, -1 },
+    { "%ZX",   "+0",    "0", 1, -1, -1 },
+    { "%ZX",  "+7B",  "123", 1, -1, -1 },
+    { "%ZX",  "+7B",  "123", 1, -1, -1 },
+    { "%ZX",   "-0",   "-0", 1, -1, -1 },
+    { "%ZX",  "-7B", "-123", 1, -1, -1 },
+    { "%ZX",  "-7B", "-123", 1, -1, -1 },
+
+    { "%Zi",    "0",    "0", 1, -1, -1 },
+    { "%Zi",    "1",    "1", 1, -1, -1 },
+    { "%Zi",  "123",  "123", 1, -1, -1 },
+    { "%Zi",   "+0",    "0", 1, -1, -1 },
+    { "%Zi",   "+1",    "1", 1, -1, -1 },
+    { "%Zi", "+123",  "123", 1, -1, -1 },
+    { "%Zi",   "-0",    "0", 1, -1, -1 },
+    { "%Zi",   "-1",   "-1", 1, -1, -1 },
+    { "%Zi", "-123", "-123", 1, -1, -1 },
+
+    { "%Zi",    "00",    "0", 1, -1, -1 },
+    { "%Zi",  "0173",  "123", 1, -1, -1 },
+    { "%Zi",   "+00",    "0", 1, -1, -1 },
+    { "%Zi", "+0173",  "123", 1, -1, -1 },
+    { "%Zi",   "-00",    "0", 1, -1, -1 },
+    { "%Zi", "-0173", "-123", 1, -1, -1 },
+
+    { "%Zi",    "0x0",    "0", 1, -1, -1 },
+    { "%Zi",   "0x7b",  "123", 1, -1, -1 },
+    { "%Zi",   "0x7b",  "123", 1, -1, -1 },
+    { "%Zi",   "+0x0",    "0", 1, -1, -1 },
+    { "%Zi",  "+0x7b",  "123", 1, -1, -1 },
+    { "%Zi",  "+0x7b",  "123", 1, -1, -1 },
+    { "%Zi",   "-0x0",   "-0", 1, -1, -1 },
+    { "%Zi",  "-0x7b", "-123", 1, -1, -1 },
+    { "%Zi",  "-0x7b", "-123", 1, -1, -1 },
+    { "%Zi",    "0X0",    "0", 1, -1, -1 },
+    { "%Zi",   "0X7b",  "123", 1, -1, -1 },
+    { "%Zi",   "0X7b",  "123", 1, -1, -1 },
+    { "%Zi",   "+0X0",    "0", 1, -1, -1 },
+    { "%Zi",  "+0X7b",  "123", 1, -1, -1 },
+    { "%Zi",  "+0X7b",  "123", 1, -1, -1 },
+    { "%Zi",   "-0X0",   "-0", 1, -1, -1 },
+    { "%Zi",  "-0X7b", "-123", 1, -1, -1 },
+    { "%Zi",  "-0X7b", "-123", 1, -1, -1 },
+    { "%Zi",    "0x0",    "0", 1, -1, -1 },
+    { "%Zi",   "0x7B",  "123", 1, -1, -1 },
+    { "%Zi",   "0x7B",  "123", 1, -1, -1 },
+    { "%Zi",   "+0x0",    "0", 1, -1, -1 },
+    { "%Zi",  "+0x7B",  "123", 1, -1, -1 },
+    { "%Zi",  "+0x7B",  "123", 1, -1, -1 },
+    { "%Zi",   "-0x0",   "-0", 1, -1, -1 },
+    { "%Zi",  "-0x7B", "-123", 1, -1, -1 },
+    { "%Zi",  "-0x7B", "-123", 1, -1, -1 },
+    { "%Zi",    "0X0",    "0", 1, -1, -1 },
+    { "%Zi",   "0X7B",  "123", 1, -1, -1 },
+    { "%Zi",   "0X7B",  "123", 1, -1, -1 },
+    { "%Zi",   "+0X0",    "0", 1, -1, -1 },
+    { "%Zi",  "+0X7B",  "123", 1, -1, -1 },
+    { "%Zi",  "+0X7B",  "123", 1, -1, -1 },
+    { "%Zi",   "-0X0",   "-0", 1, -1, -1 },
+    { "%Zi",  "-0X7B", "-123", 1, -1, -1 },
+    { "%Zi",  "-0X7B", "-123", 1, -1, -1 },
+
+    { "%Zd",    " 0",    "0", 1, -1, -1 },
+    { "%Zd",   "  0",    "0", 1, -1, -1 },
+    { "%Zd",  "   0",    "0", 1, -1, -1 },
+    { "%Zd",   "\t0",    "0", 1, -1, -1 },
+    { "%Zd", "\t\t0",    "0", 1, -1, -1 },
+
+    { "hello%Zd",      "hello0",       "0", 1, -1, -1 },
+    { "hello%Zd",      "hello 0",      "0", 1, -1, -1 },
+    { "hello%Zd",      "hello \t0",    "0", 1, -1, -1 },
+    { "hello%Zdworld", "hello 0world", "0", 1, -1, -1 },
+
+    { "hello%*Zd",      "hello0",       "-999", 0, -1, -1 },
+    { "hello%*Zd",      "hello 0",      "-999", 0, -1, -1 },
+    { "hello%*Zd",      "hello \t0",    "-999", 0, -1, -1 },
+    { "hello%*Zdworld", "hello 0world", "-999", 0, -1, -1 },
+
+    { "%Zd",    "",     "-999", -1, -1, -555 },
+    { "%Zd",    " ",    "-999", -1, -1, -555 },
+    { " %Zd",   "",     "-999", -1, -1, -555 },
+    { "xyz%Zd", "",     "-999", -1, -1, -555 },
+
+    { "%*Zd",    "",     "-999", -1, -1, -555 },
+    { " %*Zd",   "",     "-999", -1, -1, -555 },
+    { "xyz%*Zd", "",     "-999", -1, -1, -555 },
+
+    { "%Zd",    "xyz",  "0",     0, 0, -555 },
+
+    /* match something, but invalid */
+    { "%Zd",    "-",    "-999",  0, 1, -555 },
+    { "%Zd",    "+",    "-999",  0, 1, -555 },
+    { "xyz%Zd", "xyz-", "-999",  0, 4, -555 },
+    { "xyz%Zd", "xyz+", "-999",  0, 4, -555 },
+    { "%Zi",    "0x",   "-999",  0, 2, -555 },
+    { "%Zi",    "0X",   "-999",  0, 2, -555 },
+    { "%Zi",    "0x-",  "-999",  0, 2, -555 },
+    { "%Zi",    "0X+",  "-999",  0, 2, -555 },
+    { "%Zi",    "-0x",  "-999",  0, 3, -555 },
+    { "%Zi",    "-0X",  "-999",  0, 3, -555 },
+    { "%Zi",    "+0x",  "-999",  0, 3, -555 },
+    { "%Zi",    "+0X",  "-999",  0, 3, -555 },
+
+    { "%1Zi",  "1234", "1",    1, 1, 1 },
+    { "%2Zi",  "1234", "12",   1, 2, 2 },
+    { "%3Zi",  "1234", "123",  1, 3, 3 },
+    { "%4Zi",  "1234", "1234", 1, 4, 4 },
+    { "%5Zi",  "1234", "1234", 1, 4, 4 },
+    { "%6Zi",  "1234", "1234", 1, 4, 4 },
+
+    { "%1Zi",  "01234", "0",     1, 1, 1 },
+    { "%2Zi",  "01234", "01",    1, 2, 2 },
+    { "%3Zi",  "01234", "012",   1, 3, 3 },
+    { "%4Zi",  "01234", "0123",  1, 4, 4 },
+    { "%5Zi",  "01234", "01234", 1, 5, 5 },
+    { "%6Zi",  "01234", "01234", 1, 5, 5 },
+    { "%7Zi",  "01234", "01234", 1, 5, 5 },
+
+    { "%1Zi",  "0x1234", "0",      1, 1, 1 },
+    { "%2Zi",  "0x1234", "-999",   0, 2, -555 },
+    { "%3Zi",  "0x1234", "0x1",    1, 3, 3 },
+    { "%4Zi",  "0x1234", "0x12",   1, 4, 4 },
+    { "%5Zi",  "0x1234", "0x123",  1, 5, 5 },
+    { "%6Zi",  "0x1234", "0x1234", 1, 6, 6 },
+    { "%7Zi",  "0x1234", "0x1234", 1, 6, 6 },
+    { "%8Zi",  "0x1234", "0x1234", 1, 6, 6 },
+
+    { "%%xyz%Zd",  "%xyz123",  "123", 1, -1, -1 },
+    { "12%%34%Zd", "12%34567", "567", 1, -1, -1 },
+    { "%%%%%Zd",   "%%123",    "123", 1, -1, -1 },
+
+    /* various subtle EOF cases */
+    { "x",       "",    "-999", EOF, 0, -555 },
+    { " x",      "",    "-999", EOF, 0, -555 },
+    { "xyz",     "",    "-999", EOF, 0, -555 },
+    { " ",       "",    "-999",   0, 0,    0 },
+    { " ",       " ",   "-999",   0, 1,    1 },
+    { "%*Zd%Zd", "",    "-999", EOF, 0, -555 },
+    { "%*Zd%Zd", "123", "-999", EOF, 3, -555 },
+    { "x",       "x",   "-999",   0, 1,    1 },
+    { "xyz",     "x",   "-999", EOF, 1, -555 },
+    { "xyz",     "xy",  "-999", EOF, 2, -555 },
+    { "xyz",     "xyz", "-999",   0, 3,    3 },
+    { "%Zn",     "",    "0",      0, 0,    0 },
+    { " %Zn",    "",    "0",      0, 0,    0 },
+    { " x%Zn",   "",    "-999", EOF, 0, -555 },
+    { "xyz%Zn",  "",    "-999", EOF, 0, -555 },
+    { " x%Zn",   "",    "-999", EOF, 0, -555 },
+    { " %Zn x",  " ",   "-999", EOF, 1, -555 },
+
+    /* these seem to tickle a bug in glibc 2.2.4 */
+    { " x",      " ",   "-999", EOF, 1, -555, 1 },
+    { " xyz",    " ",   "-999", EOF, 1, -555, 1 },
+    { " x%Zn",   " ",   "-999", EOF, 1, -555, 1 },
+  };
+
+  int         i, j, ignore;
+  int         got_ret, want_ret, got_upto, want_upto;
+  mpz_t       got, want;
+  long        got_l, want_ftell;
+  int         error = 0;
+  fun_t       fun;
+  const char  *name;
+  char        fmt[128];
+
+  mpz_init (got);
+  mpz_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (want, data[i].want, 0);
+
+      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));
+      strcpy (fmt, data[i].fmt);
+      strcat (fmt, "%n");
+
+      ignore = fmt_allignore (fmt);
+
+      for (j = 0; j <= 3; j++)
+        {
+          want_ret = data[i].want_ret;
+
+          want_ftell = data[i].want_ftell;
+          if (want_ftell == -1)
+            want_ftell = strlen (data[i].input);
+
+          want_upto = data[i].want_upto;
+          if (want_upto == -1)
+            want_upto = strlen (data[i].input);
+
+          switch (j) {
+          case 0:
+            name = "gmp_sscanf";
+            fun = fun_gmp_sscanf;
+            break;
+          case 1:
+            name = "gmp_fscanf";
+            fun = fun_gmp_fscanf;
+            break;
+          case 2:
+#ifdef __GLIBC__
+            if (data[i].not_glibc)
+              continue;
+#endif
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard sscanf";
+            fun = fun_sscanf;
+            break;
+          case 3:
+#ifdef __GLIBC__
+            if (data[i].not_glibc)
+              continue;
+#endif
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard fscanf";
+            fun = fun_fscanf;
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          got_upto = -555;
+          got_ftell = -1L;
+
+          switch (j) {
+          case 0:
+          case 1:
+            mpz_set_si (got, -999L);
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);
+            break;
+          case 2:
+          case 3:
+            got_l = -999L;
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, &got_l, &got_upto);
+            mpz_set_si (got, got_l);
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          MPZ_CHECK_FORMAT (got);
+
+          if (got_ret != want_ret)
+            {
+              printf ("%s wrong return value\n", name);
+              error = 1;
+            }
+          if (want_ret == 1 && mpz_cmp (want, got) != 0)
+            {
+              printf ("%s wrong result\n", name);
+              error = 1;
+            }
+          if (got_upto != want_upto)
+            {
+              printf ("%s wrong upto\n", name);
+              error = 1;
+            }
+          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)
+            {
+              printf ("%s wrong ftell\n", name);
+              error = 1;
+            }
+          if (error)
+            {
+              printf    ("  fmt   \"%s\"\n", data[i].fmt);
+              printf    ("  input \"%s\"\n", data[i].input);
+              printf    ("  ignore %d\n", ignore);
+              printf    ("  ret   want=%d\n", want_ret);
+              printf    ("        got =%d\n", got_ret);
+              mpz_trace ("  value want", want);
+              mpz_trace ("        got ", got);
+              printf    ("  upto  want =%d\n", want_upto);
+              printf    ("        got  =%d\n", got_upto);
+              if (got_ftell != -1)
+                {
+                  printf    ("  ftell want =%ld\n", want_ftell);
+                  printf    ("        got  =%ld\n", got_ftell);
+                }
+              abort ();
+            }
+        }
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+void
+check_q (void)
+{
+  static const struct {
+    const char  *fmt;
+    const char  *input;
+    const char  *want;
+    int         ret;
+    long        ftell;
+
+  } data[] = {
+
+    { "%Qd",    "0",    "0", 1, -1 },
+    { "%Qd",    "1",    "1", 1, -1 },
+    { "%Qd",  "123",  "123", 1, -1 },
+    { "%Qd",   "+0",    "0", 1, -1 },
+    { "%Qd",   "+1",    "1", 1, -1 },
+    { "%Qd", "+123",  "123", 1, -1 },
+    { "%Qd",   "-0",    "0", 1, -1 },
+    { "%Qd",   "-1",   "-1", 1, -1 },
+    { "%Qd", "-123", "-123", 1, -1 },
+
+    { "%Qo",    "0",    "0", 1, -1 },
+    { "%Qo",  "173",  "123", 1, -1 },
+    { "%Qo",   "+0",    "0", 1, -1 },
+    { "%Qo", "+173",  "123", 1, -1 },
+    { "%Qo",   "-0",    "0", 1, -1 },
+    { "%Qo", "-173", "-123", 1, -1 },
+
+    { "%Qx",    "0",    "0", 1, -1 },
+    { "%Qx",   "7b",  "123", 1, -1 },
+    { "%Qx",   "7b",  "123", 1, -1 },
+    { "%Qx",   "+0",    "0", 1, -1 },
+    { "%Qx",  "+7b",  "123", 1, -1 },
+    { "%Qx",  "+7b",  "123", 1, -1 },
+    { "%Qx",   "-0",   "-0", 1, -1 },
+    { "%Qx",  "-7b", "-123", 1, -1 },
+    { "%Qx",  "-7b", "-123", 1, -1 },
+    { "%QX",    "0",    "0", 1, -1 },
+    { "%QX",   "7b",  "123", 1, -1 },
+    { "%QX",   "7b",  "123", 1, -1 },
+    { "%QX",   "+0",    "0", 1, -1 },
+    { "%QX",  "+7b",  "123", 1, -1 },
+    { "%QX",  "+7b",  "123", 1, -1 },
+    { "%QX",   "-0",   "-0", 1, -1 },
+    { "%QX",  "-7b", "-123", 1, -1 },
+    { "%QX",  "-7b", "-123", 1, -1 },
+    { "%Qx",    "0",    "0", 1, -1 },
+    { "%Qx",   "7B",  "123", 1, -1 },
+    { "%Qx",   "7B",  "123", 1, -1 },
+    { "%Qx",   "+0",    "0", 1, -1 },
+    { "%Qx",  "+7B",  "123", 1, -1 },
+    { "%Qx",  "+7B",  "123", 1, -1 },
+    { "%Qx",   "-0",   "-0", 1, -1 },
+    { "%Qx",  "-7B", "-123", 1, -1 },
+    { "%Qx",  "-7B", "-123", 1, -1 },
+    { "%QX",    "0",    "0", 1, -1 },
+    { "%QX",   "7B",  "123", 1, -1 },
+    { "%QX",   "7B",  "123", 1, -1 },
+    { "%QX",   "+0",    "0", 1, -1 },
+    { "%QX",  "+7B",  "123", 1, -1 },
+    { "%QX",  "+7B",  "123", 1, -1 },
+    { "%QX",   "-0",   "-0", 1, -1 },
+    { "%QX",  "-7B", "-123", 1, -1 },
+    { "%QX",  "-7B", "-123", 1, -1 },
+
+    { "%Qi",    "0",    "0", 1, -1 },
+    { "%Qi",    "1",    "1", 1, -1 },
+    { "%Qi",  "123",  "123", 1, -1 },
+    { "%Qi",   "+0",    "0", 1, -1 },
+    { "%Qi",   "+1",    "1", 1, -1 },
+    { "%Qi", "+123",  "123", 1, -1 },
+    { "%Qi",   "-0",    "0", 1, -1 },
+    { "%Qi",   "-1",   "-1", 1, -1 },
+    { "%Qi", "-123", "-123", 1, -1 },
+
+    { "%Qi",    "00",    "0", 1, -1 },
+    { "%Qi",  "0173",  "123", 1, -1 },
+    { "%Qi",   "+00",    "0", 1, -1 },
+    { "%Qi", "+0173",  "123", 1, -1 },
+    { "%Qi",   "-00",    "0", 1, -1 },
+    { "%Qi", "-0173", "-123", 1, -1 },
+
+    { "%Qi",    "0x0",    "0", 1, -1 },
+    { "%Qi",   "0x7b",  "123", 1, -1 },
+    { "%Qi",   "0x7b",  "123", 1, -1 },
+    { "%Qi",   "+0x0",    "0", 1, -1 },
+    { "%Qi",  "+0x7b",  "123", 1, -1 },
+    { "%Qi",  "+0x7b",  "123", 1, -1 },
+    { "%Qi",   "-0x0",   "-0", 1, -1 },
+    { "%Qi",  "-0x7b", "-123", 1, -1 },
+    { "%Qi",  "-0x7b", "-123", 1, -1 },
+    { "%Qi",    "0X0",    "0", 1, -1 },
+    { "%Qi",   "0X7b",  "123", 1, -1 },
+    { "%Qi",   "0X7b",  "123", 1, -1 },
+    { "%Qi",   "+0X0",    "0", 1, -1 },
+    { "%Qi",  "+0X7b",  "123", 1, -1 },
+    { "%Qi",  "+0X7b",  "123", 1, -1 },
+    { "%Qi",   "-0X0",   "-0", 1, -1 },
+    { "%Qi",  "-0X7b", "-123", 1, -1 },
+    { "%Qi",  "-0X7b", "-123", 1, -1 },
+    { "%Qi",    "0x0",    "0", 1, -1 },
+    { "%Qi",   "0x7B",  "123", 1, -1 },
+    { "%Qi",   "0x7B",  "123", 1, -1 },
+    { "%Qi",   "+0x0",    "0", 1, -1 },
+    { "%Qi",  "+0x7B",  "123", 1, -1 },
+    { "%Qi",  "+0x7B",  "123", 1, -1 },
+    { "%Qi",   "-0x0",   "-0", 1, -1 },
+    { "%Qi",  "-0x7B", "-123", 1, -1 },
+    { "%Qi",  "-0x7B", "-123", 1, -1 },
+    { "%Qi",    "0X0",    "0", 1, -1 },
+    { "%Qi",   "0X7B",  "123", 1, -1 },
+    { "%Qi",   "0X7B",  "123", 1, -1 },
+    { "%Qi",   "+0X0",    "0", 1, -1 },
+    { "%Qi",  "+0X7B",  "123", 1, -1 },
+    { "%Qi",  "+0X7B",  "123", 1, -1 },
+    { "%Qi",   "-0X0",   "-0", 1, -1 },
+    { "%Qi",  "-0X7B", "-123", 1, -1 },
+    { "%Qi",  "-0X7B", "-123", 1, -1 },
+
+    { "%Qd",    " 0",    "0", 1, -1 },
+    { "%Qd",   "  0",    "0", 1, -1 },
+    { "%Qd",  "   0",    "0", 1, -1 },
+    { "%Qd",   "\t0",    "0", 1, -1 },
+    { "%Qd", "\t\t0",    "0", 1, -1 },
+
+    { "%Qd",  "3/2",   "3/2", 1, -1 },
+    { "%Qd", "+3/2",   "3/2", 1, -1 },
+    { "%Qd", "-3/2",  "-3/2", 1, -1 },
+
+    { "%Qx",  "f/10", "15/16", 1, -1 },
+    { "%Qx",  "F/10", "15/16", 1, -1 },
+    { "%QX",  "f/10", "15/16", 1, -1 },
+    { "%QX",  "F/10", "15/16", 1, -1 },
+
+    { "%Qo",  "20/21",  "16/17", 1, -1 },
+    { "%Qo", "-20/21", "-16/17", 1, -1 },
+
+    { "%Qi",    "10/11",  "10/11", 1, -1 },
+    { "%Qi",   "+10/11",  "10/11", 1, -1 },
+    { "%Qi",   "-10/11", "-10/11", 1, -1 },
+    { "%Qi",   "010/11",   "8/11", 1, -1 },
+    { "%Qi",  "+010/11",   "8/11", 1, -1 },
+    { "%Qi",  "-010/11",  "-8/11", 1, -1 },
+    { "%Qi",  "0x10/11",  "16/11", 1, -1 },
+    { "%Qi", "+0x10/11",  "16/11", 1, -1 },
+    { "%Qi", "-0x10/11", "-16/11", 1, -1 },
+
+    { "%Qi",    "10/011",  "10/9", 1, -1 },
+    { "%Qi",   "+10/011",  "10/9", 1, -1 },
+    { "%Qi",   "-10/011", "-10/9", 1, -1 },
+    { "%Qi",   "010/011",   "8/9", 1, -1 },
+    { "%Qi",  "+010/011",   "8/9", 1, -1 },
+    { "%Qi",  "-010/011",  "-8/9", 1, -1 },
+    { "%Qi",  "0x10/011",  "16/9", 1, -1 },
+    { "%Qi", "+0x10/011",  "16/9", 1, -1 },
+    { "%Qi", "-0x10/011", "-16/9", 1, -1 },
+
+    { "%Qi",    "10/0x11",  "10/17", 1, -1 },
+    { "%Qi",   "+10/0x11",  "10/17", 1, -1 },
+    { "%Qi",   "-10/0x11", "-10/17", 1, -1 },
+    { "%Qi",   "010/0x11",   "8/17", 1, -1 },
+    { "%Qi",  "+010/0x11",   "8/17", 1, -1 },
+    { "%Qi",  "-010/0x11",  "-8/17", 1, -1 },
+    { "%Qi",  "0x10/0x11",  "16/17", 1, -1 },
+    { "%Qi", "+0x10/0x11",  "16/17", 1, -1 },
+    { "%Qi", "-0x10/0x11", "-16/17", 1, -1 },
+
+    { "hello%Qd",      "hello0",         "0", 1, -1 },
+    { "hello%Qd",      "hello 0",        "0", 1, -1 },
+    { "hello%Qd",      "hello \t0",      "0", 1, -1 },
+    { "hello%Qdworld", "hello 0world",   "0", 1, -1 },
+    { "hello%Qd",      "hello3/2",     "3/2", 1, -1 },
+
+    { "hello%*Qd",      "hello0",        "-999/121", 0, -1 },
+    { "hello%*Qd",      "hello 0",       "-999/121", 0, -1 },
+    { "hello%*Qd",      "hello \t0",     "-999/121", 0, -1 },
+    { "hello%*Qdworld", "hello 0world",  "-999/121", 0, -1 },
+    { "hello%*Qdworld", "hello3/2world", "-999/121", 0, -1 },
+
+    { "%Qd",    "",     "-999/121", -1, -1 },
+    { "%Qd",   " ",     "-999/121", -1, -1 },
+    { " %Qd",   "",     "-999/121", -1, -1 },
+    { "xyz%Qd", "",     "-999/121", -1, -1 },
+
+    { "%*Qd",    "",     "-999/121", -1, -1 },
+    { " %*Qd",   "",     "-999/121", -1, -1 },
+    { "xyz%*Qd", "",     "-999/121", -1, -1 },
+
+    /* match something, but invalid */
+    { "%Qd",    "-",     "-999/121",  0, 1 },
+    { "%Qd",    "+",     "-999/121",  0, 1 },
+    { "%Qd",    "/-",    "-999/121",  0, 1 },
+    { "%Qd",    "/+",    "-999/121",  0, 1 },
+    { "%Qd",    "-/",    "-999/121",  0, 1 },
+    { "%Qd",    "+/",    "-999/121",  0, 1 },
+    { "%Qd",    "-/-",   "-999/121",  0, 1 },
+    { "%Qd",    "-/+",   "-999/121",  0, 1 },
+    { "%Qd",    "+/+",   "-999/121",  0, 1 },
+    { "%Qd",    "/123",  "-999/121",  0, 1 },
+    { "%Qd",    "-/123", "-999/121",  0, 1 },
+    { "%Qd",    "+/123", "-999/121",  0, 1 },
+    { "%Qd",    "123/",  "-999/121",  0, 1 },
+    { "%Qd",    "123/-", "-999/121",  0, 1 },
+    { "%Qd",    "123/+", "-999/121",  0, 1 },
+    { "xyz%Qd", "xyz-",  "-999/121",  0, 4 },
+    { "xyz%Qd", "xyz+",  "-999/121",  0, 4 },
+
+    { "%1Qi",  "12/57", "1",        1, 1 },
+    { "%2Qi",  "12/57", "12",       1, 2 },
+    { "%3Qi",  "12/57", "-999/121", 0, -1 },
+    { "%4Qi",  "12/57", "12/5",     1, 4 },
+    { "%5Qi",  "12/57", "12/57",    1, 5 },
+    { "%6Qi",  "12/57", "12/57",    1, 5 },
+    { "%7Qi",  "12/57", "12/57",    1, 5 },
+
+    { "%1Qi",  "012/057", "0",        1, 1 },
+    { "%2Qi",  "012/057", "01",       1, 2 },
+    { "%3Qi",  "012/057", "012",      1, 3 },
+    { "%4Qi",  "012/057", "-999/121", 0, -1 },
+    { "%5Qi",  "012/057", "012/0",    1, 5 },
+    { "%6Qi",  "012/057", "012/5",    1, 6 },
+    { "%7Qi",  "012/057", "012/057",  1, 7 },
+    { "%8Qi",  "012/057", "012/057",  1, 7 },
+    { "%9Qi",  "012/057", "012/057",  1, 7 },
+
+    { "%1Qi",  "0x12/0x57", "0",         1, 1 },
+    { "%2Qi",  "0x12/0x57", "-999",      0, 2 },
+    { "%3Qi",  "0x12/0x57", "0x1",       1, 3 },
+    { "%4Qi",  "0x12/0x57", "0x12",      1, 4 },
+    { "%5Qi",  "0x12/0x57", "-999/121",  0, 5 },
+    { "%6Qi",  "0x12/0x57", "0x12/0",    1, 6 },
+    { "%7Qi",  "0x12/0x57", "-999/121",  0, 7 },
+    { "%8Qi",  "0x12/0x57", "0x12/0x5",  1, 8 },
+    { "%9Qi",  "0x12/0x57", "0x12/0x57", 1, 9 },
+    { "%10Qi", "0x12/0x57", "0x12/0x57", 1, 9 },
+    { "%11Qi", "0x12/0x57", "0x12/0x57", 1, 9 },
+
+    { "%Qd",  "xyz", "0", 0, 0 },
+  };
+
+  int         i, j, ignore, got_ret, want_ret, got_upto, want_upto;
+  mpq_t       got, want;
+  long        got_l, want_ftell;
+  int         error = 0;
+  fun_t       fun;
+  const char  *name;
+  char        fmt[128];
+
+  mpq_init (got);
+  mpq_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpq_set_str_or_abort (want, data[i].want, 0);
+
+      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));
+      strcpy (fmt, data[i].fmt);
+      strcat (fmt, "%n");
+
+      ignore = (strchr (fmt, '*') != NULL);
+
+      for (j = 0; j <= 3; j++)
+        {
+          want_ret = data[i].ret;
+
+          want_ftell = data[i].ftell;
+          if (want_ftell == -1)
+            want_ftell = strlen (data[i].input);
+          want_upto = want_ftell;
+
+          if (want_ret == -1 || (want_ret == 0 && ! ignore))
+            {
+              want_ftell = -1;
+              want_upto = -555;
+            }
+
+          switch (j) {
+          case 0:
+            name = "gmp_sscanf";
+            fun = fun_gmp_sscanf;
+            break;
+          case 1:
+            name = "gmp_fscanf";
+            fun = fun_gmp_fscanf;
+            break;
+          case 2:
+            if (strchr (data[i].input, '/') != NULL)
+              continue;
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard sscanf";
+            fun = fun_sscanf;
+            break;
+          case 3:
+            if (strchr (data[i].input, '/') != NULL)
+              continue;
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard fscanf";
+            fun = fun_fscanf;
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          got_upto = -555;
+          got_ftell = -1;
+
+          switch (j) {
+          case 0:
+          case 1:
+            mpq_set_si (got, -999L, 121L);
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);
+            break;
+          case 2:
+          case 3:
+            got_l = -999L;
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, &got_l, &got_upto);
+            mpq_set_si (got, got_l, (got_l == -999L ? 121L : 1L));
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          MPZ_CHECK_FORMAT (mpq_numref (got));
+          MPZ_CHECK_FORMAT (mpq_denref (got));
+
+          if (got_ret != want_ret)
+            {
+              printf ("%s wrong return value\n", name);
+              error = 1;
+            }
+          /* use direct mpz compares, since some of the test data is
+             non-canonical and can trip ASSERTs in mpq_equal */
+          if (want_ret == 1
+              && ! (mpz_cmp (mpq_numref(want), mpq_numref(got)) == 0
+                    && mpz_cmp (mpq_denref(want), mpq_denref(got)) == 0))
+            {
+              printf ("%s wrong result\n", name);
+              error = 1;
+            }
+          if (got_upto != want_upto)
+            {
+              printf ("%s wrong upto\n", name);
+              error = 1;
+            }
+          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)
+            {
+              printf ("%s wrong ftell\n", name);
+              error = 1;
+            }
+          if (error)
+            {
+              printf    ("  fmt   \"%s\"\n", data[i].fmt);
+              printf    ("  input \"%s\"\n", data[i].input);
+              printf    ("  ret   want=%d\n", want_ret);
+              printf    ("        got =%d\n", got_ret);
+              mpq_trace ("  value want", want);
+              mpq_trace ("        got ", got);
+              printf    ("  upto  want=%d\n", want_upto);
+              printf    ("        got =%d\n", got_upto);
+              if (got_ftell != -1)
+                {
+                  printf    ("  ftell want =%ld\n", want_ftell);
+                  printf    ("        got  =%ld\n", got_ftell);
+                }
+              abort ();
+            }
+        }
+    }
+
+  mpq_clear (got);
+  mpq_clear (want);
+}
+
+void
+check_f (void)
+{
+  static const struct {
+    const char  *fmt;
+    const char  *input;
+    const char  *want;
+    int         ret;
+    long        ftell;    /* or -1 for length of input string */
+
+  } data[] = {
+
+    { "%Ff",    "0",    "0", 1, -1 },
+    { "%Fe",    "0",    "0", 1, -1 },
+    { "%FE",    "0",    "0", 1, -1 },
+    { "%Fg",    "0",    "0", 1, -1 },
+    { "%FG",    "0",    "0", 1, -1 },
+
+    { "%Ff",  "123",    "123", 1, -1 },
+    { "%Ff", "+123",    "123", 1, -1 },
+    { "%Ff", "-123",   "-123", 1, -1 },
+    { "%Ff",  "123.",   "123", 1, -1 },
+    { "%Ff", "+123.",   "123", 1, -1 },
+    { "%Ff", "-123.",  "-123", 1, -1 },
+    { "%Ff",  "123.0",  "123", 1, -1 },
+    { "%Ff", "+123.0",  "123", 1, -1 },
+    { "%Ff", "-123.0", "-123", 1, -1 },
+    { "%Ff",  "0123",   "123", 1, -1 },
+    { "%Ff", "-0123",  "-123", 1, -1 },
+
+    { "%Ff",  "123.456e3",   "123456", 1, -1 },
+    { "%Ff", "-123.456e3",  "-123456", 1, -1 },
+    { "%Ff",  "123.456e+3",  "123456", 1, -1 },
+    { "%Ff", "-123.456e+3", "-123456", 1, -1 },
+    { "%Ff",  "123000e-3",      "123", 1, -1 },
+    { "%Ff", "-123000e-3",     "-123", 1, -1 },
+    { "%Ff",  "123000.e-3",     "123", 1, -1 },
+    { "%Ff", "-123000.e-3",    "-123", 1, -1 },
+
+    { "%Ff",  "123.456E3",   "123456", 1, -1 },
+    { "%Ff", "-123.456E3",  "-123456", 1, -1 },
+    { "%Ff",  "123.456E+3",  "123456", 1, -1 },
+    { "%Ff", "-123.456E+3", "-123456", 1, -1 },
+    { "%Ff",  "123000E-3",      "123", 1, -1 },
+    { "%Ff", "-123000E-3",     "-123", 1, -1 },
+    { "%Ff",  "123000.E-3",     "123", 1, -1 },
+    { "%Ff", "-123000.E-3",    "-123", 1, -1 },
+
+    { "%Ff",  ".456e3",   "456", 1, -1 },
+    { "%Ff", "-.456e3",  "-456", 1, -1 },
+    { "%Ff",  ".456e+3",  "456", 1, -1 },
+    { "%Ff", "-.456e+3", "-456", 1, -1 },
+
+    { "%Ff",    " 0",    "0", 1, -1 },
+    { "%Ff",   "  0",    "0", 1, -1 },
+    { "%Ff",  "   0",    "0", 1, -1 },
+    { "%Ff",   "\t0",    "0", 1, -1 },
+    { "%Ff", "\t\t0",    "0", 1, -1 },
+
+    { "hello%Fg",      "hello0",       "0",   1, -1 },
+    { "hello%Fg",      "hello 0",      "0",   1, -1 },
+    { "hello%Fg",      "hello \t0",    "0",   1, -1 },
+    { "hello%Fgworld", "hello 0world", "0",   1, -1 },
+    { "hello%Fg",      "hello3.0",     "3.0", 1, -1 },
+
+    { "hello%*Fg",      "hello0",        "-999", 0, -1 },
+    { "hello%*Fg",      "hello 0",       "-999", 0, -1 },
+    { "hello%*Fg",      "hello \t0",     "-999", 0, -1 },
+    { "hello%*Fgworld", "hello 0world",  "-999", 0, -1 },
+    { "hello%*Fgworld", "hello3.0world", "-999", 0, -1 },
+
+    { "%Ff",     "",   "-999", -1, -1 },
+    { "%Ff",    " ",   "-999", -1, -1 },
+    { "%Ff",   "\t",   "-999", -1, -1 },
+    { "%Ff",  " \t",   "-999", -1, -1 },
+    { " %Ff",    "",   "-999", -1, -1 },
+    { "xyz%Ff",  "",   "-999", -1, -1 },
+
+    { "%*Ff",    "",   "-999", -1, -1 },
+    { " %*Ff",   "",   "-999", -1, -1 },
+    { "xyz%*Ff", "",   "-999", -1, -1 },
+
+    { "%Ff",    "xyz", "0", 0 },
+
+    /* various non-empty but invalid */
+    { "%Ff",    "-",      "-999",  0, 1 },
+    { "%Ff",    "+",      "-999",  0, 1 },
+    { "xyz%Ff", "xyz-",   "-999",  0, 4 },
+    { "xyz%Ff", "xyz+",   "-999",  0, 4 },
+    { "%Ff",    "-.",     "-999",  0, 2 },
+    { "%Ff",    "+.",     "-999",  0, 2 },
+    { "%Ff",    ".e",     "-999",  0, 1 },
+    { "%Ff",   "-.e",     "-999",  0, 2 },
+    { "%Ff",   "+.e",     "-999",  0, 2 },
+    { "%Ff",    ".E",     "-999",  0, 1 },
+    { "%Ff",   "-.E",     "-999",  0, 2 },
+    { "%Ff",   "+.E",     "-999",  0, 2 },
+    { "%Ff",    ".e123",  "-999",  0, 1 },
+    { "%Ff",   "-.e123",  "-999",  0, 2 },
+    { "%Ff",   "+.e123",  "-999",  0, 2 },
+    { "%Ff",    "123e",   "-999",  0, 4 },
+    { "%Ff",   "-123e",   "-999",  0, 5 },
+    { "%Ff",    "123e-",  "-999",  0, 5 },
+    { "%Ff",   "-123e-",  "-999",  0, 6 },
+    { "%Ff",    "123e+",  "-999",  0, 5 },
+    { "%Ff",   "-123e+",  "-999",  0, 6 },
+    { "%Ff",   "123e-Z",  "-999",  0, 5 },
+
+    /* hex floats */
+    { "%Ff", "0x123p0",       "291",  1, -1 },
+    { "%Ff", "0x123P0",       "291",  1, -1 },
+    { "%Ff", "0X123p0",       "291",  1, -1 },
+    { "%Ff", "0X123P0",       "291",  1, -1 },
+    { "%Ff", "-0x123p0",     "-291",  1, -1 },
+    { "%Ff", "+0x123p0",      "291",  1, -1 },
+    { "%Ff", "0x123.p0",      "291",  1, -1 },
+    { "%Ff", "0x12.3p4",      "291",  1, -1 },
+    { "%Ff", "-0x12.3p4",    "-291",  1, -1 },
+    { "%Ff", "+0x12.3p4",     "291",  1, -1 },
+    { "%Ff", "0x1230p-4",     "291",  1, -1 },
+    { "%Ff", "-0x1230p-4",   "-291",  1, -1 },
+    { "%Ff", "+0x1230p-4",    "291",  1, -1 },
+    { "%Ff", "+0x.1230p12",   "291",  1, -1 },
+    { "%Ff", "+0x123000p-12", "291",  1, -1 },
+    { "%Ff", "0x123 p12",     "291",  1, 5 },
+    { "%Ff", "0x9 9",           "9",  1, 3 },
+    { "%Ff", "0x01",            "1",  1, 4 },
+    { "%Ff", "0x23",           "35",  1, 4 },
+    { "%Ff", "0x45",           "69",  1, 4 },
+    { "%Ff", "0x67",          "103",  1, 4 },
+    { "%Ff", "0x89",          "137",  1, 4 },
+    { "%Ff", "0xAB",          "171",  1, 4 },
+    { "%Ff", "0xCD",          "205",  1, 4 },
+    { "%Ff", "0xEF",          "239",  1, 4 },
+    { "%Ff", "0xab",          "171",  1, 4 },
+    { "%Ff", "0xcd",          "205",  1, 4 },
+    { "%Ff", "0xef",          "239",  1, 4 },
+    { "%Ff", "0x100p0A",      "256",  1, 7 },
+    { "%Ff", "0x1p9",         "512",  1, -1 },
+
+    /* invalid hex floats */
+    { "%Ff", "0x",     "-999",  0, 2 },
+    { "%Ff", "-0x",    "-999",  0, 3 },
+    { "%Ff", "+0x",    "-999",  0, 3 },
+    { "%Ff", "0x-",    "-999",  0, 2 },
+    { "%Ff", "0x+",    "-999",  0, 2 },
+    { "%Ff", "0x.",    "-999",  0, 3 },
+    { "%Ff", "-0x.",   "-999",  0, 4 },
+    { "%Ff", "+0x.",   "-999",  0, 4 },
+    { "%Ff", "0x.p",   "-999",  0, 3 },
+    { "%Ff", "-0x.p",  "-999",  0, 4 },
+    { "%Ff", "+0x.p",  "-999",  0, 4 },
+    { "%Ff", "0x.P",   "-999",  0, 3 },
+    { "%Ff", "-0x.P",  "-999",  0, 4 },
+    { "%Ff", "+0x.P",  "-999",  0, 4 },
+    { "%Ff", ".p123",  "-999",  0, 1 },
+    { "%Ff", "-.p123", "-999",  0, 2 },
+    { "%Ff", "+.p123", "-999",  0, 2 },
+    { "%Ff", "0x1p",   "-999",  0, 4 },
+    { "%Ff", "0x1p-",  "-999",  0, 5 },
+    { "%Ff", "0x1p+",  "-999",  0, 5 },
+    { "%Ff", "0x123p 12", "291",  0, 6 },
+    { "%Ff", "0x 123p12", "291",  0, 2 },
+
+  };
+
+  int         i, j, ignore, got_ret, want_ret, got_upto, want_upto;
+  mpf_t       got, want;
+  double      got_d;
+  long        want_ftell;
+  int         error = 0;
+  fun_t       fun;
+  const char  *name;
+  char        fmt[128];
+
+  mpf_init (got);
+  mpf_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_str_or_abort (want, data[i].want, 10);
+
+      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));
+      strcpy (fmt, data[i].fmt);
+      strcat (fmt, "%n");
+
+      ignore = (strchr (fmt, '*') != NULL);
+
+      for (j = 0; j <= 3; j++)
+        {
+          want_ret = data[i].ret;
+
+          want_ftell = data[i].ftell;
+          if (want_ftell == -1)
+            want_ftell = strlen (data[i].input);
+          want_upto = want_ftell;
+
+          if (want_ret == -1 || (want_ret == 0 && ! ignore))
+            want_upto = -555;
+
+          switch (j) {
+          case 0:
+            name = "gmp_sscanf";
+            fun = fun_gmp_sscanf;
+            break;
+          case 1:
+            name = "gmp_fscanf";
+            fun = fun_gmp_fscanf;
+            break;
+          case 2:
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard sscanf";
+            fun = fun_sscanf;
+            break;
+          case 3:
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard fscanf";
+            fun = fun_fscanf;
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          got_upto = -555;
+          got_ftell = -1;
+
+          switch (j) {
+          case 0:
+          case 1:
+            mpf_set_si (got, -999L);
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);
+            break;
+          case 2:
+          case 3:
+            got_d = -999L;
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, &got_d, &got_upto);
+            mpf_set_d (got, got_d);
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          MPF_CHECK_FORMAT (got);
+
+          if (got_ret != want_ret)
+            {
+              printf ("%s wrong return value\n", name);
+              error = 1;
+            }
+          if (want_ret == 1 && mpf_cmp (want, got) != 0)
+            {
+              printf ("%s wrong result\n", name);
+              error = 1;
+            }
+          if (got_upto != want_upto)
+            {
+              printf ("%s wrong upto\n", name);
+              error = 1;
+            }
+          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)
+            {
+              printf ("%s wrong ftell\n", name);
+              error = 1;
+            }
+          if (error)
+            {
+              printf    ("  fmt   \"%s\"\n", data[i].fmt);
+              printf    ("  input \"%s\"\n", data[i].input);
+              printf    ("  ret   want=%d\n", want_ret);
+              printf    ("        got =%d\n", got_ret);
+              mpf_trace ("  value want", want);
+              mpf_trace ("        got ", got);
+              printf    ("  upto  want=%d\n", want_upto);
+              printf    ("        got =%d\n", got_upto);
+              if (got_ftell != -1)
+                {
+                  printf    ("  ftell want =%ld\n", want_ftell);
+                  printf    ("        got  =%ld\n", got_ftell);
+                }
+              abort ();
+            }
+        }
+    }
+
+  mpf_clear (got);
+  mpf_clear (want);
+}
+
+
+void
+check_n (void)
+{
+  int    ret;
+
+  /* %n suppressed */
+  {
+    int n = 123;
+    gmp_sscanf ("   ", " %*n", &n);
+    ASSERT_ALWAYS (n == 123);
+  }
+  {
+    int n = 123;
+    fromstring_gmp_fscanf ("   ", " %*n", &n);
+    ASSERT_ALWAYS (n == 123);
+  }
+
+
+#define CHECK_N(type, string)                           \
+  do {                                                  \
+    type  x[2];                                         \
+    char  fmt[128];                                     \
+    int   ret;                                          \
+                                                        \
+    x[0] = ~ (type) 0;                                  \
+    x[1] = ~ (type) 0;                                  \
+    sprintf (fmt, "abc%%%sn", string);                  \
+    ret = gmp_sscanf ("abc", fmt, &x[0]);               \
+                                                        \
+    ASSERT_ALWAYS (ret == 0);                           \
+                                                        \
+    /* should write whole of x[0] and none of x[1] */   \
+    ASSERT_ALWAYS (x[0] == 3);                          \
+    ASSERT_ALWAYS (x[1] == (type) ~ (type) 0);		\
+                                                        \
+  } while (0)
+
+  CHECK_N (char,      "hh");
+  CHECK_N (long,      "l");
+#if HAVE_LONG_LONG
+  CHECK_N (long long, "L");
+#endif
+#if HAVE_INTMAX_T
+  CHECK_N (intmax_t,  "j");
+#endif
+#if HAVE_PTRDIFF_T
+  CHECK_N (ptrdiff_t, "t");
+#endif
+  CHECK_N (short,     "h");
+  CHECK_N (size_t,    "z");
+
+  /* %Zn */
+  {
+    mpz_t  x[2];
+    mpz_init_set_si (x[0], -987L);
+    mpz_init_set_si (x[1],  654L);
+    ret = gmp_sscanf ("xyz   ", "xyz%Zn", x[0]);
+    MPZ_CHECK_FORMAT (x[0]);
+    MPZ_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpz_cmp_ui (x[0], 3L) == 0);
+    ASSERT_ALWAYS (mpz_cmp_ui (x[1], 654L) == 0);
+    mpz_clear (x[0]);
+    mpz_clear (x[1]);
+  }
+  {
+    mpz_t  x;
+    mpz_init (x);
+    ret = fromstring_gmp_fscanf ("xyz   ", "xyz%Zn", x);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpz_cmp_ui (x, 3L) == 0);
+    mpz_clear (x);
+  }
+
+  /* %Qn */
+  {
+    mpq_t  x[2];
+    mpq_init (x[0]);
+    mpq_init (x[1]);
+    mpq_set_ui (x[0], 987L, 654L);
+    mpq_set_ui (x[1], 4115L, 226L);
+    ret = gmp_sscanf ("xyz   ", "xyz%Qn", x[0]);
+    MPQ_CHECK_FORMAT (x[0]);
+    MPQ_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpq_cmp_ui (x[0], 3L, 1L) == 0);
+    ASSERT_ALWAYS (mpq_cmp_ui (x[1], 4115L, 226L) == 0);
+    mpq_clear (x[0]);
+    mpq_clear (x[1]);
+  }
+  {
+    mpq_t  x;
+    mpq_init (x);
+    ret = fromstring_gmp_fscanf ("xyz   ", "xyz%Qn", x);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpq_cmp_ui (x, 3L, 1L) == 0);
+    mpq_clear (x);
+  }
+
+  /* %Fn */
+  {
+    mpf_t  x[2];
+    mpf_init (x[0]);
+    mpf_init (x[1]);
+    mpf_set_ui (x[0], 987L);
+    mpf_set_ui (x[1], 654L);
+    ret = gmp_sscanf ("xyz   ", "xyz%Fn", x[0]);
+    MPF_CHECK_FORMAT (x[0]);
+    MPF_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpf_cmp_ui (x[0], 3L) == 0);
+    ASSERT_ALWAYS (mpf_cmp_ui (x[1], 654L) == 0);
+    mpf_clear (x[0]);
+    mpf_clear (x[1]);
+  }
+  {
+    mpf_t  x;
+    mpf_init (x);
+    ret = fromstring_gmp_fscanf ("xyz   ", "xyz%Fn", x);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpf_cmp_ui (x, 3L) == 0);
+    mpf_clear (x);
+  }
+}
+
+
+void
+check_misc (void)
+{
+  int  ret, cmp;
+  {
+    int  a=9, b=8, c=7, n=66;
+    mpz_t  z;
+    mpz_init (z);
+    ret = gmp_sscanf ("1 2 3 4", "%d %d %d %Zd%n",
+                      &a, &b, &c, z, &n);
+    ASSERT_ALWAYS (ret == 4);
+    ASSERT_ALWAYS (a == 1);
+    ASSERT_ALWAYS (b == 2);
+    ASSERT_ALWAYS (c == 3);
+    ASSERT_ALWAYS (n == 7);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);
+    mpz_clear (z);
+  }
+  {
+    int  a=9, b=8, c=7, n=66;
+    mpz_t  z;
+    mpz_init (z);
+    ret = fromstring_gmp_fscanf ("1 2 3 4", "%d %d %d %Zd%n",
+                                 &a, &b, &c, z, &n);
+    ASSERT_ALWAYS (ret == 4);
+    ASSERT_ALWAYS (a == 1);
+    ASSERT_ALWAYS (b == 2);
+    ASSERT_ALWAYS (c == 3);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);
+    ASSERT_ALWAYS (n == 7);
+    ASSERT_ALWAYS (got_ftell == 7);
+    mpz_clear (z);
+  }
+
+  {
+    int  a=9, n=8;
+    mpz_t  z;
+    mpz_init (z);
+    ret = gmp_sscanf ("1 2 3 4", "%d %*d %*d %Zd%n", &a, z, &n);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (a == 1);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);
+    ASSERT_ALWAYS (n == 7);
+    mpz_clear (z);
+  }
+  {
+    int  a=9, n=8;
+    mpz_t  z;
+    mpz_init (z);
+    ret = fromstring_gmp_fscanf ("1 2 3 4", "%d %*d %*d %Zd%n",
+                                 &a, z, &n);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (a == 1);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);
+    ASSERT_ALWAYS (n == 7);
+    ASSERT_ALWAYS (got_ftell == 7);
+    mpz_clear (z);
+  }
+
+  /* EOF for no matching */
+  {
+    char buf[128];
+    ret = gmp_sscanf ("   ", "%s", buf);
+    ASSERT_ALWAYS (ret == EOF);
+    ret = fromstring_gmp_fscanf ("   ", "%s", buf);
+    ASSERT_ALWAYS (ret == EOF);
+    if (option_libc_scanf)
+      {
+        ret = sscanf ("   ", "%s", buf);
+        ASSERT_ALWAYS (ret == EOF);
+        ret = fun_fscanf ("   ", "%s", buf, NULL);
+        ASSERT_ALWAYS (ret == EOF);
+      }
+  }
+
+  /* suppressed field, then eof */
+  {
+    int  x;
+    if (test_sscanf_eof_ok ())
+      {
+        ret = gmp_sscanf ("123", "%*d%d", &x);
+        ASSERT_ALWAYS (ret == EOF);
+      }
+    ret = fromstring_gmp_fscanf ("123", "%*d%d", &x);
+    ASSERT_ALWAYS (ret == EOF);
+    if (option_libc_scanf)
+      {
+        ret = sscanf ("123", "%*d%d", &x);
+        ASSERT_ALWAYS (ret == EOF);
+        ret = fun_fscanf ("123", "%*d%d", &x, NULL);
+        ASSERT_ALWAYS (ret == EOF);
+      }
+  }
+  {
+    mpz_t  x;
+    mpz_init (x);
+    ret = gmp_sscanf ("123", "%*Zd%Zd", x);
+    ASSERT_ALWAYS (ret == EOF);
+    ret = fromstring_gmp_fscanf ("123", "%*Zd%Zd", x);
+    ASSERT_ALWAYS (ret == EOF);
+    mpz_clear (x);
+  }
+
+  /* %[...], glibc only */
+#ifdef __GLIBC__
+  {
+    char  buf[128];
+    int   n = -1;
+    buf[0] = '\0';
+    ret = gmp_sscanf ("abcdefgh", "%[a-d]ef%n", buf, &n);
+    ASSERT_ALWAYS (ret == 1);
+    cmp = strcmp (buf, "abcd");
+    ASSERT_ALWAYS (cmp == 0);
+    ASSERT_ALWAYS (n == 6);
+  }
+  {
+    char  buf[128];
+    int   n = -1;
+    buf[0] = '\0';
+    ret = gmp_sscanf ("xyza", "%[^a]a%n", buf, &n);
+    ASSERT_ALWAYS (ret == 1);
+    cmp = strcmp (buf, "xyz");
+    ASSERT_ALWAYS (cmp == 0);
+    ASSERT_ALWAYS (n == 4);
+  }
+  {
+    char  buf[128];
+    int   n = -1;
+    buf[0] = '\0';
+    ret = gmp_sscanf ("ab]ab]", "%[]ab]%n", buf, &n);
+    ASSERT_ALWAYS (ret == 1);
+    cmp = strcmp (buf, "ab]ab]");
+    ASSERT_ALWAYS (cmp == 0);
+    ASSERT_ALWAYS (n == 6);
+  }
+  {
+    char  buf[128];
+    int   n = -1;
+    buf[0] = '\0';
+    ret = gmp_sscanf ("xyzb", "%[^]ab]b%n", buf, &n);
+    ASSERT_ALWAYS (ret == 1);
+    cmp = strcmp (buf, "xyz");
+    ASSERT_ALWAYS (cmp == 0);
+    ASSERT_ALWAYS (n == 4);
+  }
+#endif
+
+  /* %zd etc won't be accepted by sscanf on old systems, and running
+     something to see if they work might be bad, so only try it on glibc,
+     and only on a new enough version (glibc 2.0 doesn't have %zd) */
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 0)
+  {
+    mpz_t   z;
+    size_t  s = -1;
+    mpz_init (z);
+    ret = gmp_sscanf ("456 789", "%zd %Zd", &s, z);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (s == 456);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);
+    mpz_clear (z);
+  }
+  {
+    mpz_t      z;
+    ptrdiff_t  d = -1;
+    mpz_init (z);
+    ret = gmp_sscanf ("456 789", "%td %Zd", &d, z);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (d == 456);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);
+    mpz_clear (z);
+  }
+  {
+    mpz_t      z;
+    long long  ll = -1;
+    mpz_init (z);
+    ret = gmp_sscanf ("456 789", "%Ld %Zd", &ll, z);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (ll == 456);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);
+    mpz_clear (z);
+  }
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+  if (argc > 1 && strcmp (argv[1], "-s") == 0)
+    option_libc_scanf = 1;
+
+  tests_start ();
+
+  mp_trace_base = 16;
+
+  check_z ();
+  check_q ();
+  check_f ();
+  check_n ();
+  check_misc ();
+
+  unlink (TEMPFILE);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/reuse.c b/tests/mpf/reuse.c
new file mode 100644
index 0000000..86db5b3
--- /dev/null
+++ b/tests/mpf/reuse.c
@@ -0,0 +1,218 @@
+/* Test that routines allow reusing a source variable as destination.
+
+Copyright 1996, 2000-2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMP_LIBGMP_DLL
+
+/* FIXME: When linking to a DLL libgmp, mpf_add etc can't be used as
+   initializers for global variables because they're effectively global
+   variables (function pointers) themselves.  Perhaps calling a test
+   function successively with mpf_add etc would be better.  */
+
+int
+main (void)
+{
+  printf ("Test suppressed for windows DLL\n");
+  exit (0);
+}
+
+
+#else /* ! DLL_EXPORT */
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+#ifndef EXPO
+#define EXPO 32
+#endif
+
+void dump_abort (const char *, mpf_t, mpf_t);
+
+typedef void (*dss_func) (mpf_ptr, mpf_srcptr, mpf_srcptr);
+
+dss_func dss_funcs[] =
+{
+  mpf_div, mpf_add, mpf_mul, mpf_sub,
+};
+
+const char *dss_func_names[] =
+{
+  "mpf_div", "mpf_add", "mpf_mul", "mpf_sub",
+};
+
+typedef void (*dsi_func) (mpf_ptr, mpf_srcptr, unsigned long int);
+
+dsi_func dsi_funcs[] =
+{
+  mpf_div_ui, mpf_add_ui, mpf_mul_ui, mpf_sub_ui,
+  mpf_mul_2exp, mpf_div_2exp, mpf_pow_ui
+};
+
+const char *dsi_func_names[] =
+{
+  "mpf_div_ui", "mpf_add_ui", "mpf_mul_ui", "mpf_sub_ui",
+  "mpf_mul_2exp", "mpf_div_2exp", "mpf_pow_ui"
+};
+
+typedef void (*dis_func) (mpf_ptr, unsigned long int, mpf_srcptr);
+
+dis_func dis_funcs[] =
+{
+  mpf_ui_div, mpf_ui_sub,
+};
+
+const char *dis_func_names[] =
+{
+  "mpf_ui_div", "mpf_ui_sub",
+};
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int pass, reps = 10000;
+  mpf_t in1, in2, out1;
+  unsigned long int in1i, in2i;
+  mpf_t res1, res2, res3;
+  mp_size_t bprec = 100;
+
+  tests_start ();
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+	bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init (in1);
+  mpf_init (in2);
+  mpf_init (out1);
+  mpf_init (res1);
+  mpf_init (res2);
+  mpf_init (res3);
+
+  for (pass = 1; pass <= reps; pass++)
+    {
+      mpf_random2 (in1, urandom () % SIZE - SIZE/2, urandom () % EXPO);
+      mpf_random2 (in2, urandom () % SIZE - SIZE/2, urandom () % EXPO);
+
+      for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)
+	{
+	  /* Don't divide by 0.  */
+	  if (i == 0 && mpf_cmp_ui (in2, 0) == 0)
+	    continue;
+
+	  (dss_funcs[i]) (res1, in1, in2);
+
+	  mpf_set (out1, in1);
+	  (dss_funcs[i]) (out1, out1, in2);
+	  mpf_set (res2, out1);
+
+	  mpf_set (out1, in2);
+	  (dss_funcs[i]) (out1, in1, out1);
+	  mpf_set (res3, out1);
+
+	  if (mpf_cmp (res1, res2) != 0)
+	    dump_abort (dss_func_names[i], res1, res2);
+	  if (mpf_cmp (res1, res3) != 0)
+	    dump_abort (dss_func_names[i], res1, res3);
+	}
+
+      in2i = urandom ();
+      for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)
+	{
+	  unsigned long this_in2i = in2i;
+
+	  /* Don't divide by 0.  */
+	  if (i == 0 && this_in2i == 0) /* dsi_funcs[i] == mpf_div_ui */
+	    continue;
+
+	  /* Avoid overflow/underflow in the exponent.  */
+	  if (dsi_funcs[i] == mpf_mul_2exp || dsi_funcs[i] == mpf_div_2exp)
+	    this_in2i %= 0x100000;
+	  else if (dsi_funcs[i] == mpf_pow_ui)
+	    this_in2i %= 0x1000;
+
+	  (dsi_funcs[i]) (res1, in1, this_in2i);
+
+	  mpf_set (out1, in1);
+	  (dsi_funcs[i]) (out1, out1, this_in2i);
+	  mpf_set (res2, out1);
+
+	  if (mpf_cmp (res1, res2) != 0)
+	    dump_abort (dsi_func_names[i], res1, res2);
+	}
+
+      in1i = urandom ();
+      for (i = 0; i < sizeof (dis_funcs) / sizeof (dis_func); i++)
+	{
+	  /* Don't divide by 0.  */
+	  if (i == 0 /* dis_funcs[i] == mpf_ui_div */
+	      && mpf_cmp_ui (in2, 0) == 0)
+	    continue;
+
+	  (dis_funcs[i]) (res1, in1i, in2);
+
+	  mpf_set (out1, in2);
+	  (dis_funcs[i]) (out1, in1i, out1);
+	  mpf_set (res2, out1);
+
+	  if (mpf_cmp (res1, res2) != 0)
+	    dump_abort (dis_func_names[i], res1, res2);
+	}
+
+    }
+
+  mpf_clear (in1);
+  mpf_clear (in2);
+  mpf_clear (out1);
+  mpf_clear (res1);
+  mpf_clear (res2);
+  mpf_clear (res3);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (const char *name, mpf_t res1, mpf_t res2)
+{
+  printf ("failure in %s:\n", name);
+  mpf_dump (res1);
+  mpf_dump (res2);
+  abort ();
+}
+
+#if 0
+void mpf_abs		(mpf_ptr, mpf_srcptr);
+void mpf_sqrt		(mpf_ptr, mpf_srcptr);
+void mpf_neg		(mpf_ptr, mpf_srcptr);
+#endif
+
+#endif /* ! DLL_EXPORT */
diff --git a/tests/mpf/t-add.c b/tests/mpf/t-add.c
new file mode 100644
index 0000000..eb8bbdc
--- /dev/null
+++ b/tests/mpf/t-add.c
@@ -0,0 +1,107 @@
+/* Test mpf_add.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  int reps = 20000;
+  int i;
+  mpf_t u, v, w, wref;
+  mp_size_t bprec = 100;
+  mpf_t rerr, max_rerr, limit_rerr;
+
+  tests_start ();
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+	bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init_set_ui (limit_rerr, 1);
+  mpf_div_2exp (limit_rerr, limit_rerr, bprec);
+#if VERBOSE
+  mpf_dump (limit_rerr);
+#endif
+  mpf_init (rerr);
+  mpf_init_set_ui (max_rerr, 0);
+
+  mpf_init (u);
+  mpf_init (v);
+  mpf_init (w);
+  mpf_init (wref);
+  for (i = 0; i < reps; i++)
+    {
+      size = urandom () % (2 * SIZE) - SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (u, size, exp);
+
+      size = urandom () % (2 * SIZE) - SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (v, size, exp);
+
+      mpf_add (w, u, v);
+      refmpf_add (wref, u, v);
+
+      mpf_reldiff (rerr, w, wref);
+      if (mpf_cmp (rerr, max_rerr) > 0)
+	{
+	  mpf_set (max_rerr, rerr);
+#if VERBOSE
+	  mpf_dump (max_rerr);
+#endif
+	  if (mpf_cmp (rerr, limit_rerr) > 0)
+	    {
+	      printf ("ERROR after %d tests\n", i);
+	      printf ("   u = "); mpf_dump (u);
+	      printf ("   v = "); mpf_dump (v);
+	      printf ("wref = "); mpf_dump (wref);
+	      printf ("   w = "); mpf_dump (w);
+	      abort ();
+	    }
+	}
+    }
+
+  mpf_clear (limit_rerr);
+  mpf_clear (rerr);
+  mpf_clear (max_rerr);
+
+  mpf_clear (u);
+  mpf_clear (v);
+  mpf_clear (w);
+  mpf_clear (wref);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-cmp_d.c b/tests/mpf/t-cmp_d.c
new file mode 100644
index 0000000..213c091
--- /dev/null
+++ b/tests/mpf/t-cmp_d.c
@@ -0,0 +1,103 @@
+/* Test mpf_cmp_d.
+
+Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define SGN(n)  ((n) > 0 ? 1 : (n) < 0 ? -1 : 0)
+
+void
+check_one (const char *name, mpf_srcptr x, double y, int cmp)
+{
+  int   got;
+
+  got = mpf_cmp_d (x, y);
+  if (SGN(got) != cmp)
+    {
+      int i;
+      printf    ("mpf_cmp_d wrong (from %s)\n", name);
+      printf    ("  got  %d\n", got);
+      printf    ("  want %d\n", cmp);
+      mpf_trace ("  x", x);
+      printf    ("  y %g\n", y);
+      mp_trace_base=-16;
+      mpf_trace ("  x", x);
+      printf    ("  y %g\n", y);
+      printf    ("  y");
+      for (i = 0; i < sizeof(y); i++)
+        printf (" %02X", (unsigned) ((unsigned char *) &y)[i]);
+      printf ("\n");
+      abort ();
+    }
+}
+
+void
+check_infinity (void)
+{
+  mpf_t   x;
+  double  y = tests_infinity_d ();
+  if (y == 0.0)
+    return;
+
+  mpf_init (x);
+
+  /* 0 cmp inf */
+  mpf_set_ui (x, 0L);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  /* 123 cmp inf */
+  mpf_set_ui (x, 123L);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  /* -123 cmp inf */
+  mpf_set_si (x, -123L);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  /* 2^5000 cmp inf */
+  mpf_set_ui (x, 1L);
+  mpf_mul_2exp (x, x, 5000L);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  /* -2^5000 cmp inf */
+  mpf_neg (x, x);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  mpf_clear (x);
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_infinity ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-cmp_si.c b/tests/mpf/t-cmp_si.c
new file mode 100644
index 0000000..29f7cdb
--- /dev/null
+++ b/tests/mpf/t-cmp_si.c
@@ -0,0 +1,134 @@
+/* Test mpf_cmp_si and mpf_cmp_z.
+
+Copyright 2000, 2001, 2004, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define SGN(x)       ((x) < 0 ? -1 : (x) == 0 ? 0 : 1)
+
+void
+check_data (void)
+{
+  static const struct {
+    int         a_base;
+    const char  *a;
+    const char  *b;
+    int         want;
+  } data[] = {
+    { 10, "0",  "1", -1 },
+    { 10, "0",  "0",  0 },
+    { 10, "0", "-1",  1 },
+
+    { 10, "1",  "1", 0 },
+    { 10, "1",  "0", 1 },
+    { 10, "1", "-1", 1 },
+
+    { 10, "-1",  "1", -1 },
+    { 10, "-1",  "0", -1 },
+    { 10, "-1", "-1", 0 },
+
+    { 10, "1.5", "2", -1 },
+    { 10, "1.5", "1",  1 },
+    { 10, "0.5", "1", -1 },
+
+    { 10, "-1.5", "-2",  1 },
+    { 10, "-1.5", "-1", -1 },
+    { 10, "-0.5", "-1",  1 },
+
+    { 16,         "0", "-0x80000000",  1 },
+    { 16,  "80000000", "-0x80000000",  1 },
+    { 16,  "80000001", "-0x80000000",  1 },
+    { 16, "-80000000", "-0x80000000",  0 },
+    { 16, "-80000001", "-0x80000000", -1 },
+    { 16, "-FF0080000001", "-0x80000000", -1 },
+
+    { 16,                 "0", "-0x8000000000000000",  1 },
+    { 16,  "8000000000000000", "-0x8000000000000000",  1 },
+    { 16,  "8000000000000001", "-0x8000000000000000",  1 },
+    { 16, "-8000000000000000", "-0x8000000000000000",  0 },
+    { 16, "-8000000000000000.1", "-0x8000000000000000", -1 },
+    { 16, "-FF008000000000000001", "-0x8000000000000000", -1 },
+
+    { 16,                 "0", "-0x876543210FEDCBA9876543210000000",  1 },
+    { 16,  "876543210FEDCBA9876543210000000", "-0x876543210FEDCBA9876543210000000",  1 },
+    { 16,  "876543210FEDCBA9876543210000001", "-0x876543210FEDCBA9876543210000000",  1 },
+    { 16, "-876543210FEDCBA9876543210000000", "-0x876543210FEDCBA9876543210000000",  0 },
+    { 16, "-876543210FEDCBA9876543210000000.1", "-0x876543210FEDCBA9876543210000000", -1 },
+    { 16, "-FF00876543210FEDCBA9876543210000000", "-0x876543210FEDCBA9876543210000000", -1 },
+  };
+
+  mpf_t  a;
+  mpz_t  bz;
+  long   b;
+  int    got;
+  int    i;
+
+  mpf_init2 (a, 128);
+  mpz_init (bz);
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_str_or_abort (a, data[i].a, data[i].a_base);
+      mpz_set_str_or_abort (bz, data[i].b, 0);
+
+      if (mpz_fits_slong_p (bz))
+        {
+          b = mpz_get_si (bz);
+          got = mpf_cmp_si (a, b);
+          if (SGN (got) != data[i].want)
+            {
+              printf ("mpf_cmp_si wrong on data[%d]\n", i);
+              printf ("  a="); mpf_out_str (stdout, 10, 0, a);
+              printf (" (%s)\n", data[i].a);
+              printf ("  b=%ld (%s)\n", b, data[i].b);
+              printf ("  got=%d\n", got);
+              printf ("  want=%d\n", data[i].want);
+              abort();
+            }
+        }
+
+      got = mpf_cmp_z (a, bz);
+      if (SGN (got) != data[i].want)
+	{
+	  b = mpz_get_si (bz);
+	  printf ("mpf_cmp_z wrong on data[%d]\n", i);
+	  printf ("  a="); mpf_out_str (stdout, 10, 0, a);
+	  printf (" (%s)\n", data[i].a);
+	  printf ("  b=%ld (%s)\n", b, data[i].b);
+	  printf ("  got=%d\n", got);
+	  printf ("  want=%d\n", data[i].want);
+	  abort();
+	}
+    }
+
+  mpf_clear (a);
+  mpz_clear (bz);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-conv.c b/tests/mpf/t-conv.c
new file mode 100644
index 0000000..2f992af
--- /dev/null
+++ b/tests/mpf/t-conv.c
@@ -0,0 +1,261 @@
+/* Test mpf_get_str and mpf_set_str.
+
+Copyright 1996, 2000, 2001, 2008, 2019, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for strlen */
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 10
+#endif
+
+#ifndef EXPO
+#define EXPO 200
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mpf_t x, y;
+  int reps = 20000;
+  int i;
+  mp_size_t bprec = 100;
+  mpf_t d, rerr, max_rerr, limit_rerr;
+  char *str;
+  mp_exp_t bexp;
+  long size, exp;
+  int base;
+  char buf[SIZE * GMP_LIMB_BITS + 5];
+
+  tests_start ();
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+	bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init_set_ui (limit_rerr, 1);
+  mpf_div_2exp (limit_rerr, limit_rerr, bprec);
+#if VERBOSE
+  mpf_dump (limit_rerr);
+#endif
+  mpf_init (rerr);
+  mpf_init_set_ui (max_rerr, 0);
+
+  mpf_init (x);
+  mpf_init (y);
+  mpf_init (d);
+
+  /* First test some specific values.  */
+
+  mpf_set_str (y, "1.23456", 0);
+  mpf_set_str (x, "1.23456", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "00000000000000000000000000000000000000001.23456", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "0.000000000000000000000000000000000000000123456e40", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, ".000000000000000000000000000000000000000123456e40", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "00000000000000000000.00000000000000000000123456e21", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+
+  mpf_set_str (y, "1.23456e1000", 0);
+  mpf_set_str (x, "1.23456e1000", 10);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "1.23456e+1000", 0);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "1.23456e+1000", 10);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "00000000000000000000000000000000000000001.23456e+1000", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "0.000000000000000000000000000000000000000123456e+1040", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, ".000000000000000000000000000000000000000123456e+1040", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "00000000000000000000.00000000000000000000123456e+1021", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+
+  mpf_set_str (y, "1.23456", 16);
+  mpf_set_str (x, "00000000000000000000000000000000000000001.23456", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "0.000000000000000000000000000000000000000123456@28", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, ".000000000000000000000000000000000000000123456@28", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "00000000000000000000.00000000000000000000123456@15", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+
+  mpf_set_str (y, "   0", 10);
+  mpf_set_str (x, "00000000000000000000000000000000000000000000000000000", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "0000000000000000000000000000000000000000000000000000.", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "000000000000000000000000000000000000000000000000000.0", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, ".0000000000000000000000000000000000000000000000000000", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "0.000000000000000000000000000000000000000000000000000", 10);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+
+  mpf_set_str (x, "00000000000000000000000000000000000000000000000000000", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "0000000000000000000000000000000000000000000000000000.", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "000000000000000000000000000000000000000000000000000.0", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, ".0000000000000000000000000000000000000000000000000000", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "0.000000000000000000000000000000000000000000000000000", 16);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "+00000000000000000000000000000000000000000000000000000e-345", 9);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "-0000000000000000000000000000000000000000000000000000.@AB", 26);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "000000000000000000000000000000000000000000000000000.0@78", 19);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "+.0000000000000000000000000000000000000000000000000000e555", 6);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "-0.000000000000000000000000000000000000000000000000000@-AAAAAAAAAAAAAAAAAAAAAAAA", 17);
+  MPF_CHECK_FORMAT (x);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+
+  /* Now test random values.  */
+
+  for (i = 0; i < reps; i++)
+    {
+      if (i == 0)
+        {
+          /* exercise the special case in get_str for x==0 */
+          mpf_set_ui (x, 0L);
+          base = 0;
+        }
+      else
+        {
+          size = urandom () % (2 * SIZE) - SIZE;
+          exp = urandom () % EXPO;
+          mpf_random2 (x, size, exp);
+          base = urandom () % 62;
+          base += base > 0;
+        }
+
+      str = mpf_get_str (0, &bexp, base, 0, x);
+
+      if (str[0] == '-')
+	sprintf (buf, "-0.%s@%ld", str + 1, bexp);
+      else
+	sprintf (buf, "0.%s@%ld", str, bexp);
+
+      mpf_set_str_or_abort (y, buf, -base);
+      (*__gmp_free_func) (str, strlen (str) + 1);
+
+      mpf_reldiff (rerr, x, y);
+      if (mpf_cmp (rerr, max_rerr) > 0)
+	{
+	  mpf_set (max_rerr, rerr);
+#if VERBOSE
+	  mpf_dump (max_rerr);
+#endif
+	  if (mpf_cmp (rerr, limit_rerr) > 0)
+	    {
+	      printf ("ERROR after %d tests\n", i);
+	      printf ("base = %d\n", base);
+	      printf ("   x = "); mpf_dump (x);
+	      printf ("   y = "); mpf_dump (y);
+	      abort ();
+	    }
+	}
+    }
+
+  mpf_clear (limit_rerr);
+  mpf_clear (rerr);
+  mpf_clear (max_rerr);
+
+  mpf_clear (x);
+  mpf_clear (y);
+  mpf_clear (d);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-div.c b/tests/mpf/t-div.c
new file mode 100644
index 0000000..3214592
--- /dev/null
+++ b/tests/mpf/t-div.c
@@ -0,0 +1,185 @@
+/* Test mpf_div.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (const char *desc, mpf_ptr got, mpf_srcptr u, mpf_srcptr v)
+{
+  if (! refmpf_validate_division ("mpf_div", got, u, v))
+    {
+      mp_trace_base = -16;
+      mpf_trace ("  u", u);
+      mpf_trace ("  v", v);
+      printf    ("  %s\n", desc);
+      abort ();
+    }
+}
+
+void
+check_rand (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  prec;
+  mpf_t  got, u, v;
+  int    i;
+
+  mpf_init (got);
+  mpf_init (u);
+  mpf_init (v);
+
+  /* separate */
+  for (i = 0; i < 100; i++)
+    {
+      /* got precision */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (got, prec);
+
+      /* u */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (u, prec);
+      do {
+        mpf_random2 (u, PREC(u), (mp_exp_t) 20);
+      } while (SIZ(u) == 0);
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (u, u);
+
+      /* v */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (v, prec);
+      do {
+        mpf_random2 (v, PREC(v), (mp_exp_t) 20);
+      } while (SIZ(v) == 0);
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (v, v);
+
+      switch (i % 3) {
+      case 0:
+        mpf_div (got, u, v);
+        check_one ("separate", got, u, v);
+        break;
+      case 1:
+        prec = refmpf_set_overlap (got, u);
+        mpf_div (got, got, v);
+        check_one ("dst == u", got, u, v);
+        mpf_set_prec_raw (got, prec);
+        break;
+      case 2:
+        prec = refmpf_set_overlap (got, v);
+        mpf_div (got, u, got);
+        check_one ("dst == v", got, u, v);
+        mpf_set_prec_raw (got, prec);
+        break;
+      }
+    }
+
+  mpf_clear (got);
+  mpf_clear (u);
+  mpf_clear (v);
+}
+
+/* Exercise calls mpf(x,x,x) */
+void
+check_reuse_three (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  result_prec, input_prec, set_prec;
+  mpf_t  got;
+  int    i;
+
+  mpf_init (got);
+
+  for (i = 0; i < 8; i++)
+    {
+      result_prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      input_prec = min_prec + gmp_urandomm_ui (rands, 15L);
+
+      set_prec = MAX (result_prec, input_prec);
+      refmpf_set_prec_limbs (got, set_prec);
+
+      /* input, non-zero, possibly negative */
+      PREC(got) = input_prec;
+      do {
+        mpf_random2 (got, input_prec, (mp_exp_t) 20);
+      } while (SIZ(got) == 0);
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (got, got);
+
+      PREC(got) = result_prec;
+
+      mpf_div (got, got, got);
+
+      /* expect exactly 1.0 always */
+      ASSERT_ALWAYS (mpf_cmp_ui (got, 1L) == 0);
+
+      PREC(got) = set_prec;
+    }
+
+  mpf_clear (got);
+}
+
+void
+check_various (void)
+{
+  mpf_t got, u, v;
+
+  mpf_init (got);
+  mpf_init (u);
+  mpf_init (v);
+
+  /* 100/4 == 25 */
+  mpf_set_prec (got, 20L);
+  mpf_set_ui (u, 100L);
+  mpf_set_ui (v, 4L);
+  mpf_div (got, u, v);
+  MPF_CHECK_FORMAT (got);
+  ASSERT_ALWAYS (mpf_cmp_ui (got, 25L) == 0);
+
+  /* 1/(2^n+1), a case where truncating the divisor would be wrong */
+  mpf_set_prec (got, 500L);
+  mpf_set_prec (v, 900L);
+  mpf_set_ui (v, 1L);
+  mpf_mul_2exp (v, v, 800L);
+  mpf_add_ui (v, v, 1L);
+  mpf_div (got, u, v);
+  check_one ("1/2^n+1, separate", got, u, v);
+
+  mpf_clear (got);
+  mpf_clear (u);
+  mpf_clear (v);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_various ();
+  check_rand ();
+  check_reuse_three ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-dm2exp.c b/tests/mpf/t-dm2exp.c
new file mode 100644
index 0000000..d17c9fa
--- /dev/null
+++ b/tests/mpf/t-dm2exp.c
@@ -0,0 +1,118 @@
+/* Test mpf_div, mpf_div_2exp, mpf_mul_2exp.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+int
+main (int argc, char **argv)
+{
+  int reps = 100000;
+  int i;
+  mpf_t u, v, w1, w2, w3;
+  mp_size_t bprec = 100;
+  mpf_t rerr, limit_rerr;
+  mp_size_t un;
+  mp_exp_t ue;
+
+  tests_start ();
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+	bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init (rerr);
+  mpf_init (limit_rerr);
+
+  mpf_init (u);
+  mpf_init (v);
+  mpf_init (w1);
+  mpf_init (w2);
+  mpf_init (w3);
+
+  for (i = 0; i < reps; i++)
+    {
+      unsigned long int res_prec;
+      unsigned long int pow2;
+
+      res_prec = urandom () % (bprec + 100);
+      mpf_set_prec (w1, res_prec);
+      mpf_set_prec (w2, res_prec);
+      mpf_set_prec (w3, res_prec);
+
+      mpf_set_ui (limit_rerr, 1);
+      mpf_div_2exp (limit_rerr, limit_rerr, res_prec);
+
+      pow2 = urandom () % 0x10000;
+      mpf_set_ui (v, 1);
+      mpf_mul_2exp (v, v, pow2);
+
+      un = urandom () % (2 * SIZE) - SIZE;
+      ue = urandom () % SIZE;
+      mpf_random2 (u, un, ue);
+
+      mpf_div_2exp (w1, u, pow2);
+      mpf_div (w2, u, v);
+      mpf_reldiff (rerr, w1, w2);
+      if (mpf_cmp (rerr, limit_rerr) > 0)
+	{
+	  printf ("ERROR in mpf_div or mpf_div_2exp after %d tests\n", i);
+	  printf ("   u = "); mpf_dump (u);
+	  printf ("   v = "); mpf_dump (v);
+	  printf ("  w1 = "); mpf_dump (w1);
+	  printf ("  w2 = "); mpf_dump (w2);
+	  abort ();
+	}
+      mpf_mul_2exp (w3, w1, pow2);
+      mpf_reldiff (rerr, u, w3);
+      if (mpf_cmp (rerr, limit_rerr) > 0)
+	{
+	  printf ("ERROR in mpf_mul_2exp after %d tests\n", i);
+	  printf ("   u = "); mpf_dump (u);
+	  printf ("   v = "); mpf_dump (v);
+	  printf ("  w1 = "); mpf_dump (w1);
+	  printf ("  w3 = "); mpf_dump (w3);
+	  abort ();
+	}
+    }
+
+  mpf_clear (rerr);
+  mpf_clear (limit_rerr);
+
+  mpf_clear (u);
+  mpf_clear (v);
+  mpf_clear (w1);
+  mpf_clear (w2);
+  mpf_clear (w3);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-eq.c b/tests/mpf/t-eq.c
new file mode 100644
index 0000000..7b80b02
--- /dev/null
+++ b/tests/mpf/t-eq.c
@@ -0,0 +1,217 @@
+/* Test mpf_eq.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define SZ (2 * sizeof(mp_limb_t))
+
+void insert_random_low_zero_limbs (mpf_t, gmp_randstate_ptr);
+void dump_abort (mpf_t, mpf_t, int, int, int, int, int, long);
+void hexdump (mpf_t);
+
+void
+check_data (void)
+{
+  static const struct
+  {
+    struct {
+      int        exp, size;
+      mp_limb_t  d[10];
+    } x, y;
+    mp_bitcnt_t bits;
+    int want;
+
+  } data[] = {
+    { { 0, 0, { 0 } },             { 0, 0, { 0 } },    0, 1 },
+
+    { { 0, 1, { 7 } },             { 0, 1, { 7 } },    0, 1 },
+    { { 0, 1, { 7 } },             { 0, 1, { 7 } },   17, 1 },
+    { { 0, 1, { 7 } },             { 0, 1, { 7 } }, 4711, 1 },
+
+    { { 0, 1, { 7 } },             { 0, 1, { 6 } },    0, 1 },
+    { { 0, 1, { 7 } },             { 0, 1, { 6 } },    2, 1 },
+    { { 0, 1, { 7 } },             { 0, 1, { 6 } },    3, 0 },
+
+    { { 0, 0, { 0 } },             { 0, 1, { 1 } },    0, 0 },
+    { { 0, 1, { 1 } },             { 0,-1 ,{ 1 } },    0, 0 },
+    { { 1, 1, { 1 } },             { 0, 1, { 1 } },    0, 0 },
+
+    { { 0, 1, { 8 } },             { 0, 1, { 4 } },    0, 0 },
+
+    { { 0, 2, { 0, 3 } },          { 0, 1, { 3 } }, 1000, 1 },
+  };
+
+  mpf_t  x, y;
+  int got, got_swapped;
+  int i;
+  mp_trace_base = 16;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      PTR(x) = (mp_ptr) data[i].x.d;
+      SIZ(x) = data[i].x.size;
+      EXP(x) = data[i].x.exp;
+      PREC(x) = numberof (data[i].x.d);
+      MPF_CHECK_FORMAT (x);
+
+      PTR(y) = (mp_ptr) data[i].y.d;
+      SIZ(y) = data[i].y.size;
+      EXP(y) = data[i].y.exp;
+      PREC(y) = numberof (data[i].y.d);
+      MPF_CHECK_FORMAT (y);
+
+      got         = mpf_eq (x, y, data[i].bits);
+      got_swapped = mpf_eq (y, x, data[i].bits);
+
+      if (got != got_swapped || got != data[i].want)
+	{
+	  printf ("check_data() wrong result at data[%d]\n", i);
+	  mpf_trace ("x   ", x);
+	  mpf_trace ("y   ", y);
+	  printf ("got         %d\n", got);
+	  printf ("got_swapped %d\n", got_swapped);
+	  printf ("want        %d\n", data[i].want);
+	  abort ();
+        }
+    }
+}
+
+void
+check_random (long reps)
+{
+  unsigned long test;
+  gmp_randstate_ptr rands = RANDS;
+  mpf_t a, b, x;
+  mpz_t ds;
+  int hibits, lshift1, lshift2;
+  int xtra;
+
+#define HIBITS 10
+#define LSHIFT1 10
+#define LSHIFT2 10
+
+  mpf_set_default_prec ((1 << HIBITS) + (1 << LSHIFT1) + (1 << LSHIFT2));
+
+  mpz_init (ds);
+  mpf_inits (a, b, x, NULL);
+
+  for (test = 0; test < reps; test++)
+    {
+      mpz_urandomb (ds, rands, HIBITS);
+      hibits = mpz_get_ui (ds) + 1;
+      mpz_urandomb (ds, rands, hibits);
+      mpz_setbit (ds, hibits  - 1);	/* make sure msb is set */
+      mpf_set_z (a, ds);
+      mpf_set_z (b, ds);
+
+      mpz_urandomb (ds, rands, LSHIFT1);
+      lshift1 = mpz_get_ui (ds);
+      mpf_mul_2exp (a, a, lshift1 + 1);
+      mpf_mul_2exp (b, b, lshift1 + 1);
+      mpf_add_ui (a, a, 1);	/* make a one-bit difference */
+
+      mpz_urandomb (ds, rands, LSHIFT2);
+      lshift2 = mpz_get_ui (ds);
+      mpf_mul_2exp (a, a, lshift2);
+      mpf_mul_2exp (b, b, lshift2);
+      mpz_urandomb (ds, rands, lshift2);
+      mpf_set_z (x, ds);
+      mpf_add (a, a, x);
+      mpf_add (b, b, x);
+
+      insert_random_low_zero_limbs (a, rands);
+      insert_random_low_zero_limbs (b, rands);
+
+      if (mpf_eq (a, b, lshift1 + hibits) == 0 ||
+	  mpf_eq (b, a, lshift1 + hibits) == 0)
+	{
+	  dump_abort (a, b, lshift1 + hibits, lshift1, lshift2, hibits, 1, test);
+	}
+      for (xtra = 1; xtra < 100; xtra++)
+	if (mpf_eq (a, b, lshift1 + hibits + xtra) != 0 ||
+	    mpf_eq (b, a, lshift1 + hibits + xtra) != 0)
+	  {
+	    dump_abort (a, b, lshift1 + hibits + xtra, lshift1, lshift2, hibits, 0, test);
+	  }
+    }
+
+  mpf_clears (a, b, x, NULL);
+  mpz_clear (ds);
+}
+
+void
+insert_random_low_zero_limbs (mpf_t x, gmp_randstate_ptr rands)
+{
+  mp_size_t max = PREC(x) - SIZ(x);
+  mp_size_t s;
+  mpz_t ds; mpz_init (ds);
+  mpz_urandomb (ds, rands, 32);
+  s = mpz_get_ui (ds) % (max + 1);
+  MPN_COPY_DECR (PTR(x) + s, PTR(x), SIZ(x));
+  MPN_ZERO (PTR(x), s);
+  SIZ(x) += s;
+  mpz_clear (ds);
+}
+
+void
+dump_abort (mpf_t a, mpf_t b, int cmp_prec, int lshift1, int lshift2, int hibits, int want, long test)
+{
+  printf ("ERROR in test %ld\n", test);
+  printf ("want %d got %d from mpf_eq\n", want, 1-want);
+  printf ("cmp_prec = %d\n", cmp_prec);
+  printf ("lshift1 = %d\n", lshift1);
+  printf ("lshift2 = %d\n", lshift2);
+  printf ("hibits = %d\n", hibits);
+  hexdump (a); puts ("");
+  hexdump (b); puts ("");
+  abort ();
+}
+
+void
+hexdump (mpf_t x)
+{
+  mp_size_t i;
+  for (i = ABSIZ(x) - 1; i >= 0; i--)
+    {
+      gmp_printf ("%0*MX", SZ, PTR(x)[i]);
+      if (i != 0)
+	printf (" ");
+    }
+}
+
+int
+main (int argc, char *argv[])
+{
+  long reps = 10000;
+
+  if (argc == 2)
+    reps = strtol (argv[1], 0, 0);
+
+  tests_start ();
+
+  check_data ();
+  check_random (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-fits.c b/tests/mpf/t-fits.c
new file mode 100644
index 0000000..937e4e6
--- /dev/null
+++ b/tests/mpf/t-fits.c
@@ -0,0 +1,327 @@
+/* Test mpf_fits_*_p
+
+Copyright 2001, 2002, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Nothing sophisticated here, just exercise mpf_fits_*_p on a small amount
+   of data. */
+
+/* Check one fits predicate FUN against the mpf `f', expecting the 0/1
+   result ANSWER.  NAME is the printable name of FUN and `expr' the
+   printable form of f's current value.  On mismatch the value of f is
+   dumped and the flag `error' is set; the run continues so that every
+   failure is reported before main's final abort.  Relies on the locals
+   got, expr, f and error declared in main.  */
+#define EXPECT_S(fun,name,answer)                                        \
+  got = fun (f);                                                         \
+  if (got != answer)                                                     \
+    {                                                                    \
+      printf ("%s (%s) got %d want %d\n", name, expr, got, answer);      \
+      printf (" f size %d exp %ld\n", SIZ(f), EXP(f));                   \
+      printf (" f dec "); mpf_out_str (stdout, 10, 0, f); printf ("\n"); \
+      printf (" f hex "); mpf_out_str (stdout, 16, 0, f); printf ("\n"); \
+      error = 1;                                                         \
+    }
+
+/* Convenience wrapper stringifying FUN for the failure message.  */
+#define EXPECT(fun,answer)  EXPECT_S(fun,#fun,answer)
+
+/* Exercise mpf_fits_{ulong,uint,ushort,slong,sint,sshort}_p on
+   hand-picked boundary values: 0, +-1, each integer type's MAX/MIN,
+   those limits plus/minus 0.5 and 1, pure fractions, and values far
+   outside every integer range.  The fits predicates truncate any
+   fractional part, so e.g. ULONG_MAX + 0.5 still fits while
+   ULONG_MAX + 1 does not.  Mismatches only set `error' (via EXPECT),
+   so all failures are printed before the final abort.  */
+int
+main (void)
+{
+  mpf_t       f, f0p5;
+  int         got;
+  const char  *expr;
+  int         error = 0;
+
+  tests_start ();
+  mpf_init2 (f, 200L);
+  mpf_init2 (f0p5, 200L);
+
+  /* 0.5 */
+  mpf_set_ui (f0p5, 1L);
+  mpf_div_2exp (f0p5, f0p5, 1L);
+
+  mpf_set_ui (f, 0L);
+  expr = "0";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_ui (f, 1L);
+  expr = "1";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, -1L);
+  expr = "-1";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+
+  /* unsigned maxima: MAX fits, MAX+0.5 fits (fraction truncated),
+     MAX+1 overflows the type in question */
+  mpf_set_ui (f, (unsigned long) USHRT_MAX);
+  expr = "USHRT_MAX";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+
+  mpf_set_ui (f, (unsigned long) USHRT_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "USHRT_MAX + 0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+
+  mpf_set_ui (f, (unsigned long) USHRT_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "USHRT_MAX + 1";
+  EXPECT (mpf_fits_ushort_p, 0);
+
+
+  mpf_set_ui (f, (unsigned long) UINT_MAX);
+  expr = "UINT_MAX";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+
+  mpf_set_ui (f, (unsigned long) UINT_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "UINT_MAX + 0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+
+  mpf_set_ui (f, (unsigned long) UINT_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "UINT_MAX + 1";
+  EXPECT (mpf_fits_uint_p, 0);
+
+
+  mpf_set_ui (f, ULONG_MAX);
+  expr = "ULONG_MAX";
+  EXPECT (mpf_fits_ulong_p, 1);
+
+  mpf_set_ui (f, ULONG_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "ULONG_MAX + 0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+
+  mpf_set_ui (f, ULONG_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "ULONG_MAX + 1";
+  EXPECT (mpf_fits_ulong_p, 0);
+
+
+  /* signed maxima */
+  mpf_set_si (f, (long) SHRT_MAX);
+  expr = "SHRT_MAX";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, (long) SHRT_MAX);
+  expr = "SHRT_MAX + 0.5";
+  mpf_add (f, f, f0p5);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, (long) SHRT_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "SHRT_MAX + 1";
+  EXPECT (mpf_fits_sshort_p, 0);
+
+
+  mpf_set_si (f, (long) INT_MAX);
+  expr = "INT_MAX";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+
+  mpf_set_si (f, (long) INT_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "INT_MAX + 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+
+  mpf_set_si (f, (long) INT_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "INT_MAX + 1";
+  EXPECT (mpf_fits_sint_p, 0);
+
+
+  mpf_set_si (f, LONG_MAX);
+  expr = "LONG_MAX";
+  EXPECT (mpf_fits_slong_p, 1);
+
+  mpf_set_si (f, LONG_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "LONG_MAX + 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+
+  mpf_set_si (f, LONG_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "LONG_MAX + 1";
+  EXPECT (mpf_fits_slong_p, 0);
+
+
+  /* signed minima: MIN fits, MIN-0.5 fits (truncated), MIN-1 does not */
+  mpf_set_si (f, (long) SHRT_MIN);
+  expr = "SHRT_MIN";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, (long) SHRT_MIN);
+  mpf_sub (f, f, f0p5);
+  expr = "SHRT_MIN - 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, (long) SHRT_MIN);
+  mpf_sub_ui (f, f, 1L);
+  expr = "SHRT_MIN - 1";
+  EXPECT (mpf_fits_sshort_p, 0);
+
+
+  mpf_set_si (f, (long) INT_MIN);
+  expr = "INT_MIN";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+
+  mpf_set_si (f, (long) INT_MIN);
+  mpf_sub (f, f, f0p5);
+  expr = "INT_MIN - 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+
+  mpf_set_si (f, (long) INT_MIN);
+  mpf_sub_ui (f, f, 1L);
+  expr = "INT_MIN - 1";
+  EXPECT (mpf_fits_sint_p, 0);
+
+
+  mpf_set_si (f, LONG_MIN);
+  expr = "LONG_MIN";
+  EXPECT (mpf_fits_slong_p, 1);
+
+  mpf_set_si (f, LONG_MIN);
+  mpf_sub (f, f, f0p5);
+  expr = "LONG_MIN - 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+
+  mpf_set_si (f, LONG_MIN);
+  mpf_sub_ui (f, f, 1L);
+  expr = "LONG_MIN - 1";
+  EXPECT (mpf_fits_slong_p, 0);
+
+
+  /* pure fractions truncate towards zero, so they fit everywhere */
+  mpf_set_str_or_abort (f, "0.5", 10);
+  expr = "0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_str_or_abort (f, "-0.5", 10);
+  expr = "-0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_str_or_abort (f, "-1.5", 10);
+  expr = "-1.5";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+
+  /* 1 + tiny fraction fits; 16^1000 fits nowhere */
+  mpf_set_str_or_abort (f, "1.000000000000000000000000000000000001", 16);
+  expr = "1.000000000000000000000000000000000001 base 16";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_str_or_abort (f, "1@1000", 16);
+  expr = "1@1000 base 16";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 0);
+  EXPECT (mpf_fits_sint_p, 0);
+  EXPECT (mpf_fits_sshort_p, 0);
+
+
+  /* values needing more than BITS_PER_ULONG bits never fit */
+  mpf_set_ui (f, 1L);
+  mpf_mul_2exp (f, f, BITS_PER_ULONG + 1);
+  mpf_sub_ui (f, f, 1L);
+  expr = "2^(BITS_PER_ULONG+1) - 1";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 0);
+  EXPECT (mpf_fits_sint_p, 0);
+  EXPECT (mpf_fits_sshort_p, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_mul_2exp (f, f, BITS_PER_ULONG + 1);
+  mpf_ui_sub (f, 1L, f);
+  expr = "- (2^(BITS_PER_ULONG+1) - 1)";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 0);
+  EXPECT (mpf_fits_sint_p, 0);
+  EXPECT (mpf_fits_sshort_p, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_mul_2exp (f, f, BITS_PER_ULONG + 5);
+  mpf_sub_ui (f, f, 1L);
+  expr = "2^(BITS_PER_ULONG+5) - 1";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 0);
+  EXPECT (mpf_fits_sint_p, 0);
+  EXPECT (mpf_fits_sshort_p, 0);
+
+
+  if (error)
+    abort ();
+
+  mpf_clear (f);
+  mpf_clear (f0p5);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-get_d.c b/tests/mpf/t-get_d.c
new file mode 100644
index 0000000..4e4c741
--- /dev/null
+++ b/tests/mpf/t-get_d.c
@@ -0,0 +1,106 @@
+/* Test mpf_get_d and mpf_set_d.
+
+Copyright 1996, 1999-2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if defined (__vax) || defined (__vax__)
+#define LOW_BOUND 1e-38
+#define HIGH_BOUND 8e37
+#endif
+
+#if defined (_CRAY) && ! defined (_CRAYIEEE)
+/* The range varies mysteriously between Cray versions.  On an SV1,
+   the range seems to be 1e-600..1e603, but a cfp (non-ieee) T90
+   has a much smaller range of 1e-240..1e240.  */
+#define LOW_BOUND 1e-240
+#define HIGH_BOUND 1e240
+#endif
+
+#if ! defined (LOW_BOUND)
+#define LOW_BOUND 1e-300
+#define HIGH_BOUND 1e300
+#endif
+
+/* Round-trip doubles through an mpf of precision PRC, walking downward
+   into (and eventually below) the IEEE denormal range: starting from
+   1.9 and multiplying by 0.4 a fixed 820 times.  Every
+   mpf_set_d/mpf_get_d round trip must reproduce the double exactly.
+   Compiled to nothing on non-IEEE systems.  */
+void
+test_denorms (int prc)
+{
+#ifdef _GMP_IEEE_FLOATS
+  mpf_t f;
+  double in = 1.9;
+  int remaining = 820;
+
+  mpf_set_default_prec (prc);
+  mpf_init (f);
+
+  while (remaining-- > 0)
+    {
+      double out;
+
+      mpf_set_d (f, in);
+      out = mpf_get_d (f);
+      if (out != in)
+        abort ();
+
+      in *= 0.4;
+    }
+
+  mpf_clear (f);
+#endif
+}
+
+/* Walk doubles across [2*LOW_BOUND, HIGH_BOUND) by factors of 1.01,
+   checking that mpf_set_d preserves ordering against the previous
+   (smaller) value and that mpf_get_d returns the double to within one
+   part in 10^14.  Then run the denormal round-trip test at several
+   default precisions.  */
+int
+main (int argc, char **argv)
+{
+  double d, e, r;
+  mpf_t u, v;
+
+  tests_start ();
+  mpf_init (u);
+  mpf_init (v);
+
+  mpf_set_d (u, LOW_BOUND);
+  for (d = 2.0 * LOW_BOUND; d < HIGH_BOUND; d *= 1.01)
+    {
+      mpf_set_d (v, d);
+      if (mpf_cmp (u, v) >= 0)
+	abort ();
+      e = mpf_get_d (v);
+      r = e/d;
+      /* allow roughly one ulp of round-trip error */
+      if (r < 0.99999999999999 || r > 1.00000000000001)
+	{
+	  fprintf (stderr, "should be one ulp from 1: %.16f\n", r);
+	  abort ();
+	}
+      mpf_set (u, v);
+    }
+
+  mpf_clear (u);
+  mpf_clear (v);
+
+  test_denorms (10);
+  test_denorms (32);
+  test_denorms (64);
+  test_denorms (100);
+  test_denorms (200);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-get_d_2exp.c b/tests/mpf/t-get_d_2exp.c
new file mode 100644
index 0000000..b716cf8
--- /dev/null
+++ b/tests/mpf/t-get_d_2exp.c
@@ -0,0 +1,136 @@
+/* Test mpf_get_d_2exp.
+
+Copyright 2002, 2003, 2017, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check mpf_get_d_2exp on zero and on exactly representable values.
+   Each data entry base/2^shift lies in +-[0.5, 1), i.e. is already a
+   normalized fraction; scaling it by 2^exp for exp in [-513, 513] must
+   make mpf_get_d_2exp return exactly that fraction together with
+   exponent exp.  */
+static void
+check_data (void)
+{
+  mpf_t   f;
+  double  got, want;
+  long    got_exp;
+  long    exp;
+  struct {
+    int base;
+    int shift;
+  } data[] = {
+   {-1, 1}, {-3, 2}, {-5, 3}, {-7, 3}, { 1, 1}, { 3, 2}, { 5, 3}, { 7, 3}
+  };
+
+  mpf_init2 (f, 3);
+
+  /* zero must give 0.0 with exponent 0 */
+  got = mpf_get_d_2exp (&got_exp, f);
+  if (got != 0 || got_exp != 0)
+    {
+      printf    ("mpf_get_d_2exp wrong on zero\n");
+      mpf_trace ("   f    ", f);
+      d_trace   ("   got  ", got);
+      printf    ("   got exp  %ld\n", got_exp);
+      abort();
+    }
+
+  for (exp = -513; exp <= 513; exp++)
+    {
+      size_t i;
+      for (i = 0; i < numberof (data); i++)
+	{
+	  want = (double) data[i].base / (1 << data[i].shift);
+	  mpf_set_d (f, want);
+
+	  if (exp >= 0)
+	    mpf_mul_2exp (f, f, exp);
+	  else
+	    mpf_div_2exp (f, f, -exp);
+
+	  got = mpf_get_d_2exp (&got_exp, f);
+	  if (got != want || got_exp != exp)
+	    {
+	      printf    ("mpf_get_d_2exp wrong on 2**%ld\n", exp);
+	      mpf_trace ("   f    ", f);
+	      d_trace   ("   want ", want);
+	      d_trace   ("   got  ", got);
+	      printf    ("   want exp %ld\n", exp);
+	      printf    ("   got exp  %ld\n", got_exp);
+	      abort();
+	    }
+	}
+    }
+  mpf_clear (f);
+}
+
+/* Check that hardware rounding doesn't make mpf_get_d_2exp return a value
+   outside its defined range. */
+/* For each bit count k in data[], build 2^k - 1 (k one-bits) and check
+   mpf_get_d_2exp under all four hardware rounding modes.  Even when
+   the conversion rounds the k-bit value up, the result must stay
+   inside the documented range 0.5 <= d < 1.0 (i.e. must not round up
+   to exactly 1.0).  The previous rounding mode is restored on exit.  */
+static void
+check_round (void)
+{
+  static const unsigned long data[] = { 1, 32, 53, 54, 64, 128, 256, 512 };
+  mpf_t   f;
+  double  got;
+  long    got_exp;
+  int     i, rnd_mode, old_rnd_mode;
+
+  mpf_init2 (f, 1024L);
+  old_rnd_mode = tests_hardware_getround ();
+
+  for (rnd_mode = 0; rnd_mode < 4; rnd_mode++)
+    {
+      tests_hardware_setround (rnd_mode);
+
+      for (i = 0; i < numberof (data); i++)
+        {
+          /* f = 2^data[i] - 1 */
+          mpf_set_ui (f, 1L);
+          mpf_mul_2exp (f, f, data[i]);
+          mpf_sub_ui (f, f, 1L);
+
+          got = mpf_get_d_2exp (&got_exp, f);
+          if (got < 0.5 || got >= 1.0)
+            {
+              printf    ("mpf_get_d_2exp bad on 2**%lu-1\n", data[i]);
+              printf    ("result out of range, expect 0.5 <= got < 1.0\n");
+              printf    ("   rnd_mode = %d\n", rnd_mode);
+              printf    ("   data[i]  = %lu\n", data[i]);
+              mpf_trace ("   f    ", f);
+              d_trace   ("   got  ", got);
+              printf    ("   got exp  %ld\n", got_exp);
+              abort();
+            }
+        }
+    }
+
+  mpf_clear (f);
+  tests_hardware_setround (old_rnd_mode);
+}
+
+
+/* Run the mpf_get_d_2exp checks.  Traces from failures are printed in
+   hex (mp_trace_base = 16).  */
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = 16;
+
+  check_data ();
+  check_round ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-get_si.c b/tests/mpf/t-get_si.c
new file mode 100644
index 0000000..5510b04
--- /dev/null
+++ b/tests/mpf/t-get_si.c
@@ -0,0 +1,222 @@
+/* Exercise mpf_get_si.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check mpf_get_si against string-built mpf values: small integers of
+   both signs, fractional values (the fraction is truncated towards
+   zero), and huge integers from which only the low bits are extracted
+   (noted below as undocumented behaviour).  */
+void
+check_data (void)
+{
+  static const struct {
+    int         base;
+    const char  *f;
+    long        want;
+  } data[] = {
+    { 10, "0",      0L },
+    { 10, "1",      1L },
+    { 10, "-1",     -1L },
+    { 10, "2",      2L },
+    { 10, "-2",     -2L },
+    { 10, "12345",  12345L },
+    { 10, "-12345", -12345L },
+
+    /* fraction bits ignored */
+    { 10, "0.5",    0L },
+    { 10, "-0.5",   0L },
+    { 10, "1.1",    1L },
+    { 10, "-1.1",   -1L },
+    { 10, "1.9",    1L },
+    { 10, "-1.9",   -1L },
+    { 16, "1.000000000000000000000000000000000000000000000000001", 1L },
+    { 16, "-1.000000000000000000000000000000000000000000000000001", -1L },
+
+    /* low bits extracted (this is undocumented) */
+    { 16, "1000000000000000000000000000000000000000000000000001", 1L },
+    { 16, "-1000000000000000000000000000000000000000000000000001", -1L },
+  };
+
+  int    i;
+  mpf_t  f;
+  long   got;
+
+  mpf_init2 (f, 2000L);
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_str_or_abort (f, data[i].f, data[i].base);
+
+      got = mpf_get_si (f);
+      if (got != data[i].want)
+	{
+	  printf ("mpf_get_si wrong at data[%d]\n", i);
+	  printf ("   f     \"%s\"\n", data[i].f);
+	  printf ("     dec "); mpf_out_str (stdout, 10, 0, f); printf ("\n");
+	  printf ("     hex "); mpf_out_str (stdout, 16, 0, f); printf ("\n");
+	  printf ("     size %ld\n", (long) SIZ(f));
+	  printf ("     exp  %ld\n", (long) EXP(f));
+	  printf ("   got   %ld (0x%lX)\n", got, got);
+	  printf ("   want  %ld (0x%lX)\n", data[i].want, data[i].want);
+	  abort();
+	}
+    }
+  mpf_clear (f);
+}
+
+
+/* Check that LONG_MAX and LONG_MIN survive an mpf_set_si/mpf_get_si
+   round trip unchanged.  The CHECK_MAX macro compares the locals `got'
+   and `want' set just before each use and aborts with a dump on
+   mismatch; NAME labels the failing case.  */
+void
+check_max (void)
+{
+  mpf_t  f;
+  long   want;
+  long   got;
+
+  mpf_init2 (f, 200L);
+
+#define CHECK_MAX(name)                                         \
+  if (got != want)                                              \
+    {                                                           \
+      printf ("mpf_get_si wrong on %s\n", name);                \
+      printf ("   f    ");                                      \
+      mpf_out_str (stdout, 10, 0, f); printf (", hex ");        \
+      mpf_out_str (stdout, 16, 0, f); printf ("\n");            \
+      printf ("   got  %ld, hex %lX\n", got, got);              \
+      printf ("   want %ld, hex %lX\n", want, want);            \
+      abort();                                                  \
+    }
+
+  want = LONG_MAX;
+  mpf_set_si (f, want);
+  got = mpf_get_si (f);
+  CHECK_MAX ("LONG_MAX");
+
+  want = LONG_MIN;
+  mpf_set_si (f, want);
+  got = mpf_get_si (f);
+  CHECK_MAX ("LONG_MIN");
+
+  mpf_clear (f);
+}
+
+
+/* Exercise mpf_get_si on mpf operands assembled limb-by-limb, covering
+   unnormalized mantissas and extreme exponents.  Each case is built by
+   pointing PTR(f) into the middle of a stack buffer whose flanks are
+   filled with 0xDEADBEEF sentinels, so any out-of-range limb access
+   would show up as a wild result; f is deliberately never mpf_init'd
+   since every field is set by hand and MPF_CHECK_FORMAT validates it.
+   NOTE(review): `want' and `got' are unsigned long although mpf_get_si
+   returns long -- the comparisons are exact modulo conversion, e.g.
+   the -1 case is expressed as -1UL.  */
+void
+check_limbdata (void)
+{
+#define M  GMP_NUMB_MAX
+
+  static const struct {
+    mp_exp_t       exp;
+    mp_size_t      size;
+    mp_limb_t      d[10];
+    unsigned long  want;
+
+  } data[] = {
+
+    /* in the comments here, a "_" indicates a digit (ie. limb) position not
+       included in the d data, and therefore zero */
+
+    { 0, 0, { 0 }, 0L },    /* 0 */
+
+    { 1,  1, { 1 }, 1L },   /* 1 */
+    { 1, -1, { 1 }, -1UL },  /* -1 */
+
+    { 0,  1, { 1 }, 0L },   /* .1 */
+    { 0, -1, { 1 }, 0L },   /* -.1 */
+
+    { -1,  1, { 1 }, 0L },  /* ._1 */
+    { -1, -1, { 1 }, 0L },  /* -._1 */
+
+    { -999,          1, { 1 }, 0L },   /* .___1 small */
+    { MP_EXP_T_MIN,  1, { 1 }, 0L },   /* .____1 very small */
+
+    { 999,          1, { 1 }, 0L },    /* 1____. big */
+    { MP_EXP_T_MAX, 1, { 1 }, 0L },    /* 1_____. very big */
+
+    { 1, 2, { 999, 2 }, 2L },                  /* 2.9 */
+    { 5, 8, { 7, 8, 9, 3, 0, 0, 0, 1 }, 3L },  /* 10003.987 */
+
+    { 2, 2, { M, M },    LONG_MAX }, /* FF. */
+    { 2, 2, { M, M, M }, LONG_MAX }, /* FF.F */
+    { 3, 3, { M, M, M }, LONG_MAX }, /* FFF. */
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    /* normal case, numb bigger than long */
+    { 2,  1, { 1 },    0L },      /* 1_. */
+    { 2,  2, { 0, 1 }, 0L },      /* 10. */
+    { 2,  2, { 999, 1 }, 999L },  /* 19. */
+    { 3,  2, { 999, 1 }, 0L },    /* 19_. */
+
+#else
+    /* nails case, numb smaller than long */
+    { 2,  1, { 1 }, 1L << GMP_NUMB_BITS },  /* 1_. */
+    { 3,  1, { 1 }, 0L },                   /* 1__. */
+
+    { 2,  2, { 99, 1 },    99L + (1L << GMP_NUMB_BITS) },  /* 19. */
+    { 3,  2, { 1, 99 },    1L << GMP_NUMB_BITS },          /* 91_. */
+    { 3,  3, { 0, 1, 99 }, 1L << GMP_NUMB_BITS },          /* 910. */
+
+#endif
+  };
+
+  mpf_t          f;
+  unsigned long  got;
+  int            i;
+  /* `i' here appears only inside numberof's sizeof, so it is not
+     evaluated; the array has 10 limbs of guard band on each side */
+  mp_limb_t      buf[20 + numberof(data[i].d)];
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      refmpn_fill (buf, 10, CNST_LIMB(0xDEADBEEF));
+      refmpn_copy (buf+10, data[i].d, ABS(data[i].size));
+      refmpn_fill (buf+10+ABS(data[i].size), 10, CNST_LIMB(0xDEADBEEF));
+
+      PTR(f) = buf+10;
+      EXP(f) = data[i].exp;
+      SIZ(f) = data[i].size;
+      PREC(f) = numberof (data[i].d);
+      MPF_CHECK_FORMAT (f);
+
+      got = mpf_get_si (f);
+      if (got != data[i].want)
+	{
+	  printf    ("mpf_get_si wrong at limb data[%d]\n", i);
+	  mpf_trace ("  f", f);
+	  mpn_trace ("  d", data[i].d, data[i].size);
+	  printf    ("  size %ld\n", (long) data[i].size);
+	  printf    ("  exp %ld\n", (long) data[i].exp);
+	  printf    ("  got   %lu (0x%lX)\n", got, got);
+	  printf    ("  want  %lu (0x%lX)\n", data[i].want, data[i].want);
+	  abort();
+	}
+    }
+}
+
+
+/* Run all mpf_get_si checks: string-built values, LONG_MAX/LONG_MIN
+   round trips, and hand-built limb data.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+  check_max ();
+  check_limbdata ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-get_ui.c b/tests/mpf/t-get_ui.c
new file mode 100644
index 0000000..6011b0f
--- /dev/null
+++ b/tests/mpf/t-get_ui.c
@@ -0,0 +1,127 @@
+/* Exercise mpf_get_ui.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Exercise mpf_get_ui on mpf operands assembled limb-by-limb, covering
+   unnormalized mantissas and extreme exponents.  Each case is built by
+   pointing PTR(f) into the middle of a stack buffer whose flanks are
+   filled with 0xDEADBEEF sentinels, so any out-of-range limb access
+   would show up as a wild result; f is deliberately never mpf_init'd
+   since every field is set by hand and MPF_CHECK_FORMAT validates it.
+   The sign is ignored by mpf_get_ui, hence -1 also yields 1.  */
+void
+check_limbdata (void)
+{
+#define M  GMP_NUMB_MAX
+
+  static const struct {
+    mp_exp_t       exp;
+    mp_size_t      size;
+    mp_limb_t      d[10];
+    unsigned long  want;
+
+  } data[] = {
+
+    /* in the comments here, a "_" indicates a digit (ie. limb) position not
+       included in the d data, and therefore zero */
+
+    { 0, 0, { 0 }, 0L },    /* 0 */
+
+    { 1,  1, { 1 }, 1L },   /* 1 */
+    { 1, -1, { 1 }, 1L },   /* -1 */
+
+    { 0,  1, { 1 }, 0L },   /* .1 */
+    { 0, -1, { 1 }, 0L },   /* -.1 */
+
+    { -1,  1, { 1 }, 0L },  /* ._1 */
+    { -1, -1, { 1 }, 0L },  /* -._1 */
+
+    { -999,          1, { 1 }, 0L },   /* .___1 small */
+    { MP_EXP_T_MIN,  1, { 1 }, 0L },   /* .____1 very small */
+
+    { 999,          1, { 1 }, 0L },    /* 1____. big */
+    { MP_EXP_T_MAX, 1, { 1 }, 0L },    /* 1_____. very big */
+
+    { 1, 2, { 999, 2 }, 2L },                  /* 2.9 */
+    { 5, 8, { 7, 8, 9, 3, 0, 0, 0, 1 }, 3L },  /* 10003.987 */
+
+    { 2, 2, { M, M },    ULONG_MAX }, /* FF. */
+    { 2, 2, { M, M, M }, ULONG_MAX }, /* FF.F */
+    { 3, 3, { M, M, M }, ULONG_MAX }, /* FFF. */
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    /* normal case, numb bigger than long */
+    { 2,  1, { 1 },    0L },      /* 1_. */
+    { 2,  2, { 0, 1 }, 0L },      /* 10. */
+    { 2,  2, { 999, 1 }, 999L },  /* 19. */
+    { 3,  2, { 999, 1 }, 0L },    /* 19_. */
+
+#else
+    /* nails case, numb smaller than long */
+    { 2,  1, { 1 }, 1L << GMP_NUMB_BITS },  /* 1_. */
+    { 3,  1, { 1 }, 0L },                   /* 1__. */
+
+    { 2,  2, { 99, 1 },    99L + (1L << GMP_NUMB_BITS) },  /* 19. */
+    { 3,  2, { 1, 99 },    1L << GMP_NUMB_BITS },          /* 91_. */
+    { 3,  3, { 0, 1, 99 }, 1L << GMP_NUMB_BITS },          /* 910. */
+
+#endif
+  };
+
+  mpf_t          f;
+  unsigned long  got;
+  int            i;
+  /* `i' here appears only inside numberof's sizeof, so it is not
+     evaluated; the array has 10 limbs of guard band on each side */
+  mp_limb_t      buf[20 + numberof(data[i].d)];
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      refmpn_fill (buf, 10, CNST_LIMB(0xDEADBEEF));
+      refmpn_copy (buf+10, data[i].d, ABS(data[i].size));
+      refmpn_fill (buf+10+ABS(data[i].size), 10, CNST_LIMB(0xDEADBEEF));
+
+      PTR(f) = buf+10;
+      EXP(f) = data[i].exp;
+      SIZ(f) = data[i].size;
+      PREC(f) = numberof (data[i].d);
+      MPF_CHECK_FORMAT (f);
+
+      got = mpf_get_ui (f);
+      if (got != data[i].want)
+	{
+	  printf    ("mpf_get_ui wrong at limb data[%d]\n", i);
+	  mpf_trace ("  f", f);
+	  mpn_trace ("  d", data[i].d, data[i].size);
+	  printf    ("  size %ld\n", (long) data[i].size);
+	  printf    ("  exp %ld\n", (long) data[i].exp);
+	  printf    ("  got   %lu (0x%lX)\n", got, got);
+	  printf    ("  want  %lu (0x%lX)\n", data[i].want, data[i].want);
+	  abort();
+	}
+    }
+}
+
+/* Run the mpf_get_ui limb-data checks; traces print in hex.  */
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = 16;
+
+  check_limbdata ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-gsprec.c b/tests/mpf/t-gsprec.c
new file mode 100644
index 0000000..da07f3e
--- /dev/null
+++ b/tests/mpf/t-gsprec.c
@@ -0,0 +1,61 @@
+/* Test mpf_get_prec and mpf_set_prec.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Verify mpf_set_prec/mpf_get_prec reach a fixed point: after setting
+   the precision reported by mpf_get_prec, mpf_get_prec must report
+   that same precision again.  Checked for every requested precision
+   1..1999.  */
+void
+check_consistency (void)
+{
+  mpf_t  x;
+  unsigned long  req;
+
+  mpf_init (x);
+
+  for (req = 1; req < 2000; req++)
+    {
+      unsigned long  first, second;
+
+      mpf_set_prec (x, req);
+      first = mpf_get_prec (x);
+      mpf_set_prec (x, first);
+      second = mpf_get_prec (x);
+
+      if (first != second)
+        {
+          printf ("mpf_get_prec / mpf_set_prec inconsistent\n");
+          printf ("   set %lu gives %lu, but then set %lu gives %lu\n",
+                  req, first,
+                  first, second);
+          abort ();
+        }
+    }
+
+  mpf_clear (x);
+}
+
+/* Run the mpf_get_prec/mpf_set_prec consistency check.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_consistency ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-inp_str.c b/tests/mpf/t-inp_str.c
new file mode 100644
index 0000000..75e0fce
--- /dev/null
+++ b/tests/mpf/t-inp_str.c
@@ -0,0 +1,191 @@
+/* Test mpf_inp_str.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>		/* for unlink */
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define FILENAME  "/tmp/t-inp_str.tmp"
+
+
+/* Check mpf_inp_str against fixed inputs in various bases, each written
+   to a scratch file with 0..3 leading and 0..2 trailing spaces.  For
+   every combination, verify: the returned character count matches both
+   the expected count and the stream position (ftell); the stream is at
+   EOF exactly when the whole input was consumed and no trailing spaces
+   follow; and the parsed value equals the expected decimal value.
+   A count of 0 means the input was rejected (value left as 0).  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *inp;
+    int         base;
+    const char  *want;
+    int         want_nread;
+
+  } data[] = {
+
+    { "0",   10, "0", 1 },
+
+    { "abc", 10, "0", 0 },
+    { "ghi", 16, "0", 0 },
+
+    { "125",    10, "125",  3 },
+    { "125e1",  10, "1250", 5 },
+    { "12e+2",  10, "1200", 5 },
+    { "125e-1", 10, "12.5", 6 },
+
+    {  "ff", 16,  "255", 2 },
+    { "-ff", 16, "-255", 3 },
+    {  "FF", 16,  "255", 2 },
+    { "-FF", 16, "-255", 3 },
+
+    { "100",     16, "256",  3 },
+    { "100@1",   16, "4096", 5 },
+    { "100@10",  16, "4722366482869645213696", 6 },
+    { "100@10", -16, "281474976710656",        6 },
+    { "100@-1",  16, "16",   6 },
+    { "10000000000000000@-10",  16, "1", 21 },
+    { "10000000000@-10",       -16, "1", 15 },
+
+    { "z", 36, "35", 1 },
+    { "Z", 36, "35", 1 },
+    { "z@1", 36, "1260", 3 },
+    { "Z@1", 36, "1260", 3 },
+
+    {  "0",      0,   "0", 1 },
+  };
+
+  mpf_t  got, want;
+  long   ftell_nread;
+  int    i, pre, post, j, got_nread, want_nread;
+  FILE   *fp;
+
+  mpf_init (got);
+  mpf_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (pre = 0; pre <= 3; pre++)
+        {
+          for (post = 0; post <= 2; post++)
+            {
+              mpf_set_str_or_abort (want, data[i].want, 10);
+              MPF_CHECK_FORMAT (want);
+
+              /* create the file new each time to ensure its length is what
+                 we want */
+              fp = fopen (FILENAME, "w+");
+              ASSERT_ALWAYS (fp != NULL);
+              for (j = 0; j < pre; j++)
+                putc (' ', fp);
+              fputs (data[i].inp, fp);
+              for (j = 0; j < post; j++)
+                putc (' ', fp);
+              fflush (fp);
+              ASSERT_ALWAYS (! ferror(fp));
+
+              rewind (fp);
+              got_nread = mpf_inp_str (got, fp, data[i].base);
+
+              /* the returned count must agree with the stream position */
+              if (got_nread != 0)
+                {
+                  ftell_nread = ftell (fp);
+                  if (got_nread != ftell_nread)
+                    {
+                      printf ("mpf_inp_str nread wrong\n");
+                      printf ("  inp          \"%s\"\n", data[i].inp);
+                      printf ("  base         %d\n", data[i].base);
+                      printf ("  pre          %d\n", pre);
+                      printf ("  post         %d\n", post);
+                      printf ("  got_nread    %d\n", got_nread);
+                      printf ("  ftell_nread  %ld\n", ftell_nread);
+                      abort ();
+                    }
+                }
+
+              /* if data[i].inp is a whole string to read and there's no post
+                 whitespace then expect to have EOF */
+              if (post == 0 && data[i].want_nread == strlen(data[i].inp))
+                {
+                  int  c = getc(fp);
+                  if (c != EOF)
+                    {
+                      printf ("mpf_inp_str didn't read to EOF\n");
+                      printf ("  inp   \"%s\"\n", data[i].inp);
+                      printf ("  base  %d\n", data[i].base);
+                      printf ("  pre   %d\n", pre);
+                      printf ("  post  %d\n", post);
+                      printf ("  c     '%c' %#x\n", c, c);
+                      abort ();
+                    }
+                }
+
+              /* only expect "pre" included in the count when non-zero */
+              want_nread = data[i].want_nread;
+              if (want_nread != 0)
+                want_nread += pre;
+
+              if (got_nread != want_nread)
+                {
+                  printf ("mpf_inp_str nread wrong\n");
+                  printf ("  inp         \"%s\"\n", data[i].inp);
+                  printf ("  base        %d\n", data[i].base);
+                  printf ("  pre         %d\n", pre);
+                  printf ("  post        %d\n", post);
+                  printf ("  got_nread   %d\n", got_nread);
+                  printf ("  want_nread  %d\n", want_nread);
+                  abort ();
+                }
+
+              MPF_CHECK_FORMAT (got);
+
+              if (mpf_cmp (got, want) != 0)
+                {
+                  printf ("mpf_inp_str wrong result\n");
+                  printf ("  inp   \"%s\"\n", data[i].inp);
+                  printf ("  base  %d\n", data[i].base);
+                  mpf_trace ("  got ",  got);
+                  mpf_trace ("  want", want);
+                  abort ();
+                }
+
+              ASSERT_ALWAYS (fclose (fp) == 0);
+            }
+        }
+    }
+
+  mpf_clear (got);
+  mpf_clear (want);
+}
+
+/* Run the mpf_inp_str data tests, then remove the scratch file.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  unlink (FILENAME);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-int_p.c b/tests/mpf/t-int_p.c
new file mode 100644
index 0000000..3e536db
--- /dev/null
+++ b/tests/mpf/t-int_p.c
@@ -0,0 +1,90 @@
+/* Test mpf_integer_p.
+
+Copyright 2001, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+one (mpf_srcptr f, int want)
+{
+  int  got;
+  got = mpf_integer_p (f);
+  if (got != want)
+    {
+      printf ("mpf_integer_p got %d want %d\n", got, want);
+      mpf_trace (" f", f);
+      abort ();
+    }
+}
+
+void
+all (mpf_ptr f, int want)
+{
+  one (f, want);
+  mpf_neg (f, f);
+  one (f, want);
+}
+
+int
+main (void)
+{
+  mpf_t  f;
+
+  tests_start ();
+  mpf_init2 (f, 200L);
+
+  mpf_set_ui (f, 0L);
+  one (f, 1);
+
+  mpf_set_ui (f, 1L);
+  all (f, 1);
+
+  mpf_set_ui (f, 1L);
+  mpf_div_2exp (f, f, 1L);
+  all (f, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_div_2exp (f, f, 5000L);
+  all (f, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_mul_2exp (f, f, 5000L);
+  all (f, 1);
+
+  mpf_set_str (f, "0.5", 10);
+  all (f, 0);
+
+  mpf_set_str (f, "2.5", 10);
+  all (f, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_div_ui (f, f, 3L);
+  all (f, 0);
+
+  mpf_set_ui (f, 7L);
+  mpf_div_ui (f, f, 3L);
+  all (f, 0);
+
+  mpf_clear (f);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-mul_ui.c b/tests/mpf/t-mul_ui.c
new file mode 100644
index 0000000..f362bb2
--- /dev/null
+++ b/tests/mpf/t-mul_ui.c
@@ -0,0 +1,164 @@
+/* Exercise mpf_mul_ui.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (const char *desc, mpf_ptr got, mpf_srcptr u, unsigned long v)
+{
+  mp_size_t  usize, usign;
+  mp_ptr     wp;
+  mpf_t      want;
+
+  MPF_CHECK_FORMAT (got);
+
+  /* this code not nailified yet */
+  ASSERT_ALWAYS (BITS_PER_ULONG <= GMP_NUMB_BITS);
+  usign = SIZ (u);
+  usize = ABS (usign);
+  wp = refmpn_malloc_limbs (usize + 1);
+  wp[usize] = mpn_mul_1 (wp, PTR(u), usize, (mp_limb_t) v);
+
+  PTR(want) = wp;
+  SIZ(want) = (usign >= 0 ? usize+1 : -(usize+1));
+  EXP(want) = EXP(u) + 1;
+  refmpf_normalize (want);
+
+  if (! refmpf_validate ("mpf_mul_ui", got, want))
+    {
+      mp_trace_base = -16;
+      printf    ("  %s\n", desc);
+      mpf_trace ("  u", u);
+      printf    ("  v %ld  0x%lX\n", v, v);
+      abort ();
+    }
+
+  free (wp);
+}
+
+void
+check_rand (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  mpf_t              got, u;
+  unsigned long      prec, v;
+  int                i;
+
+  /* The nails code in mpf_mul_ui currently isn't exact, so suppress these
+     tests for now.  */
+  if (BITS_PER_ULONG > GMP_NUMB_BITS)
+    return;
+
+  mpf_init (got);
+  mpf_init (u);
+
+  for (i = 0; i < 200; i++)
+    {
+      /* got precision */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (got, prec);
+
+      /* u precision */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (u, prec);
+
+      /* u, possibly negative */
+      mpf_random2 (u, PREC(u), (mp_exp_t) 20);
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (u, u);
+
+      /* v, 0 to BITS_PER_ULONG bits (inclusive) */
+      prec = gmp_urandomm_ui (rands, BITS_PER_ULONG+1);
+      v = gmp_urandomb_ui (rands, prec);
+
+      if ((i % 2) == 0)
+        {
+          /* separate */
+          mpf_mul_ui (got, u, v);
+          check_one ("separate", got, u, v);
+        }
+      else
+        {
+          /* overlap */
+          prec = refmpf_set_overlap (got, u);
+          mpf_mul_ui (got, got, v);
+          check_one ("overlap src==dst", got, u, v);
+
+          mpf_set_prec_raw (got, prec);
+        }
+    }
+
+  mpf_clear (got);
+  mpf_clear (u);
+}
+
+void
+check_various (void)
+{
+  mpf_t  u, got, want;
+  const char   *s;
+
+  mpf_init2 (u,    2*8*sizeof(long));
+  mpf_init2 (got,  2*8*sizeof(long));
+  mpf_init2 (want, 2*8*sizeof(long));
+
+  s = "0 * ULONG_MAX";
+  mpf_set_ui (u, 0L);
+  mpf_mul_ui (got, u, ULONG_MAX);
+  MPF_CHECK_FORMAT (got);
+  mpf_set_ui (want, 0L);
+  if (mpf_cmp (got, want) != 0)
+    {
+    error:
+      printf ("Wrong result from %s\n", s);
+      mpf_trace ("u   ", u);
+      mpf_trace ("got ", got);
+      mpf_trace ("want", want);
+      abort ();
+    }
+
+  s = "1 * ULONG_MAX";
+  mpf_set_ui (u, 1L);
+  mpf_mul_ui (got, u, ULONG_MAX);
+  MPF_CHECK_FORMAT (got);
+  mpf_set_ui (want, ULONG_MAX);
+  if (mpf_cmp (got, want) != 0)
+    goto error;
+
+  mpf_clear (u);
+  mpf_clear (got);
+  mpf_clear (want);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_various ();
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-muldiv.c b/tests/mpf/t-muldiv.c
new file mode 100644
index 0000000..86dca57
--- /dev/null
+++ b/tests/mpf/t-muldiv.c
@@ -0,0 +1,158 @@
+/* Test mpf_mul, mpf_div, mpf_ui_div, and mpf_div_ui.
+
+Copyright 1996, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  int reps = 10000;
+  int i;
+  mpf_t u, v, w, x;
+  mp_size_t bprec = SIZE * GMP_LIMB_BITS;
+  mpf_t rerr, limit_rerr;
+  unsigned long ulimb, vlimb;
+  int single_flag;
+
+  tests_start ();
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+	bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init (rerr);
+  mpf_init (limit_rerr);
+
+  mpf_init (u);
+  mpf_init (v);
+  mpf_init (w);
+  mpf_init (x);
+
+  for (i = 0; i < reps; i++)
+    {
+      mp_size_t res_prec;
+
+      res_prec = urandom () % bprec + 1;
+      mpf_set_prec (w, res_prec);
+      mpf_set_prec (x, res_prec);
+
+      mpf_set_ui (limit_rerr, 1);
+      mpf_div_2exp (limit_rerr, limit_rerr, res_prec - 1);
+
+      single_flag = 0;
+
+      if ((urandom () & 1) != 0)
+	{
+	  size = urandom () % (2 * SIZE) - SIZE;
+	  exp = urandom () % SIZE;
+	  mpf_random2 (u, size, exp);
+	}
+      else
+	{
+	  ulimb = urandom ();
+	  mpf_set_ui (u, ulimb);
+	  single_flag = 1;
+	}
+
+      if ((urandom () & 1) != 0)
+	{
+	  size = urandom () % (2 * SIZE) - SIZE;
+	  exp = urandom () % SIZE;
+	  mpf_random2 (v, size, exp);
+	}
+      else
+	{
+	  vlimb = urandom ();
+	  mpf_set_ui (v, vlimb);
+	  single_flag = 2;
+	}
+
+      if (mpf_sgn (v) == 0)
+	continue;
+
+      mpf_div (w, u, v);
+      mpf_mul (x, w, v);
+      mpf_reldiff (rerr, u, x);
+      if (mpf_cmp (rerr, limit_rerr) > 0)
+	{
+	  printf ("ERROR in mpf_mul or mpf_div after %d tests\n", i);
+	  printf ("   u = "); mpf_dump (u);
+	  printf ("   v = "); mpf_dump (v);
+	  printf ("   x = "); mpf_dump (x);
+	  printf ("   w = "); mpf_dump (w);
+	  abort ();
+	}
+
+      if (single_flag == 2)
+	{
+	  mpf_div_ui (x, u, vlimb);
+	  mpf_reldiff (rerr, w, x);
+	  if (mpf_cmp (rerr, limit_rerr) > 0)
+	    {
+	      printf ("ERROR in mpf_div or mpf_div_ui after %d tests\n", i);
+	      printf ("   u = "); mpf_dump (u);
+	      printf ("   v = "); mpf_dump (v);
+	      printf ("   x = "); mpf_dump (x);
+	      printf ("   w = "); mpf_dump (w);
+	      abort ();
+	    }
+	}
+
+      if (single_flag == 1)
+	{
+	  mpf_ui_div (x, ulimb, v);
+	  mpf_reldiff (rerr, w, x);
+	  if (mpf_cmp (rerr, limit_rerr) > 0)
+	    {
+	      printf ("ERROR in mpf_div or mpf_ui_div after %d tests\n", i);
+	      printf ("   u = "); mpf_dump (u);
+	      printf ("   v = "); mpf_dump (v);
+	      printf ("   x = "); mpf_dump (x);
+	      printf ("   w = "); mpf_dump (w);
+	      abort ();
+	    }
+	}
+    }
+
+  mpf_clear (rerr);
+  mpf_clear (limit_rerr);
+
+  mpf_clear (u);
+  mpf_clear (v);
+  mpf_clear (w);
+  mpf_clear (x);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-pow_ui.c b/tests/mpf/t-pow_ui.c
new file mode 100644
index 0000000..f005301
--- /dev/null
+++ b/tests/mpf/t-pow_ui.c
@@ -0,0 +1,69 @@
+/* Test mpf_pow_ui
+
+Copyright 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void
+check_data (void)
+{
+  unsigned int b, e;
+  mpf_t b1, r, r2, limit;
+
+  mpf_inits (b1, r, r2, NULL);
+  mpf_init_set_ui (limit, 1);
+  mpf_mul_2exp (limit, limit, MAX (GMP_NUMB_BITS, 53));
+
+  /* This test just test integers with results that fit in a single
+     limb or 53 bits.  This avoids any rounding.  */
+
+  for (b = 0; b <= 400; b++)
+    {
+      mpf_set_ui (b1, b);
+      mpf_set_ui (r2, 1);
+      for (e = 0; e <= GMP_LIMB_BITS; e++)
+	{
+	  mpf_pow_ui (r, b1, e);
+
+	  if (mpf_cmp (r, r2))
+	    abort ();
+
+	  mpf_mul_ui (r2, r2, b);
+
+	  if (mpf_cmp (r2, limit) >= 0)
+	    break;
+	}
+    }
+
+  mpf_clears (b1, r, r2, limit, NULL);
+}
+
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-set.c b/tests/mpf/t-set.c
new file mode 100644
index 0000000..2510748
--- /dev/null
+++ b/tests/mpf/t-set.c
@@ -0,0 +1,112 @@
+/* Test mpf_set, mpf_init_set.
+
+Copyright 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+void
+check_reuse (void)
+{
+  /* Try mpf_set(f,f) when f is bigger than prec.  In the past this had
+     resulted in an MPN_COPY with invalid operand overlap. */
+  mpf_t  f;
+  mp_size_t      limbs = 20;
+  unsigned long  bits = limbs * GMP_NUMB_BITS;
+  mpf_init2 (f, bits);
+  refmpf_fill (f, limbs, GMP_NUMB_MAX);
+  mpf_set_prec_raw (f, bits / 2);
+  mpf_set (f, f);
+  MPF_CHECK_FORMAT (f);
+  mpf_set_prec_raw (f, bits);
+  mpf_clear (f);
+}
+
+void
+check_random (long reps)
+{
+  unsigned long test;
+  gmp_randstate_ptr rands;
+  mpf_t a, b;
+  mpz_t z;
+  int precbits;
+
+#define PRECBITS 10
+
+  rands = RANDS;
+
+  mpz_init (z);
+  mpf_init2 (a, 1 << PRECBITS);
+
+  for (test = 0; test < reps; test++)
+    {
+      mpz_urandomb (z, rands, PRECBITS + 1);
+      precbits = mpz_get_ui (z) + 1;
+      mpz_urandomb (z, rands, precbits);
+      mpz_setbit (z, precbits  - 1);	/* make sure msb is set */
+      mpf_set_z (a, z);
+      if (precbits & 1)
+	mpf_neg (a, a);
+      mpz_urandomb (z, rands, PRECBITS);
+      mpf_div_2exp (a, a, mpz_get_ui (z) + 1);
+      mpz_urandomb (z, rands, PRECBITS);
+      precbits -= mpz_get_ui (z);
+      if (precbits <= 0)
+	precbits = 1 - precbits;
+      mpf_set_default_prec (precbits);
+
+      mpf_init_set (b, a);
+      MPF_CHECK_FORMAT (b);
+      if (!mpf_eq (a, b, precbits))
+	{
+	  printf ("mpf_init_set wrong.\n");
+	  abort();
+	}
+
+      mpf_set_ui (b, 0);
+      mpf_set (b, a);
+      MPF_CHECK_FORMAT (b);
+      if (!mpf_eq (a, b, precbits))
+	{
+	  printf ("mpf_set wrong.\n");
+	  abort();
+	}
+
+      mpf_clear (b);
+    }
+
+  mpf_clear (a);
+  mpz_clear (z);
+}
+
+int
+main (int argc, char *argv[])
+{
+  long reps = 10000;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  check_reuse ();
+  check_random (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-set_q.c b/tests/mpf/t-set_q.c
new file mode 100644
index 0000000..86dec6e
--- /dev/null
+++ b/tests/mpf/t-set_q.c
@@ -0,0 +1,126 @@
+/* Test mpf_set_q.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpf_ptr got, mpq_srcptr q)
+{
+  mpf_t  n, d;
+
+  mpf_set_q (got, q);
+
+  PTR(n) = PTR(&q->_mp_num);
+  SIZ(n) = SIZ(&q->_mp_num);
+  EXP(n) = ABSIZ(&q->_mp_num);
+
+  PTR(d) = PTR(&q->_mp_den);
+  SIZ(d) = SIZ(&q->_mp_den);
+  EXP(d) = ABSIZ(&q->_mp_den);
+
+  if (! refmpf_validate_division ("mpf_set_q", got, n, d))
+    {
+      mp_trace_base = -16;
+      mpq_trace ("   q", q);
+      abort ();
+    }
+}
+
+void
+check_rand (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  prec;
+  mpf_t  got;
+  mpq_t  q;
+  int    i;
+
+  mpf_init (got);
+  mpq_init (q);
+
+  for (i = 0; i < 400; i++)
+    {
+      /* result precision */
+      prec = min_prec + gmp_urandomm_ui (rands, 20L);
+      refmpf_set_prec_limbs (got, prec);
+
+      /* num */
+      prec = gmp_urandomm_ui (rands, 20L * GMP_NUMB_BITS);
+      mpz_rrandomb (mpq_numref(q), rands, prec);
+
+      /* possibly negative num */
+      if (gmp_urandomb_ui (rands, 1L))
+        mpz_neg (mpq_numref(q), mpq_numref(q));
+
+      /* den, non-zero */
+      do {
+        prec = gmp_urandomm_ui (rands, 20L * GMP_NUMB_BITS);
+        mpz_rrandomb (mpq_denref(q), rands, prec);
+      } while (mpz_sgn (mpq_denref(q)) <= 0);
+
+      check_one (got, q);
+    }
+
+  mpf_clear (got);
+  mpq_clear (q);
+}
+
+void
+check_various (void)
+{
+  mpf_t got;
+  mpq_t q;
+
+  mpf_init (got);
+  mpq_init (q);
+
+  /* 1/1 == 1 */
+  mpf_set_prec (got, 20L);
+  mpq_set_ui (q, 1L, 1L);
+  mpf_set_q (got, q);
+  MPF_CHECK_FORMAT (got);
+  ASSERT_ALWAYS (mpf_cmp_ui (got, 1L) == 0);
+
+  /* 1/(2^n+1), a case where truncating the divisor would be wrong */
+  mpf_set_prec (got, 500L);
+  mpq_set_ui (q, 1L, 1L);
+  mpz_mul_2exp (mpq_denref(q), mpq_denref(q), 800L);
+  mpz_add_ui (mpq_denref(q), mpq_denref(q), 1L);
+  check_one (got, q);
+
+  mpf_clear (got);
+  mpq_clear (q);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_various ();
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-set_si.c b/tests/mpf/t-set_si.c
new file mode 100644
index 0000000..5cd6c89
--- /dev/null
+++ b/tests/mpf/t-set_si.c
@@ -0,0 +1,90 @@
+/* Test mpf_set_si and mpf_init_set_si.
+
+Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+void
+check_data (void)
+{
+  static const struct {
+    long       x;
+    mp_size_t  want_size;
+    mp_limb_t  want_data[2];
+  } data[] = {
+
+    {  0L,  0 },
+    {  1L,  1, { 1 } },
+    { -1L, -1, { 1 } },
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    { LONG_MAX,  1, { LONG_MAX, 0 } },
+    { -LONG_MAX,  -1, { LONG_MAX, 0 } },
+    { LONG_HIGHBIT,  -1, { ULONG_HIGHBIT, 0 } },
+#else
+    { LONG_MAX,  2, { LONG_MAX & GMP_NUMB_MASK, LONG_MAX >> GMP_NUMB_BITS } },
+    { -LONG_MAX,  -2, { LONG_MAX & GMP_NUMB_MASK, LONG_MAX >> GMP_NUMB_BITS }},
+    { LONG_HIGHBIT,  -2, { 0, ULONG_HIGHBIT >> GMP_NUMB_BITS } },
+#endif
+  };
+
+  mpf_t  x;
+  int    i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_init (x);
+      mpf_set_si (x, data[i].x);
+      MPF_CHECK_FORMAT (x);
+      if (x->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0
+          || x->_mp_exp != ABS (data[i].want_size))
+        {
+          printf ("mpf_set_si wrong on data[%d]\n", i);
+          abort();
+        }
+      mpf_clear (x);
+
+      mpf_init_set_si (x, data[i].x);
+      MPF_CHECK_FORMAT (x);
+      if (x->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0
+          || x->_mp_exp != ABS (data[i].want_size))
+        {
+          printf ("mpf_init_set_si wrong on data[%d]\n", i);
+          abort();
+        }
+      mpf_clear (x);
+    }
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-set_ui.c b/tests/mpf/t-set_ui.c
new file mode 100644
index 0000000..828067c
--- /dev/null
+++ b/tests/mpf/t-set_ui.c
@@ -0,0 +1,89 @@
+/* Test mpf_set_ui and mpf_init_set_ui.
+
+Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+void
+check_data (void)
+{
+  static const struct {
+    unsigned long  x;
+    mp_size_t      want_size;
+    mp_limb_t      want_data[2];
+  } data[] = {
+
+    {  0L,  0 },
+    {  1L,  1, { 1 } },
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    { ULONG_MAX,     1, { ULONG_MAX, 0 } },
+    { ULONG_HIGHBIT, 1, { ULONG_HIGHBIT, 0 } },
+#else
+    { ULONG_MAX,     2, { ULONG_MAX & GMP_NUMB_MASK,
+                          ULONG_MAX >> GMP_NUMB_BITS } },
+    { ULONG_HIGHBIT, 2, { 0,
+                          ULONG_HIGHBIT >> GMP_NUMB_BITS } },
+#endif
+  };
+
+  mpf_t  x;
+  int    i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_init (x);
+      mpf_set_ui (x, data[i].x);
+      MPF_CHECK_FORMAT (x);
+      if (x->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0
+          || x->_mp_exp != ABS (data[i].want_size))
+        {
+          printf ("mpf_set_ui wrong on data[%d]\n", i);
+          abort();
+        }
+      mpf_clear (x);
+
+      mpf_init_set_ui (x, data[i].x);
+      MPF_CHECK_FORMAT (x);
+      if (x->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0
+          || x->_mp_exp != ABS (data[i].want_size))
+        {
+          printf ("mpf_init_set_ui wrong on data[%d]\n", i);
+          abort();
+        }
+      mpf_clear (x);
+    }
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-sqrt.c b/tests/mpf/t-sqrt.c
new file mode 100644
index 0000000..5e93aba
--- /dev/null
+++ b/tests/mpf/t-sqrt.c
@@ -0,0 +1,193 @@
+/* Test mpf_sqrt, mpf_mul.
+
+Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+void
+check_rand1 (int argc, char **argv)
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  int reps = 20000;
+  int i;
+  mpf_t x, y, y2;
+  mp_size_t bprec = 100;
+  mpf_t rerr, max_rerr, limit_rerr;
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+	bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init_set_ui (limit_rerr, 1);
+  mpf_div_2exp (limit_rerr, limit_rerr, bprec);
+#if VERBOSE
+  mpf_dump (limit_rerr);
+#endif
+  mpf_init (rerr);
+  mpf_init_set_ui (max_rerr, 0);
+
+  mpf_init (x);
+  mpf_init (y);
+  mpf_init (y2);
+  for (i = 0; i < reps; i++)
+    {
+      size = urandom () % SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (x, size, exp);
+
+      mpf_sqrt (y, x);
+      MPF_CHECK_FORMAT (y);
+      mpf_mul (y2, y, y);
+
+      mpf_reldiff (rerr, x, y2);
+      if (mpf_cmp (rerr, max_rerr) > 0)
+	{
+	  mpf_set (max_rerr, rerr);
+#if VERBOSE
+	  mpf_dump (max_rerr);
+#endif
+	  if (mpf_cmp (rerr, limit_rerr) > 0)
+	    {
+	      printf ("ERROR after %d tests\n", i);
+	      printf ("   x = "); mpf_dump (x);
+	      printf ("   y = "); mpf_dump (y);
+	      printf ("  y2 = "); mpf_dump (y2);
+	      printf ("   rerr       = "); mpf_dump (rerr);
+	      printf ("   limit_rerr = "); mpf_dump (limit_rerr);
+              printf ("in hex:\n");
+              mp_trace_base = 16;
+	      mpf_trace ("   x  ", x);
+	      mpf_trace ("   y  ", y);
+	      mpf_trace ("   y2 ", y2);
+	      mpf_trace ("   rerr      ", rerr);
+	      mpf_trace ("   limit_rerr", limit_rerr);
+	      abort ();
+	    }
+	}
+    }
+
+  mpf_clear (limit_rerr);
+  mpf_clear (rerr);
+  mpf_clear (max_rerr);
+
+  mpf_clear (x);
+  mpf_clear (y);
+  mpf_clear (y2);
+}
+
+void
+check_rand2 (void)
+{
+  unsigned long      max_prec = 20;
+  unsigned long      min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long      x_prec, r_prec;
+  mpf_t              x, r, s;
+  int                i;
+
+  mpf_init (x);
+  mpf_init (r);
+  mpf_init (s);
+  refmpf_set_prec_limbs (s, 2*max_prec+10);
+
+  for (i = 0; i < 500; i++)
+    {
+      /* input precision */
+      x_prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;
+      refmpf_set_prec_limbs (x, x_prec);
+
+      /* result precision */
+      r_prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;
+      refmpf_set_prec_limbs (r, r_prec);
+
+      mpf_random2 (x, x_prec, 1000);
+
+      mpf_sqrt (r, x);
+      MPF_CHECK_FORMAT (r);
+
+      /* Expect to prec limbs of result.
+         In the current implementation there's no stripping of low zero
+         limbs in mpf_sqrt, so size should be exactly prec.  */
+      if (SIZ(r) != r_prec)
+        {
+          printf ("mpf_sqrt wrong number of result limbs\n");
+          mpf_trace ("  x", x);
+          mpf_trace ("  r", r);
+          printf    ("  r_prec=%lu\n", r_prec);
+          printf    ("  SIZ(r)  %ld\n", (long) SIZ(r));
+          printf    ("  PREC(r) %ld\n", (long) PREC(r));
+          abort ();
+        }
+
+      /* Must have r^2 <= x, since r has been truncated. */
+      mpf_mul (s, r, r);
+      if (! (mpf_cmp (s, x) <= 0))
+        {
+          printf    ("mpf_sqrt result too big\n");
+          mpf_trace ("  x", x);
+          printf    ("  r_prec=%lu\n", r_prec);
+          mpf_trace ("  r", r);
+          mpf_trace ("  s", s);
+          abort ();
+        }
+
+      /* Must have (r+ulp)^2 > x, or else r is too small. */
+      refmpf_add_ulp (r);
+      mpf_mul (s, r, r);
+      if (! (mpf_cmp (s, x) > 0))
+        {
+          printf    ("mpf_sqrt result too small\n");
+          mpf_trace ("  x", x);
+          printf    ("  r_prec=%lu\n", r_prec);
+          mpf_trace ("  r+ulp", r);
+          mpf_trace ("  s", s);
+          abort ();
+        }
+    }
+
+  mpf_clear (x);
+  mpf_clear (r);
+  mpf_clear (s);
+}
+
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_rand1 (argc, argv);
+  check_rand2 ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-sqrt_ui.c b/tests/mpf/t-sqrt_ui.c
new file mode 100644
index 0000000..31c587f
--- /dev/null
+++ b/tests/mpf/t-sqrt_ui.c
@@ -0,0 +1,125 @@
+/* Test mpf_sqrt_ui.
+
+Copyright 2004, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_rand (void)
+{
+  unsigned long      max_prec = 15;
+  unsigned long      min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long      x, prec;
+  mpf_t              r, s;
+  int                i;
+
+  mpf_init (r);
+  mpf_init (s);
+  refmpf_set_prec_limbs (s, 2*max_prec+10);
+
+  for (x = 0; x < 2; x++)
+    {
+      mpf_sqrt_ui (r, x);
+      MPF_CHECK_FORMAT (r);
+      if (mpf_cmp_ui (r, x) != 0)
+	{
+	  printf    ("mpf_sqrt_ui wrong for special case:\n");
+          printf    ("  x=%lu\n", x);
+          mpf_trace ("  r", r);
+	  abort ();
+	}
+    }
+
+  for (i = 0; i < 50; i++)
+    {
+      /* input, a random non-zero ulong, exponentially distributed */
+      do {
+        x = gmp_urandomb_ui (rands,
+                             gmp_urandomm_ui (rands, BITS_PER_ULONG) + 1);
+      } while (x <= 1);
+
+      /* result precision */
+      prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;
+      refmpf_set_prec_limbs (r, prec);
+
+      mpf_sqrt_ui (r, x);
+      MPF_CHECK_FORMAT (r);
+
+      /* Expect to prec limbs of result.
+         In the current implementation there's no stripping of low zero
+         limbs in mpf_sqrt_ui, not even on perfect squares, so size should
+         be exactly prec.  */
+      if (SIZ(r) != prec)
+        {
+          printf ("mpf_sqrt_ui result not enough result limbs\n");
+          printf    ("  x=%lu\n", x);
+          printf    ("  want prec=%lu\n", prec);
+          mpf_trace ("  r", r);
+          printf    ("  r size %ld\n", (long) SIZ(r));
+          printf    ("  r prec %ld\n", (long) PREC(r));
+          abort ();
+        }
+
+      /* Must have r^2 <= x, since r has been truncated. */
+      mpf_mul (s, r, r);
+      if (! (mpf_cmp_ui (s, x) <= 0))
+        {
+          printf    ("mpf_sqrt_ui result too big\n");
+          printf    ("  x=%lu\n", x);
+          printf    ("  want prec=%lu\n", prec);
+          mpf_trace ("  r", r);
+          mpf_trace ("  s", s);
+          abort ();
+        }
+
+      /* Must have (r+ulp)^2 > x.
+         No overflow from refmpf_add_ulp since r is only prec limbs. */
+      refmpf_add_ulp (r);
+      mpf_mul (s, r, r);
+      if (! (mpf_cmp_ui (s, x) > 0))
+        {
+          printf    ("mpf_sqrt_ui result too small\n");
+          printf    ("  x=%lu\n", x);
+          printf    ("  want prec=%lu\n", prec);
+          mpf_trace ("  r+ulp", r);
+          mpf_trace ("  s", s);
+          abort ();
+        }
+    }
+
+  mpf_clear (r);
+  mpf_clear (s);
+}
+
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-sub.c b/tests/mpf/t-sub.c
new file mode 100644
index 0000000..3872264
--- /dev/null
+++ b/tests/mpf/t-sub.c
@@ -0,0 +1,287 @@
+/* Test mpf_sub.
+
+Copyright 1996, 2001, 2004, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+/* Compare mpf_sub against refmpf_sub on random operands, tracking the
+   largest relative error seen and aborting if it ever exceeds 2^-bprec.
+   argv[1] (if given) overrides the repetition count, argv[2] the
+   precision in bits.  */
+void
+check_rand (int argc, char **argv)
+{
+  int reps = 20000;
+  mp_size_t bprec = 100;
+  mp_size_t size;
+  mp_exp_t exp;
+  int test;
+  mpf_t u, v, w, wref;
+  mpf_t rerr, max_rerr, limit_rerr;
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+        bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  /* limit_rerr = 2^-bprec, the largest acceptable relative error.  */
+  mpf_init_set_ui (limit_rerr, 1);
+  mpf_div_2exp (limit_rerr, limit_rerr, bprec);
+#if VERBOSE
+  mpf_dump (limit_rerr);
+#endif
+  mpf_init (rerr);
+  mpf_init_set_ui (max_rerr, 0);
+
+  mpf_init (u);
+  mpf_init (v);
+  mpf_init (w);
+  mpf_init (wref);
+
+  for (test = 0; test < reps; test++)
+    {
+      size = urandom () % (2 * SIZE) - SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (u, size, exp);
+
+      size = urandom () % (2 * SIZE) - SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (v, size, exp);
+
+      /* Sometimes force the operands close together, to exercise heavy
+         cancellation.  Note the second urandom() call happens only when
+         the first branch is not taken.  */
+      if ((urandom () & 1) != 0)
+        mpf_add_ui (u, v, 1);
+      else if ((urandom () & 1) != 0)
+        mpf_sub_ui (u, v, 1);
+
+      mpf_sub (w, u, v);
+      refmpf_sub (wref, u, v);
+
+      mpf_reldiff (rerr, w, wref);
+      if (mpf_cmp (rerr, max_rerr) > 0)
+        {
+          mpf_set (max_rerr, rerr);
+#if VERBOSE
+          mpf_dump (max_rerr);
+#endif
+          if (mpf_cmp (rerr, limit_rerr) > 0)
+            {
+              printf ("ERROR after %d tests\n", test);
+              printf ("   u = "); mpf_dump (u);
+              printf ("   v = "); mpf_dump (v);
+              printf ("wref = "); mpf_dump (wref);
+              printf ("   w = "); mpf_dump (w);
+              abort ();
+            }
+        }
+    }
+
+  mpf_clear (limit_rerr);
+  mpf_clear (rerr);
+  mpf_clear (max_rerr);
+  mpf_clear (u);
+  mpf_clear (v);
+  mpf_clear (w);
+  mpf_clear (wref);
+}
+
+#define W GMP_NUMB_MAX
+
+/* Exercise mpf_sub on a table of hand-picked limb patterns, checking each
+   result against the stored expected value with refmpf_validate.  When an
+   operand reduces to a single limb (or zero), the mpf_ui_sub and
+   mpf_sub_ui entrypoints are exercised on rescaled copies of the same
+   data as well.  Each table entry is run under all eight combinations of
+   the "swap" bits (see the inner loop).  */
+void
+check_data (void)
+{
+  static const struct {
+    struct {
+      int        exp, size;
+      mp_limb_t  d[10];
+    } x, y, want;
+
+  } data[] = {
+    { { 123, 2, { 8, 9 } },             { 123, 1, { 9 } }, { 122, 1, { 8 } } },
+    { { 1, 1, { 9 } },                  { 1, 1, { 8 } },   { 1, 1, { 1 } } },
+    { { 1, 1, { 9 } },                 { 1, -1, { 6 } },   { 1, 1, { 15 } } },
+    { { 1, 2, { 8, 9 } },               { 1, 1, { 8 } },   { 1, 2, { 8, 1 } } },
+    { { 2, 2, { 8, 1 } },               { 1, 1, { 9 } },   { 1, 1, { W } } },
+    { { 2, 2, { 9, 8 } },               { 1, 1, { 9 } },   { 2, 1, { 8 } } },
+    { { 2, 1, { 1 } },                  { 1, 1, { 1 } },   { 1, 1, { W } } },
+    { { 2, 1, { 9 } },                  { 1, 1, { W } },   { 2, 2, { 1, 8 } } },
+
+    { { 1, 2, { W, 8 } },             { 1, 1, { 9 } },   { 0, -1, { 1 } } },
+    { { 1, 2, { W, 7 } },             { 1, 1, { 9 } },   { 1, -2, { 1, 1 } } },
+    { { 1, 2, { 1, 8 } },             { 1, 1, { 9 } },   { 0, -1, { W } } },
+    { { 1, 2, { 1, 7 } },             { 1, 1, { 9 } },   { 1, -2, { W, 1 } } },
+    { { 1, 2, { 0, 8 } },             { 1, 1, { 9 } },   { 1, -1, { 1 } } },
+    { { 2, 3, { 5, 8, 1 } },          { 1, 1, { 9 } },   { 1, 2, { 5, W } } },
+    { { 3, 1, { 1 } },                { 1, 1, { 1 } },   { 2, 2, { W, W } } },
+    { { 1, 6, { W, W, W, W, W, 8 } }, { 1, 1, { 9 } },   { -4, -1, { 1 } } },
+    { { 5, 5, { W-6, W, W, W, W } },  { 6, 1, { 1 } },   { 1, -1, { 7 } } },
+
+    /* f - f == 0, various sizes.
+       These exercise a past problem (gmp 4.1.3 and earlier) where the
+       result exponent was not zeroed on a zero result like this.  */
+    { { 0, 0 }, { 0, 0 }, { 0, 0 } },
+    { { 99, 3, { 0, 0, 1 } },       { 99, 1, { 1 } },             { 0, 0 } },
+    { { 99, 3, { 0, 123, 456 } },   { 99, 2, { 123, 456 } },      { 0, 0 } },
+    { { 99, 3, { 123, 456, 789 } }, { 99, 3, { 123, 456, 789 } }, { 0, 0 } },
+
+    /* High limbs cancel, leaving just the low limbs of the longer operand.
+       This exercises a past problem (gmp 4.1.3 and earlier) where high zero
+       limbs on the remainder were not stripped before truncating to the
+       destination, causing loss of precision.  */
+    { { 123, 2, { 8, 9 } },             { 123, 1, { 9 } }, { 122, 1, { 8 } } },
+    { { 123, 3, { 8, 0, 9 } },          { 123, 1, { 9 } }, { 121, 1, { 8 } } },
+    { { 123, 4, { 8, 0, 0, 9 } },       { 123, 1, { 9 } }, { 120, 1, { 8 } } },
+    { { 123, 5, { 8, 0, 0, 0, 9 } },    { 123, 1, { 9 } }, { 119, 1, { 8 } } },
+    { { 123, 6, { 8, 0, 0, 0, 0, 9 } }, { 123, 1, { 9 } }, { 118, 1, { 8 } } },
+    /* { { 123, 6, { 8, 0, 0, 0, 0, 9 } }, { 123, 6, { 9, 0, 0, 0, 0, 8 } }, { 122, 5, { W, W, W, W, W } } }, */
+
+  };
+
+  mpf_t  x, y, got, want;
+  int  i, swap, fail;
+
+  fail = 0;
+  mp_trace_base = 16;
+  mpf_init (got);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* swap bit 0: exchange x and y (negating the expected result);
+         swap bit 1: negate x, y and the expected result;
+         swap bit 2: exchange y and want before the other transforms.  */
+      for (swap = 0; swap <= 7; swap++)
+        {
+          /* x, y and want are read-only views over the static table;
+             only PTR/SIZ/EXP/PREC are set, no allocation is done.  */
+          PTR(x) = (mp_ptr) data[i].x.d;
+          SIZ(x) = data[i].x.size;
+          EXP(x) = data[i].x.exp;
+          PREC(x) = numberof (data[i].x.d);
+          MPF_CHECK_FORMAT (x);
+
+          PTR(y) = (mp_ptr) data[i].y.d;
+          SIZ(y) = data[i].y.size;
+          EXP(y) = data[i].y.exp;
+          PREC(y) = numberof (data[i].y.d);
+          MPF_CHECK_FORMAT (y);
+
+          PTR(want) = (mp_ptr) data[i].want.d;
+          SIZ(want) = data[i].want.size;
+          EXP(want) = data[i].want.exp;
+          PREC(want) = numberof (data[i].want.d);
+          MPF_CHECK_FORMAT (want);
+
+          if (swap & 4)
+            {
+              mpf_swap (want, y);
+            }
+
+	  if ((SIZ (x) ^ SIZ (y)) < 0)
+	    continue; /* It's an addition, not a subtraction (TO BE REMOVED) */
+
+          if (swap & 1)
+            {
+              mpf_swap (x, y);
+              SIZ(want) = - SIZ(want);
+            }
+
+          if (swap & 2)
+            {
+              SIZ(want) = - SIZ(want);
+              SIZ(x) = - SIZ(x);
+              SIZ(y) = - SIZ(y);
+            }
+
+          mpf_sub (got, x, y);
+/*           MPF_CHECK_FORMAT (got); */
+
+          if (! refmpf_validate ("mpf_sub", got, want))
+            {
+              printf ("check_data() wrong result at data[%d] (operands%s swapped)\n", i, swap ? "" : " not");
+              mpf_trace ("x   ", x);
+              mpf_trace ("y   ", y);
+              mpf_trace ("got ", got);
+              mpf_trace ("want", want);
+	      fail = 1;
+            }
+
+	  /* When x is zero or a single limb, rescale the exponents so x
+	     becomes a plain integer, then also exercise mpf_ui_sub.  */
+	  if (SIZ (x) == 1 || SIZ (x) == 0 )
+	    {
+	      if (SIZ (y)) EXP (y) -= EXP (x) - (mp_exp_t) SIZ (x);
+	      if (SIZ (want)) EXP (want) -= EXP (x) - (mp_exp_t) SIZ (x);
+	      EXP (x) = (mp_exp_t) SIZ (x);
+
+	      if (mpf_fits_uint_p (x))
+		{
+		  mpf_ui_sub (got, mpf_get_ui (x), y);
+
+		  if (! refmpf_validate ("mpf_ui_sub", got, want))
+		    {
+		      printf ("check_data() wrong result at data[%d] (operands%s swapped)\n", i, swap ? "" : " not");
+		      mpf_trace ("x   ", x);
+		      mpf_trace ("y   ", y);
+		      mpf_trace ("got ", got);
+		      mpf_trace ("want", want);
+		      fail = 1;
+		    }
+		}
+	    }
+
+	  /* Likewise for y, exercising mpf_sub_ui.  */
+	  if (SIZ (y) == 1 || SIZ (y) == 0)
+	    {
+	      if (SIZ (x)) EXP (x) -= EXP (y) - (mp_exp_t) SIZ (y);
+	      if (SIZ (want)) EXP (want) -= EXP (y) - (mp_exp_t) SIZ (y);
+	      EXP (y) = (mp_exp_t) SIZ (y);
+
+	      if (mpf_fits_uint_p (x))
+		{
+		  mpf_sub_ui (got, x, mpf_get_ui (y));
+
+		  if (! refmpf_validate ("mpf_sub_ui", got, want))
+		    {
+		      printf ("check_data() wrong result at data[%d] (operands%s swapped)\n", i, swap ? "" : " not");
+		      mpf_trace ("x   ", x);
+		      mpf_trace ("y   ", y);
+		      mpf_trace ("got ", got);
+		      mpf_trace ("want", want);
+		      fail = 1;
+		    }
+		}
+	    }
+
+        }
+    }
+
+  mpf_clear (got);
+  if (fail)
+    abort ();
+}
+
+
+/* Driver for the mpf_sub tests.  */
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+  check_data ();                /* deterministic table-driven cases */
+  check_rand (argc, argv);      /* optional argv: reps, precision */
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-trunc.c b/tests/mpf/t-trunc.c
new file mode 100644
index 0000000..6543a1e
--- /dev/null
+++ b/tests/mpf/t-trunc.c
@@ -0,0 +1,270 @@
+/* Test mpf_trunc, mpf_ceil, mpf_floor.
+
+Copyright 2001, 2002, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Dump SRC and the mismatching GOT/WANT pair, in hex, including the raw
+   size/exponent fields and limb data of got and want.  (%d is correct
+   for SIZ here: mpf's _mp_size field is an int.)  */
+void
+check_print (mpf_srcptr src, mpf_srcptr got, mpf_srcptr want)
+{
+  mp_trace_base = 16;
+  mpf_trace ("src ", src);
+  mpf_trace ("got ", got);
+  mpf_trace ("want", want);
+
+  printf ("got  size=%d exp=%ld\n", SIZ(got), EXP(got));
+  mpn_trace ("     limbs=", PTR(got), (mp_size_t) ABSIZ(got));
+
+  printf ("want size=%d exp=%ld\n", SIZ(want), EXP(want));
+  mpn_trace ("     limbs=", PTR(want), (mp_size_t) ABSIZ(want));
+}
+
+/* Run mpf_trunc, mpf_ceil and mpf_floor on SRC, both to a separate
+   destination and in place, comparing each result against the expected
+   TRUNC/CEIL/FLOOR values.  All three expected operands must share one
+   precision, which is also used for the scratch result.  */
+void
+check_one (mpf_srcptr src, mpf_srcptr trunc, mpf_srcptr ceil, mpf_srcptr floor)
+{
+  mpf_t  got;
+
+  mpf_init2 (got, mpf_get_prec (trunc));
+  ASSERT_ALWAYS (PREC(got) == PREC(trunc));
+  ASSERT_ALWAYS (PREC(got) == PREC(ceil));
+  ASSERT_ALWAYS (PREC(got) == PREC(floor));
+
+  /* Separate source and destination; "got" starts as junk so a function
+     that accidentally leaves its destination untouched is caught.  */
+#define CHECK_SEP(name, fun, want)              \
+  mpf_set_ui (got, 54321L); /* initial junk */  \
+  fun (got, src);                               \
+  MPF_CHECK_FORMAT (got);                       \
+  if (mpf_cmp (got, want) != 0)                 \
+    {                                           \
+	printf ("%s wrong\n", name);            \
+	check_print (src, got, want);           \
+	abort ();                               \
+    }
+
+  CHECK_SEP ("mpf_trunc", mpf_trunc, trunc);
+  CHECK_SEP ("mpf_ceil",  mpf_ceil,  ceil);
+  CHECK_SEP ("mpf_floor", mpf_floor, floor);
+
+  /* In-place operation: src is first copied into got (which may truncate
+     it to got's precision, see below).  */
+#define CHECK_INPLACE(name, fun, want)  \
+  mpf_set (got, src);                   \
+  fun (got, got);                       \
+  MPF_CHECK_FORMAT (got);               \
+  if (mpf_cmp (got, want) != 0)         \
+    {                                   \
+	printf ("%s wrong\n", name);    \
+	check_print (src, got, want);   \
+	abort ();                       \
+    }
+
+  CHECK_INPLACE ("mpf_trunc", mpf_trunc, trunc);
+
+  /* Can't do these unconditionally in case truncation by mpf_set strips
+     some low non-zero limbs which would have rounded the result.  */
+  if (mpf_size (src) <= PREC(trunc)+1)
+    {
+      CHECK_INPLACE ("mpf_ceil",  mpf_ceil,  ceil);
+      CHECK_INPLACE ("mpf_floor", mpf_floor, floor);
+    }
+
+  mpf_clear (got);
+}
+
+/* Validate the operands, run the checks, then negate everything and run
+   them again.  Negation swaps the roles of ceil and floor, since
+   ceil(-x) == -floor(x) and floor(-x) == -ceil(x).  */
+void
+check_all (mpf_ptr src, mpf_ptr trunc, mpf_ptr ceil, mpf_ptr floor)
+{
+  /* Some callers build these via direct field assignments, so verify the
+     mpf invariants before using them.  */
+  MPF_CHECK_FORMAT (src);
+  MPF_CHECK_FORMAT (trunc);
+  MPF_CHECK_FORMAT (ceil);
+  MPF_CHECK_FORMAT (floor);
+
+  /* Positive direction.  */
+  check_one (src, trunc, ceil, floor);
+
+  /* Negated direction: ceil and floor trade places.  */
+  mpf_neg (src, src);
+  mpf_neg (trunc, trunc);
+  mpf_neg (ceil, ceil);
+  mpf_neg (floor, floor);
+  check_one (src, trunc, floor, ceil);
+}
+
+/* Fixed test cases for trunc/ceil/floor.  Several operands are built by
+   writing EXP/SIZ and limb data directly into the space allocated by
+   mpf_init2, so check_all re-validates each operand's format first.  */
+void
+check_various (void)
+{
+  mpf_t  src, trunc, ceil, floor;
+  int    n, i;
+
+  mpf_init2 (src, 512L);
+  mpf_init2 (trunc, 256L);
+  mpf_init2 (ceil,  256L);
+  mpf_init2 (floor, 256L);
+
+  /* 0 */
+  mpf_set_ui (src, 0L);
+  mpf_set_ui (trunc, 0L);
+  mpf_set_ui (ceil, 0L);
+  mpf_set_ui (floor, 0L);
+  check_all (src, trunc, ceil, floor);
+
+  /* 1 */
+  mpf_set_ui (src, 1L);
+  mpf_set_ui (trunc, 1L);
+  mpf_set_ui (ceil, 1L);
+  mpf_set_ui (floor, 1L);
+  check_all (src, trunc, ceil, floor);
+
+  /* 2^1024 */
+  mpf_set_ui (src, 1L);
+  mpf_mul_2exp (src,   src,   1024L);
+  mpf_set (trunc, src);
+  mpf_set (ceil,  src);
+  mpf_set (floor, src);
+  check_all (src, trunc, ceil, floor);
+
+  /* 1/2^1024, fraction only */
+  mpf_set_ui (src, 1L);
+  mpf_div_2exp (src,  src, 1024L);
+  mpf_set_si (trunc, 0L);
+  mpf_set_si (ceil, 1L);
+  mpf_set_si (floor, 0L);
+  check_all (src, trunc, ceil, floor);
+
+  /* 1/2 */
+  mpf_set_ui (src, 1L);
+  mpf_div_2exp (src,  src, 1L);
+  mpf_set_si (trunc, 0L);
+  mpf_set_si (ceil, 1L);
+  mpf_set_si (floor, 0L);
+  check_all (src, trunc, ceil, floor);
+
+  /* 123+1/2^64 */
+  mpf_set_ui (src, 1L);
+  mpf_div_2exp (src,  src, 64L);
+  mpf_add_ui (src,  src, 123L);
+  mpf_set_si (trunc, 123L);
+  mpf_set_si (ceil, 124L);
+  mpf_set_si (floor, 123L);
+  check_all (src, trunc, ceil, floor);
+
+  /* The remaining cases assign EXP/SIZ and limbs directly.  */
+
+  /* integer of full prec+1 limbs, unchanged */
+  n = PREC(trunc)+1;
+  ASSERT_ALWAYS (n <= PREC(src)+1);
+  EXP(src) = n;
+  SIZ(src) = n;
+  for (i = 0; i < SIZ(src); i++)
+    PTR(src)[i] = i+100;
+  mpf_set (trunc, src);
+  mpf_set (ceil, src);
+  mpf_set (floor, src);
+  check_all (src, trunc, ceil, floor);
+
+  /* full prec+1 limbs, 1 trimmed for integer */
+  n = PREC(trunc)+1;
+  ASSERT_ALWAYS (n <= PREC(src)+1);
+  EXP(src) = n-1;
+  SIZ(src) = n;
+  for (i = 0; i < SIZ(src); i++)
+    PTR(src)[i] = i+200;
+  EXP(trunc) = n-1;
+  SIZ(trunc) = n-1;
+  for (i = 0; i < SIZ(trunc); i++)
+    PTR(trunc)[i] = i+201;
+  mpf_set (floor, trunc);
+  mpf_add_ui (ceil, trunc, 1L);
+  check_all (src, trunc, ceil, floor);
+
+  /* prec+3 limbs, 2 trimmed for size */
+  n = PREC(trunc)+3;
+  ASSERT_ALWAYS (n <= PREC(src)+1);
+  EXP(src) = n;
+  SIZ(src) = n;
+  for (i = 0; i < SIZ(src); i++)
+    PTR(src)[i] = i+300;
+  EXP(trunc) = n;
+  SIZ(trunc) = n-2;
+  for (i = 0; i < SIZ(trunc); i++)
+    PTR(trunc)[i] = i+302;
+  mpf_set (floor, trunc);
+  mpf_set (ceil, trunc);
+  PTR(ceil)[0]++;
+  check_all (src, trunc, ceil, floor);
+
+  /* prec+4 limbs, 2 trimmed for size, 1 trimmed for integer */
+  n = PREC(trunc)+4;
+  ASSERT_ALWAYS (n <= PREC(src)+1);
+  EXP(src) = n-1;
+  SIZ(src) = n;
+  for (i = 0; i < SIZ(src); i++)
+    PTR(src)[i] = i+400;
+  EXP(trunc) = n-1;
+  SIZ(trunc) = n-3;
+  for (i = 0; i < SIZ(trunc); i++)
+    PTR(trunc)[i] = i+403;
+  mpf_set (floor, trunc);
+  mpf_set (ceil, trunc);
+  PTR(ceil)[0]++;
+  check_all (src, trunc, ceil, floor);
+
+  /* F.F, carry out of ceil */
+  EXP(src) = 1;
+  SIZ(src) = 2;
+  PTR(src)[0] = GMP_NUMB_MAX;
+  PTR(src)[1] = GMP_NUMB_MAX;
+  EXP(trunc) = 1;
+  SIZ(trunc) = 1;
+  PTR(trunc)[0] = GMP_NUMB_MAX;
+  mpf_set (floor, trunc);
+  EXP(ceil) = 2;
+  SIZ(ceil) = 1;
+  PTR(ceil)[0] = 1;
+  check_all (src, trunc, ceil, floor);
+
+  /* FF.F, carry out of ceil */
+  EXP(src) = 2;
+  SIZ(src) = 3;
+  PTR(src)[0] = GMP_NUMB_MAX;
+  PTR(src)[1] = GMP_NUMB_MAX;
+  PTR(src)[2] = GMP_NUMB_MAX;
+  EXP(trunc) = 2;
+  SIZ(trunc) = 2;
+  PTR(trunc)[0] = GMP_NUMB_MAX;
+  PTR(trunc)[1] = GMP_NUMB_MAX;
+  mpf_set (floor, trunc);
+  EXP(ceil) = 3;
+  SIZ(ceil) = 1;
+  PTR(ceil)[0] = 1;
+  check_all (src, trunc, ceil, floor);
+
+  mpf_clear (src);
+  mpf_clear (trunc);
+  mpf_clear (ceil);
+  mpf_clear (floor);
+}
+
+/* Driver for the trunc/ceil/floor tests.  */
+int
+main (void)
+{
+  tests_start ();
+  check_various ();
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-ui_div.c b/tests/mpf/t-ui_div.c
new file mode 100644
index 0000000..4b86215
--- /dev/null
+++ b/tests/mpf/t-ui_div.c
@@ -0,0 +1,151 @@
+/* Test mpf_ui_div.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Validate GOT == U/V via refmpf_validate_division; on failure dump the
+   operands and abort.  DESC names which calling style produced GOT.
+   U is repacked into a temporary mpf UF by direct field assignment; only
+   PTR/SIZ/EXP are set -- PREC is left uninitialized, so presumably
+   refmpf_validate_division reads nothing else of UF (verify there).  */
+void
+check_one (const char *desc, mpf_ptr got, unsigned long u, mpf_srcptr v)
+{
+  mpf_t      uf;
+  mp_limb_t  ulimbs[2];
+  mp_size_t  usize;
+
+  ulimbs[0] = u & GMP_NUMB_MASK;
+  usize = (u != 0);
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* With nails, an unsigned long may need a second limb.  */
+  u >>= GMP_NUMB_BITS;
+  ulimbs[1] = u;
+  usize += (u != 0);
+#endif
+  PTR(uf) = ulimbs;
+  SIZ(uf) = usize;
+  EXP(uf) = usize;
+
+  if (! refmpf_validate_division ("mpf_ui_div", got, uf, v))
+    {
+      mp_trace_base = -16;
+      printf    ("  u 0x%lX  (%lu)\n", u, u);
+      mpf_trace ("  v", v);
+      printf    ("  %s\n", desc);
+      abort ();
+    }
+}
+
+/* Random testing of mpf_ui_div: random result precision, random numerator
+   and divisor sizes, divisor nonzero and of either sign, alternating
+   between separate and overlapping source/destination.  */
+void
+check_rand (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  prec, u;
+  mpf_t  got, v;
+  int    iter;
+
+  mpf_init (got);
+  mpf_init (v);
+
+  for (iter = 0; iter < 200; iter++)
+    {
+      /* Fresh precision for the quotient.  */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (got, prec);
+
+      /* Numerator: up to BITS_PER_ULONG random bits.  */
+      prec = gmp_urandomm_ui (rands, BITS_PER_ULONG+1);
+      u = gmp_urandomb_ui (rands, prec);
+
+      /* Fresh precision for the divisor ...  */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (v, prec);
+
+      /* ... and a nonzero random value for it.  */
+      do
+        mpf_random2 (v, PREC(v), (mp_exp_t) 20);
+      while (SIZ(v) == 0);
+
+      /* Either sign.  */
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (v, v);
+
+      if ((iter % 2) == 0)
+        {
+          /* Distinct source and destination.  */
+          mpf_ui_div (got, u, v);
+          check_one ("separate", got, u, v);
+        }
+      else
+        {
+          /* Overlapping: divide into the divisor's own storage.  */
+          prec = refmpf_set_overlap (got, v);
+          mpf_ui_div (got, u, got);
+          check_one ("overlap src==dst", got, u, v);
+
+          mpf_set_prec_raw (got, prec);
+        }
+    }
+
+  mpf_clear (got);
+  mpf_clear (v);
+}
+
+/* A few fixed cases: an exact division, and a divisor 2^800+1 whose low
+   limbs matter, so truncating the divisor would give a wrong quotient.  */
+void
+check_various (void)
+{
+  mpf_t quot, den;
+
+  mpf_init (quot);
+  mpf_init (den);
+
+  /* 100/4 == 25, exactly representable.  */
+  mpf_set_prec (quot, 20L);
+  mpf_set_ui (den, 4L);
+  mpf_ui_div (quot, 100L, den);
+  MPF_CHECK_FORMAT (quot);
+  ASSERT_ALWAYS (mpf_cmp_ui (quot, 25L) == 0);
+
+  /* 1/(2^800+1): the +1 in the lowest limb must influence the result.  */
+  mpf_set_prec (quot, 500L);
+  mpf_set_prec (den, 900L);
+  mpf_set_ui (den, 1L);
+  mpf_mul_2exp (den, den, 800L);
+  mpf_add_ui (den, den, 1L);
+  mpf_ui_div (quot, 1L, den);
+  check_one ("1/2^n+1, separate", quot, 1L, den);
+
+  mpf_clear (quot);
+  mpf_clear (den);
+}
+
+/* Driver for the mpf_ui_div tests.  */
+int
+main (void)
+{
+  tests_start ();
+  check_various ();     /* fixed cases first: easier to debug */
+  check_rand ();        /* then randomized coverage */
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/logic.c b/tests/mpn/logic.c
new file mode 100644
index 0000000..2b1c9a9
--- /dev/null
+++ b/tests/mpn/logic.c
@@ -0,0 +1,133 @@
+/* Test mpn_and, mpn_ior, mpn_xor, mpn_andn, mpn_iorn, mpn_xnor, mpn_nand, and
+   mpn_nior.
+
+Copyright 2011-2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Fake native prevalence of the tested operations, so that we actually test
+   the compiled functions, i.e., the ones which users will reach.  The inlined
+   variants will be tested through tests/mpz/logic.c.  */
+#define HAVE_NATIVE_mpn_com    1
+#define HAVE_NATIVE_mpn_and_n  1
+#define HAVE_NATIVE_mpn_andn_n 1
+#define HAVE_NATIVE_mpn_nand_n 1
+#define HAVE_NATIVE_mpn_ior_n  1
+#define HAVE_NATIVE_mpn_iorn_n 1
+#define HAVE_NATIVE_mpn_nior_n 1
+#define HAVE_NATIVE_mpn_xor_n  1
+#define HAVE_NATIVE_mpn_xnor_n 1
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Abort with a dump of the operands and both results if the computed
+   limbs RP differ from the reference limbs REFP over N limbs.  */
+void
+check_one (mp_srcptr refp, mp_srcptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, const char *funcname)
+{
+  if (mpn_cmp (refp, rp, n) == 0)
+    return;
+
+  printf ("ERROR in mpn_%s\n", funcname);
+  printf ("a: "); mpn_dump (ap, n);
+  printf ("b: "); mpn_dump (bp, n);
+  printf ("r:   "); mpn_dump (rp, n);
+  printf ("ref: "); mpn_dump (refp, n);
+  abort();
+}
+
+/* Driver: for operand sizes of 1..max_n bytes, generate random operands
+   with the top bit forced on (so both have the same limb count), then
+   check each compiled logical mpn function against its refmpn
+   counterpart.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t a, b;
+  mp_ptr ap, bp, rp, refp;
+  mp_size_t max_n, n, i;
+  gmp_randstate_ptr rands;
+  long test, reps = 1000;
+  TMP_DECL;
+  TMP_MARK;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  mpz_inits (a, b, NULL);
+
+  rands = RANDS;		/* shared test random state, used by mpz_rrandomb below */
+
+  max_n = 100;
+
+  /* Result buffers sized for max_n bytes of operand, plus a limb of slack.  */
+  rp = TMP_ALLOC_LIMBS (1 + max_n * 8 / GMP_LIMB_BITS);
+  refp = TMP_ALLOC_LIMBS (1 + max_n * 8 / GMP_LIMB_BITS);
+
+  for (test = 0; test < reps; test++)
+    {
+      for (i = 1; i <= max_n; i++)
+	{
+	  mpz_rrandomb (a, rands, i * 8);
+	  mpz_rrandomb (b, rands, i * 8);
+	  /* Force the top bit so a and b have identical limb counts,
+	     making SIZ(a) valid for both operands below.  */
+	  mpz_setbit (a, i * 8 - 1);
+	  mpz_setbit (b, i * 8 - 1);
+	  ap = PTR(a);
+	  bp = PTR(b);
+	  n = SIZ(a);
+
+	  refmpn_and_n (refp, ap, bp, n);
+	  mpn_and_n (rp, ap, bp, n);
+	  check_one (refp, rp, ap, bp, n, "and_n");
+
+	  refmpn_ior_n (refp, ap, bp, n);
+	  mpn_ior_n (rp, ap, bp, n);
+	  check_one (refp, rp, ap, bp, n, "ior_n");
+
+	  refmpn_xor_n (refp, ap, bp, n);
+	  mpn_xor_n (rp, ap, bp, n);
+	  check_one (refp, rp, ap, bp, n, "xor_n");
+
+	  refmpn_andn_n (refp, ap, bp, n);
+	  mpn_andn_n (rp, ap, bp, n);
+	  check_one (refp, rp, ap, bp, n, "andn_n");
+
+	  refmpn_iorn_n (refp, ap, bp, n);
+	  mpn_iorn_n (rp, ap, bp, n);
+	  check_one (refp, rp, ap, bp, n, "iorn_n");
+
+	  refmpn_nand_n (refp, ap, bp, n);
+	  mpn_nand_n (rp, ap, bp, n);
+	  check_one (refp, rp, ap, bp, n, "nand_n");
+
+	  refmpn_nior_n (refp, ap, bp, n);
+	  mpn_nior_n (rp, ap, bp, n);
+	  check_one (refp, rp, ap, bp, n, "nior_n");
+
+	  refmpn_xnor_n (refp, ap, bp, n);
+	  mpn_xnor_n (rp, ap, bp, n);
+	  check_one (refp, rp, ap, bp, n, "xnor_n");
+
+	  /* Unary complement; bp is passed only for the error dump.  */
+	  refmpn_com (refp, ap, n);
+	  mpn_com (rp, ap, n);
+	  check_one (refp, rp, ap, bp, n, "com");
+	}
+    }
+
+  TMP_FREE;
+  mpz_clears (a, b, NULL);
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-addaddmul.c b/tests/mpn/t-addaddmul.c
new file mode 100644
index 0000000..8d3b0da
--- /dev/null
+++ b/tests/mpn/t-addaddmul.c
@@ -0,0 +1,98 @@
+/* Test mpn_addaddmul_1msb0.
+
+Copyright 2021 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if !HAVE_NATIVE_mpn_addaddmul_1msb0
+int
+main (int argc, char **argv)
+{
+  /* mpn_addaddmul_1msb0 is not provided natively on this build; return
+     the automake "skipped test" exit status.  */
+  return 77;
+}
+#else
+
+/* Check mpn_addaddmul_1msb0(a,b,n,u,v) == a*u + b*v over n limbs, using
+   mpn_mul_1 + mpn_addmul_1 as the reference.  The multipliers' most
+   significant bit is cleared first, as the function requires.  Aborts
+   with a dump of the operands on mismatch.  */
+static void
+one_test (int i, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_limb_t u, mp_limb_t v)
+{
+  /* One extra limb for the carry out of the multiply-accumulate.  */
+  mp_ptr r = refmpn_malloc_limbs (n + 1);
+  mp_ptr ref = refmpn_malloc_limbs (n + 1);
+
+  u &= ~GMP_NUMB_HIGHBIT;
+  v &= ~GMP_NUMB_HIGHBIT;
+  ref[n] = mpn_mul_1 (ref, a, n, u);
+  ref[n] += mpn_addmul_1 (ref, b, n, v);
+  r[n] = mpn_addaddmul_1msb0 (r, a, b, n, u, v);
+
+  if (mpn_cmp (r, ref, n+1) != 0)
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "Bad result from addaddmul_1msb0\n");
+      gmp_fprintf (stderr, "op1=%Nx\n", a, n);
+      gmp_fprintf (stderr, "op2=%Nx\n", b, n);
+      gmp_fprintf (stderr, "u = %Mx, v = %Mx\n", u, v);
+      gmp_fprintf (stderr, "res=%Nx\n", r, n + 1);
+      gmp_fprintf (stderr, "ref=%Nx\n", ref, n + 1);
+
+      abort();
+    }
+
+  /* Don't leak the scratch buffers -- this runs thousands of times.  */
+  refmpn_free_limbs (r);
+  refmpn_free_limbs (ref);
+}
+
+/* Driver: 10000 iterations with random operand sizes (>= 10 bits) and
+   random multipliers restricted to GMP_NUMB_BITS-1 bits, matching the
+   msb0 requirement enforced again inside one_test.  */
+int main (int argc, char **argv)
+{
+  mpz_t op1, op2;
+  int i;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_inits (bs, op1, op2, NULL);
+
+  for (i = 0; i < 10000; i++)
+    {
+      unsigned long size_range;
+      mp_size_t bit_size;
+      mp_size_t limb_size;
+      mp_limb_t u, v;
+
+      /* Exponentially distributed operand sizes.  */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;
+      mpz_urandomb (bs, rands, size_range);
+
+      bit_size = mpz_get_ui (bs) + 10;
+      mpz_rrandomb (op1, rands, bit_size);
+      mpz_rrandomb (op2, rands, bit_size);
+
+      mpz_rrandomb (bs, rands, GMP_NUMB_BITS - 1);
+      u = mpz_getlimbn (bs, 0);
+
+      mpz_rrandomb (bs, rands, GMP_NUMB_BITS - 1);
+      v = mpz_getlimbn (bs, 0);
+
+      /* op1 and op2 were built from the same bit count, so op1's limb
+         count serves for both.  */
+      limb_size = mpz_size (op1);
+      one_test (i, mpz_limbs_read (op1), mpz_limbs_read(op2), limb_size, u, v);
+    }
+  mpz_clears (bs, op1, op2, NULL);
+
+  /* Every other test program in this suite closes with tests_end(),
+     which performs the framework's final cleanup/checks.  */
+  tests_end ();
+  return 0;
+}
+#endif
diff --git a/tests/mpn/t-aors_1.c b/tests/mpn/t-aors_1.c
new file mode 100644
index 0000000..c894922
--- /dev/null
+++ b/tests/mpn/t-aors_1.c
@@ -0,0 +1,310 @@
+/* Test mpn_add_1 and mpn_sub_1.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* M is the all-ones limb; ASIZE bounds every test vector; MAGIC is a guard
+   value planted just past each destination to detect buffer overruns.  */
+#define M      GMP_NUMB_MAX
+#define ASIZE  10
+#define MAGIC  0x1234
+
+/* Fill the destination with random limbs and plant the guard limb.  */
+#define SETUP()                         \
+  do {                                  \
+    refmpn_random (got, data[i].size);  \
+    got[data[i].size] = MAGIC;          \
+  } while (0)
+
+/* Copy the source operand into the destination, for in-place testing.  */
+#define SETUP_INPLACE()                                 \
+  do {                                                  \
+    refmpn_copyi (got, data[i].src, data[i].size);      \
+    got[data[i].size] = MAGIC;                          \
+  } while (0)
+
+/* Compare the most recent result against the current table entry.  */
+#define VERIFY(name)                            \
+  do {                                          \
+    verify (name, i, data[i].src, data[i].n,    \
+            got_c, data[i].want_c,              \
+            got, data[i].want, data[i].size);   \
+  } while (0)
+
+/* Function-pointer type matching mpn_add_1/mpn_sub_1; fudge() routes
+   calls through it to defeat inlining of the tested functions.  */
+typedef mp_limb_t (*mpn_aors_1_t) (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mpn_aors_1_t fudge (mpn_aors_1_t);
+
+
+/* Check one add_1/sub_1 result: the MAGIC guard limb just past the
+   destination must be intact, and both the returned carry/borrow and the
+   limb vector must match the expected values.  Aborts with a trace dump
+   on any mismatch.  */
+void
+verify (const char *name, int i,
+        mp_srcptr src, mp_limb_t n,
+        mp_limb_t got_c, mp_limb_t want_c,
+        mp_srcptr got, mp_srcptr want, mp_size_t size)
+{
+  if (got[size] != MAGIC)
+    {
+      printf ("Overwrite at %s i=%d\n", name, i);
+      abort ();
+    }
+
+  /* refmpn_equal_anynail compares ignoring nail bits, so the check is
+     valid for nails builds too.  */
+  if (got_c != want_c || ! refmpn_equal_anynail (got, want, size))
+    {
+      printf ("Wrong at %s i=%d size=%ld\n", name, i, size);
+      mpn_trace ("   src", src,  size);
+      mpn_trace ("     n", &n,   (mp_size_t) 1);
+      mpn_trace ("   got", got,  size);
+      mpn_trace ("  want", want, size);
+      mpn_trace (" got c", &got_c,  (mp_size_t) 1);
+      mpn_trace ("want c", &want_c, (mp_size_t) 1);
+      abort ();
+    }
+}
+
+
+/* Table-driven tests of mpn_add_1 and mpn_sec_add_1: each entry gives an
+   operand vector, the limb to add, and the expected carry and result.
+   Every entry is run separate-operand, in-place, (for n==1) with a
+   compile-time constant count, and through a function pointer.  */
+void
+check_add_1 (void)
+{
+  static const struct {
+    mp_size_t        size;
+    mp_limb_t        n;
+    const mp_limb_t  src[ASIZE];
+    mp_limb_t        want_c;
+    const mp_limb_t  want[ASIZE];
+  } data[] = {
+    { 1, 0, { 0 },  0, { 0 } },
+    { 1, 0, { 1 },  0, { 1 } },
+    { 1, 1, { 0 },  0, { 1 } },
+    { 1, 0, { M },  0, { M } },
+    { 1, M, { 0 },  0, { M } },
+    { 1, 1, { 123 }, 0, { 124 } },
+
+    { 1, 1, { M },  1, { 0 } },
+    { 1, M, { 1 },  1, { 0 } },
+    { 1, M, { M },  1, { M-1 } },
+
+    { 2, 0, { 0, 0 },  0, { 0, 0 } },
+    { 2, 0, { 1, 0 },  0, { 1, 0 } },
+    { 2, 1, { 0, 0 },  0, { 1, 0 } },
+    { 2, 0, { M, 0 },  0, { M, 0 } },
+    { 2, M, { 0, 0 },  0, { M, 0 } },
+    { 2, 1, { M, 0 },  0, { 0, 1 } },
+    { 2, M, { 1, 0 },  0, { 0, 1 } },
+    { 2, M, { M, 0 },  0, { M-1, 1 } },
+    { 2, M, { M, 0 },  0, { M-1, 1 } },
+
+    { 2, 1, { M, M },  1, { 0, 0 } },
+    { 2, M, { 1, M },  1, { 0, 0 } },
+    { 2, M, { M, M },  1, { M-1, 0 } },
+    { 2, M, { M, M },  1, { M-1, 0 } },
+
+    { 3, 1, { M, M, M },  1, { 0, 0, 0 } },
+    { 3, M, { 1, M, M },  1, { 0, 0, 0 } },
+    { 3, M, { M, M, M },  1, { M-1, 0, 0 } },
+    { 3, M, { M, M, M },  1, { M-1, 0, 0 } },
+
+    { 4, 1, { M, M, M, M },  1, { 0, 0, 0, 0 } },
+    { 4, M, { 1, M, M, M },  1, { 0, 0, 0, 0 } },
+    { 4, M, { M, M, M, M },  1, { M-1, 0, 0, 0 } },
+    { 4, M, { M, M, M, M },  1, { M-1, 0, 0, 0 } },
+
+    { 4, M, { M, 0,   M, M },  0, { M-1, 1, M, M } },
+    { 4, M, { M, M-1, M, M },  0, { M-1, M, M, M } },
+
+    { 4, M, { M, M, 0,   M },  0, { M-1, 0, 1, M } },
+    { 4, M, { M, M, M-1, M },  0, { M-1, 0, M, M } },
+  };
+
+  mp_limb_t  got[ASIZE];
+  mp_limb_t  got_c;
+  /* mpn_sec_add_1_itch(n) <= n */
+  mp_limb_t  scratch[ASIZE];
+  int        i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      SETUP ();
+      got_c = mpn_add_1 (got, data[i].src, data[i].size, data[i].n);
+      VERIFY ("check_add_1 (separate)");
+
+      SETUP_INPLACE ();
+      got_c = mpn_add_1 (got, got, data[i].size, data[i].n);
+      VERIFY ("check_add_1 (in-place)");
+
+      SETUP ();
+      /* Plant a guard limb just past the scratch area actually needed;
+         the XOR below is a no-op unless mpn_sec_add_1 clobbered it, in
+         which case got_c is corrupted and VERIFY fails.  */
+      scratch [mpn_sec_add_1_itch(data[i].size)] = MAGIC;
+      got_c = mpn_sec_add_1 (got, data[i].src, data[i].size, data[i].n, scratch);
+      got_c ^= scratch [mpn_sec_add_1_itch(data[i].size)] ^ MAGIC;
+      VERIFY ("check_sec_add_1 (separate)");
+
+      SETUP_INPLACE ();
+      got_c = mpn_sec_add_1 (got, got, data[i].size, data[i].n, scratch);
+      VERIFY ("check_sec_add_1 (in-place)");
+
+      /* A literal CNST_LIMB(1) count may take a different (compile-time
+         specialized) code path than a variable, so test it separately.  */
+      if (data[i].n == 1)
+        {
+          SETUP ();
+          got_c = mpn_add_1 (got, data[i].src, data[i].size, CNST_LIMB(1));
+          VERIFY ("check_add_1 (separate, const 1)");
+
+          SETUP_INPLACE ();
+          got_c = mpn_add_1 (got, got, data[i].size, CNST_LIMB(1));
+          VERIFY ("check_add_1 (in-place, const 1)");
+
+          SETUP ();
+          got_c = mpn_sec_add_1 (got, data[i].src, data[i].size,
+				 CNST_LIMB(1), scratch);
+          VERIFY ("check_sec_add_1 (separate, const 1)");
+
+          SETUP_INPLACE ();
+          got_c = mpn_sec_add_1 (got, got, data[i].size,
+				 CNST_LIMB(1), scratch);
+          VERIFY ("check_sec_add_1 (in-place, const 1)");
+        }
+
+      /* Same again on functions, not inlines. */
+      SETUP ();
+      got_c = (*fudge(mpn_add_1)) (got, data[i].src, data[i].size, data[i].n);
+      VERIFY ("check_add_1 (function, separate)");
+
+      SETUP_INPLACE ();
+      got_c = (*fudge(mpn_add_1)) (got, got, data[i].size, data[i].n);
+      VERIFY ("check_add_1 (function, in-place)");
+    }
+}
+
+/* Table-driven tests of mpn_sub_1 and mpn_sec_sub_1, mirroring
+   check_add_1: each entry gives an operand vector, the limb to subtract,
+   and the expected borrow and result.  Every entry is run
+   separate-operand, in-place, (for n==1) with a compile-time constant
+   count, and through a function pointer.  */
+void
+check_sub_1 (void)
+{
+  static const struct {
+    mp_size_t        size;
+    mp_limb_t        n;
+    const mp_limb_t  src[ASIZE];
+    mp_limb_t        want_c;
+    const mp_limb_t  want[ASIZE];
+  } data[] = {
+    { 1, 0, { 0 },  0, { 0 } },
+    { 1, 0, { 1 },  0, { 1 } },
+    { 1, 1, { 1 },  0, { 0 } },
+    { 1, 0, { M },  0, { M } },
+    { 1, 1, { M },  0, { M-1 } },
+    { 1, 1, { 123 }, 0, { 122 } },
+
+    { 1, 1, { 0 },  1, { M } },
+    { 1, M, { 0 },  1, { 1 } },
+
+    { 2, 0, { 0, 0 },  0, { 0, 0 } },
+    { 2, 0, { 1, 0 },  0, { 1, 0 } },
+    { 2, 1, { 1, 0 },  0, { 0, 0 } },
+    { 2, 0, { M, 0 },  0, { M, 0 } },
+    { 2, 1, { M, 0 },  0, { M-1, 0 } },
+    { 2, 1, { 123, 0 }, 0, { 122, 0 } },
+
+    { 2, 1, { 0, 0 },  1, { M, M } },
+    { 2, M, { 0, 0 },  1, { 1, M } },
+
+    { 3, 0, { 0,   0, 0 },  0, { 0,   0, 0 } },
+    { 3, 0, { 123, 0, 0 },  0, { 123, 0, 0 } },
+
+    { 3, 1, { 0, 0, 0 },  1, { M, M, M } },
+    { 3, M, { 0, 0, 0 },  1, { 1, M, M } },
+
+    { 4, 1, { 0, 0, 0, 0 },  1, { M, M, M, M } },
+    { 4, M, { 0, 0, 0, 0 },  1, { 1, M, M, M } },
+
+    { 4, 1, { 0, 0, 1,   42 },  0, { M, M, 0,   42 } },
+    { 4, M, { 0, 0, 123, 24 },  0, { 1, M, 122, 24 } },
+  };
+
+  mp_limb_t  got[ASIZE];
+  mp_limb_t  got_c;
+  /* mpn_sec_sub_1_itch(n) <= n */
+  mp_limb_t  scratch[ASIZE];
+  int        i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      SETUP ();
+      got_c = mpn_sub_1 (got, data[i].src, data[i].size, data[i].n);
+      VERIFY ("check_sub_1 (separate)");
+
+      SETUP_INPLACE ();
+      got_c = mpn_sub_1 (got, got, data[i].size, data[i].n);
+      VERIFY ("check_sub_1 (in-place)");
+
+      SETUP ();
+      /* Guard limb past the needed scratch; the XOR leaves got_c intact
+         unless mpn_sec_sub_1 clobbered the guard.  */
+      scratch [mpn_sec_sub_1_itch(data[i].size)] = MAGIC;
+      got_c = mpn_sec_sub_1 (got, data[i].src, data[i].size, data[i].n, scratch);
+      got_c ^= scratch [mpn_sec_sub_1_itch(data[i].size)] ^ MAGIC;
+      VERIFY ("check_sec_sub_1 (separate)");
+
+      SETUP_INPLACE ();
+      got_c = mpn_sec_sub_1 (got, got, data[i].size, data[i].n, scratch);
+      VERIFY ("check_sec_sub_1 (in-place)");
+
+      /* A literal CNST_LIMB(1) count may take a specialized code path.  */
+      if (data[i].n == 1)
+        {
+          SETUP ();
+          got_c = mpn_sub_1 (got, data[i].src, data[i].size, CNST_LIMB(1));
+          VERIFY ("check_sub_1 (separate, const 1)");
+
+          SETUP_INPLACE ();
+          got_c = mpn_sub_1 (got, got, data[i].size, CNST_LIMB(1));
+          VERIFY ("check_sub_1 (in-place, const 1)");
+
+          SETUP ();
+          got_c = mpn_sec_sub_1 (got, data[i].src, data[i].size,
+				 CNST_LIMB(1), scratch);
+          VERIFY ("check_sec_sub_1 (separate, const 1)");
+
+          SETUP_INPLACE ();
+          got_c = mpn_sec_sub_1 (got, got, data[i].size,
+				 CNST_LIMB(1), scratch);
+          VERIFY ("check_sec_sub_1 (in-place, const 1)");
+        }
+
+      /* Same again on functions, not inlines. */
+      SETUP ();
+      got_c = (*fudge(mpn_sub_1)) (got, data[i].src, data[i].size, data[i].n);
+      VERIFY ("check_sub_1 (function, separate)");
+
+      SETUP_INPLACE ();
+      got_c = (*fudge(mpn_sub_1)) (got, got, data[i].size, data[i].n);
+      VERIFY ("check_sub_1 (function, in-place)");
+    }
+}
+
+/* Try to prevent the optimizer inlining. */
+mpn_aors_1_t
+fudge (mpn_aors_1_t f)
+{
+  /* Identity function: laundering the pointer through an external call
+     forces the library (non-inline) mpn_add_1/mpn_sub_1 entry points to
+     be exercised.  */
+  return f;
+}
+
+/* Run both table-driven checks under the standard test harness.  */
+int
+main (void)
+{
+  tests_start ();
+  /* 16 selects hexadecimal trace output; the sign controls formatting
+     details in tests/trace.c — NOTE(review): confirm sign semantics
+     there.  */
+  mp_trace_base = -16;
+
+  check_add_1 ();
+  check_sub_1 ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-asmtype.c b/tests/mpn/t-asmtype.c
new file mode 100644
index 0000000..c6577d1
--- /dev/null
+++ b/tests/mpn/t-asmtype.c
@@ -0,0 +1,63 @@
+/* Test .type directives on assembler functions.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+
+#include "tests.h"
+
+
+/* This apparently trivial test is designed to detect missing .type and
+   .size directives in asm code, per the problem described under
+   GMP_ASM_TYPE in acinclude.m4.
+
+   A failure can be provoked in a shared or shared+static build by making
+   TYPE and SIZE in config.m4 empty, either by editing it or by configuring
+   with
+
+       ./configure gmp_cv_asm_type= gmp_cv_asm_size=
+
+   mpn_add_n is used for the test because normally it's implemented in
+   assembler on a CPU that has any asm code.
+
+   Enhancement: As noted with GMP_ASM_TYPE, if .type is wrong but .size is
+   right then everything works, but uses code copied down to the mainline
+   data area.  Maybe we could detect that if we built a test library with an
+   object that had .size deliberately disabled.  */
+
+/* A single trivial mpn_add_n call; per the comment above, its purpose is
+   to fail visibly when asm .type/.size directives are missing in a
+   shared build, not to test arithmetic.  */
+int
+main (void)
+{
+  static const mp_limb_t x[3]    = { 1, 2, 3 };
+  static const mp_limb_t y[3]    = { 4, 5, 6 };
+  static const mp_limb_t want[3] = { 5, 7, 9 };
+  mp_limb_t  got[3];
+
+  mpn_add_n (got, x, y, (mp_size_t) 3);
+
+  /* refmpn_cmp (not mpn_cmp) so the comparison itself does not depend on
+     possibly-broken asm code.  */
+  if (refmpn_cmp (got, want, (mp_size_t) 3) != 0)
+    {
+      printf ("Wrong result from mpn_add_n\n");
+      abort ();
+    }
+
+  exit (0);
+}
diff --git a/tests/mpn/t-bdiv.c b/tests/mpn/t-bdiv.c
new file mode 100644
index 0000000..60f58da
--- /dev/null
+++ b/tests/mpn/t-bdiv.c
@@ -0,0 +1,354 @@
+/* Copyright 2006, 2007, 2009, 2010, 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>		/* for strtol */
+#include <stdio.h>		/* for printf */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+
+/* Print {p,n} in hex, most significant limb first; vectors longer than
+   20 limbs are abbreviated to the top four limbs, "...", and the bottom
+   four.  NOTE(review): "%lx" assumes mp_limb_t is unsigned long — this
+   mismatches _LONG_LONG_LIMB / LLP64 builds; gmp_printf's %Mx would be
+   portable — TODO confirm targeted ABIs.  */
+static void
+dumpy (mp_srcptr p, mp_size_t n)
+{
+  mp_size_t i;
+  if (n > 20)
+    {
+      for (i = n - 1; i >= n - 4; i--)
+	{
+	  printf ("%0*lx", (int) (2 * sizeof (mp_limb_t)), p[i]);
+	  printf (" ");
+	}
+      printf ("... ");
+      for (i = 3; i >= 0; i--)
+	{
+	  printf ("%0*lx", (int) (2 * sizeof (mp_limb_t)), p[i]);
+	  printf (i == 0 ? "" : " ");
+	}
+    }
+  else
+    {
+      for (i = n - 1; i >= 0; i--)
+	{
+	  printf ("%0*lx", (int) (2 * sizeof (mp_limb_t)), p[i]);
+	  printf (i == 0 ? "" : " ");
+	}
+    }
+  puts ("");
+}
+
+/* Sequence number of the current test, for error reports.  */
+static unsigned long test;
+
+/* Verify one bdiv (Hensel/2-adic division) result.  The functions under
+   test are called with a negated inverse, so the quotient satisfies
+   Q*D == -N (mod B^qn): therefore Q*D + N must have its low qn limbs
+   zero, and when a remainder is supplied, the high dn limbs of Q*D + N
+   with carry cy must match {rp,dn} with returned high limb rh.  */
+void
+check_one (mp_ptr qp, mp_srcptr rp, mp_limb_t rh,
+	   mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, const char *fname)
+{
+  mp_size_t qn;
+  mp_ptr tp;
+  mp_limb_t cy = 4711;		/* silence warnings */
+  TMP_DECL;
+
+  qn = nn - dn;
+
+  /* A zero-limb quotient leaves nothing to check.  */
+  if (qn == 0)
+    return;
+
+  TMP_MARK;
+
+  tp = TMP_ALLOC_LIMBS (nn + 1);
+
+  /* mpn_mul wants the larger operand first.  */
+  if (dn >= qn)
+    mpn_mul (tp, dp, dn, qp, qn);
+  else
+    mpn_mul (tp, qp, qn, dp, dn);
+
+  cy = mpn_add_n (tp, tp, np, nn);
+
+  if (! mpn_zero_p (tp, qn)
+      || (rp != NULL && (cy != rh || mpn_cmp (tp + qn, rp, dn) != 0)))
+    {
+      printf ("\r*******************************************************************************\n");
+      printf ("%s inconsistent in test %lu\n", fname, test);
+      printf ("N=   "); dumpy (np, nn);
+      printf ("D=   "); dumpy (dp, dn);
+      printf ("Q=   "); dumpy (qp, qn);
+      if (rp != NULL)
+	{
+	  printf ("R=   "); dumpy (rp, dn);
+	  printf ("Rb=  %d, Cy=%d\n", (int) cy, (int) rh);
+	}
+      printf ("T=   "); dumpy (tp, nn);
+      printf ("nn = %ld, dn = %ld, qn = %ld", nn, dn, qn);
+      printf ("\n*******************************************************************************\n");
+      abort ();
+    }
+
+  TMP_FREE;
+}
+
+
+/* These are *bit* sizes. */
+#define SIZE_LOG 16
+#define MAX_DN (1L << SIZE_LOG)
+#define MAX_NN (1L << (SIZE_LOG + 1))
+
+#define COUNT 500
+
+/* Return a 32-bit random value drawn from the shared random state,
+   using a stack-temporary mpz so no heap allocation is left behind.  */
+mp_limb_t
+random_word (gmp_randstate_ptr rs)
+{
+  mpz_t x;
+  mp_limb_t r;
+  TMP_DECL;
+  TMP_MARK;
+
+  MPZ_TMP_INIT (x, 2);
+  mpz_urandomb (x, rs, 32);
+  r = mpz_get_ui (x);
+  TMP_FREE;
+  return r;
+}
+
+/* Random-operand driver for the bdiv family: mpn_sbpi1_bdiv_{qr,q,r},
+   mpn_dcpi1_bdiv_{qr,q}, mpn_bdiv_qr and mpn_mu_bdiv_{qr,q}, each result
+   cross-checked by check_one and guard limbs checked around every output
+   and scratch area.  */
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+  unsigned long maxnbits, maxdbits, nbits, dbits;
+  mpz_t n, d, tz;
+  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
+  mp_ptr np, dp, qp, rp;
+  mp_limb_t rh;
+  mp_limb_t t;
+  mp_limb_t dinv;
+  int count = COUNT;
+  mp_ptr scratch;
+  mp_limb_t ran;
+  mp_size_t alloc, itch;
+  mp_limb_t rran0, rran1, qran0, qran1;
+  TMP_DECL;
+
+  TESTS_REPS (count, argv, argc);
+
+  maxdbits = MAX_DN;
+  maxnbits = MAX_NN;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (n);
+  mpz_init (d);
+  mpz_init (tz);
+
+  maxnn = maxnbits / GMP_NUMB_BITS + 1;
+  maxdn = maxdbits / GMP_NUMB_BITS + 1;
+
+  TMP_MARK;
+
+  /* Offset by one limb so a guard value can live at index -1 and be
+     checked after every call.  */
+  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+
+  alloc = 1;
+  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
+
+  for (test = 0; test < count;)
+    {
+      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
+      if (maxdbits > nbits)
+	dbits = random_word (rands) % nbits + 1;
+      else
+	dbits = random_word (rands) % maxdbits + 1;
+
+#if RAND_UNIFORM
+#define RANDFUNC mpz_urandomb
+#else
+#define RANDFUNC mpz_rrandomb
+#endif
+
+      /* Draw operands until the numerator has at least as many limbs as
+         the (nonzero) divisor.  */
+      do
+	{
+	  RANDFUNC (n, rands, nbits);
+	  do
+	    {
+	      RANDFUNC (d, rands, dbits);
+	    }
+	  while (mpz_sgn (d) == 0);
+
+	  np = PTR (n);
+	  dp = PTR (d);
+	  nn = SIZ (n);
+	  dn = SIZ (d);
+	}
+      while (nn < dn);
+
+      /* bdiv (Hensel) division requires an odd divisor.  */
+      dp[0] |= 1;
+
+      mpz_urandomb (tz, rands, 32);
+      t = mpz_get_ui (tz);
+
+      if (t % 17 == 0)
+	dp[0] = GMP_NUMB_MAX;
+
+      /* Occasionally massage N: zero its low limbs, or make it close to
+         a shifted copy of D, to exercise corner cases.  */
+      switch ((int) t % 16)
+	{
+	case 0:
+	  clearn = random_word (rands) % nn;
+	  for (i = 0; i <= clearn; i++)
+	    np[i] = 0;
+	  break;
+	case 1:
+	  mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
+	  break;
+	case 2:
+	  mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
+	  break;
+	}
+
+      test++;
+
+      /* dinv = 1/dp[0] mod B; the bdiv functions below take -dinv.  */
+      binvert_limb (dinv, dp[0]);
+
+      rran0 = random_word (rands);
+      rran1 = random_word (rands);
+      qran0 = random_word (rands);
+      qran1 = random_word (rands);
+
+      /* Plant guard limbs around the quotient and remainder areas.  */
+      qp[-1] = qran0;
+      qp[nn - dn + 1] = qran1;
+      rp[-1] = rran0;
+
+      ran = random_word (rands);
+
+      /* The schoolbook functions are quadratic; skip them when the
+         operand product would make this test too slow.  */
+      if ((double) (nn - dn) * dn < 1e5)
+	{
+	  if (nn > dn)
+	    {
+	      /* Test mpn_sbpi1_bdiv_qr */
+	      MPN_ZERO (qp, nn - dn);
+	      MPN_ZERO (rp, dn);
+	      MPN_COPY (rp, np, nn);
+	      rh = mpn_sbpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
+	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	      ASSERT_ALWAYS (rp[-1] == rran0);
+	      check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_sbpi1_bdiv_qr");
+
+	      /* Test mpn_sbpi1_bdiv_q */
+	      MPN_COPY (rp, np, nn);
+	      MPN_ZERO (qp, nn - dn);
+	      mpn_sbpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
+	      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	      ASSERT_ALWAYS (rp[-1] == rran0);
+	      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_q");
+
+	      /* Test mpn_sbpi1_bdiv_r; we use mpn_sbpi1_bdiv_q's quotient. */
+	      MPN_COPY (rp, np, nn);
+	      mpn_sbpi1_bdiv_r (rp, nn, dp, dn, -dinv);
+	      ASSERT_ALWAYS (rp[-1] == rran0);
+	      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_r");
+	    }
+	}
+
+      /* Divide-and-conquer variants need minimum operand sizes.  */
+      if (dn >= 4 && nn - dn >= 2)
+	{
+	  /* Test mpn_dcpi1_bdiv_qr */
+	  MPN_COPY (rp, np, nn);
+	  MPN_ZERO (qp, nn - dn);
+	  rh = mpn_dcpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  ASSERT_ALWAYS (rp[-1] == rran0);
+	  check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_dcpi1_bdiv_qr");
+	}
+
+      if (dn >= 4 && nn - dn >= 2)
+	{
+	  /* Test mpn_dcpi1_bdiv_q */
+	  MPN_COPY (rp, np, nn);
+	  MPN_ZERO (qp, nn - dn);
+	  mpn_dcpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  ASSERT_ALWAYS (rp[-1] == rran0);
+	  check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q");
+	}
+
+      if (nn > dn)
+	{
+	  /* Test mpn_bdiv_qr */
+	  itch = mpn_bdiv_qr_itch (nn, dn);
+	  if (itch + 1 > alloc)
+	    {
+	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	      alloc = itch + 1;
+	    }
+	  /* Guard limb just past the declared scratch need.  */
+	  scratch[itch] = ran;
+	  MPN_ZERO (qp, nn - dn);
+	  MPN_ZERO (rp, dn);
+	  rp[dn] = rran1;
+	  rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
+
+	  check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr");
+	}
+
+      if (nn - dn < 2 || dn < 2)
+	continue;
+
+      /* Test mpn_mu_bdiv_qr */
+      itch = mpn_mu_bdiv_qr_itch (nn, dn);
+      if (itch + 1 > alloc)
+	{
+	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	  alloc = itch + 1;
+	}
+      scratch[itch] = ran;
+      MPN_ZERO (qp, nn - dn);
+      MPN_ZERO (rp, dn);
+      rp[dn] = rran1;
+      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
+      ASSERT_ALWAYS (ran == scratch[itch]);
+      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+      ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
+      check_one (qp, rp, rh, np, nn, dp, dn, "mpn_mu_bdiv_qr");
+
+      /* Test mpn_mu_bdiv_q */
+      itch = mpn_mu_bdiv_q_itch (nn, dn);
+      if (itch + 1 > alloc)
+	{
+	  scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	  alloc = itch + 1;
+	}
+      scratch[itch] = ran;
+      MPN_ZERO (qp, nn - dn + 1);
+      mpn_mu_bdiv_q (qp, np, nn - dn, dp, dn, scratch);
+      ASSERT_ALWAYS (ran == scratch[itch]);
+      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_mu_bdiv_q");
+    }
+
+  __GMP_FREE_FUNC_LIMBS (scratch, alloc);
+
+  TMP_FREE;
+
+  mpz_clear (n);
+  mpz_clear (d);
+  mpz_clear (tz);
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-broot.c b/tests/mpn/t-broot.c
new file mode 100644
index 0000000..bd8e80f
--- /dev/null
+++ b/tests/mpn/t-broot.c
@@ -0,0 +1,108 @@
+/* Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>		/* for strtol */
+#include <stdio.h>		/* for printf */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+/* Random tests of mpn_broot: for odd a and odd k >= 3 compute
+   r = a^{1/k} mod B^n, then verify r^k mod B^n (via mpn_powlo) equals a.
+   Finishes with the k = 1 special case, where the root equals the
+   input.  */
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+
+  mp_ptr ap, rp, pp, scratch;
+  int count = COUNT;
+  unsigned i;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  scratch = TMP_ALLOC_LIMBS (3*MAX_LIMBS); /* For mpn_powlo */
+
+  for (i = 0; i < count; i++)
+    {
+      mp_size_t n;
+      mp_limb_t k;
+      int c;
+
+      n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS);
+
+      /* Alternate between long-run and uniform random operands.  */
+      if (i & 1)
+	mpn_random2 (ap, n);
+      else
+	mpn_random (ap, n);
+
+      /* The 2-adic root needs an odd (invertible mod B^n) input.  */
+      ap[0] |= 1;
+
+      /* Small odd exponents first, then random odd exponents >= 3.  */
+      if (i < 100)
+	k = 3 + 2*i;
+      else
+	{
+	  mpn_random (&k, 1);
+	  if (k < 3)
+	    k = 3;
+	  else
+	    k |= 1;
+	}
+      mpn_broot (rp, ap, n, k);
+      mpn_powlo (pp, rp, &k, 1, n, scratch);
+
+      MPN_CMP (c, ap, pp, n);
+      if (c != 0)
+	{
+	  gmp_fprintf (stderr,
+		       "mpn_broot returned bad result: %u limbs\n",
+		       (unsigned) n);
+	  gmp_fprintf (stderr, "k   = %Mx\n", k);
+	  gmp_fprintf (stderr, "a   = %Nx\n", ap, n);
+	  gmp_fprintf (stderr, "r   = %Nx\n", rp, n);
+	  gmp_fprintf (stderr, "r^k = %Nx\n", pp, n);
+	  abort ();
+	}
+    }
+
+  /* k = 1: the "root" must be the input itself.  */
+  mpn_broot (rp, ap, MAX_LIMBS, 1);
+  if (mpn_cmp (ap, rp, MAX_LIMBS) != 0)
+    {
+      gmp_fprintf (stderr,
+		   "mpn_broot returned bad result: %u limbs\n",
+		   (unsigned) MAX_LIMBS);
+      /* Fix: %Mx pulls an mp_limb_t from the varargs, so an int literal 1
+	 is undefined behavior wherever mp_limb_t is wider than int; pass
+	 a properly-typed limb constant.  */
+      gmp_fprintf (stderr, "k   = %Mx\n", CNST_LIMB (1));
+      gmp_fprintf (stderr, "a   = %Nx\n", ap, MAX_LIMBS);
+      gmp_fprintf (stderr, "r   = %Nx\n", rp, MAX_LIMBS);
+      abort ();
+    }
+
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-brootinv.c b/tests/mpn/t-brootinv.c
new file mode 100644
index 0000000..f5a9950
--- /dev/null
+++ b/tests/mpn/t-brootinv.c
@@ -0,0 +1,96 @@
+/* Copyright 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>		/* for strtol */
+#include <stdio.h>		/* for printf */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+/* Random tests of mpn_brootinv: for odd a and odd k >= 3 compute
+   r = a^{-1/k} mod B^n, then check a * r^k == 1 mod B^n using
+   mpn_powlo and mpn_mullo_n.  */
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+
+  mp_ptr ap, rp, pp, app, scratch;
+  int count = COUNT;
+  unsigned i;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  app = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  /* Sized for the larger of mpn_brootinv's and mpn_powlo's needs.  */
+  scratch = TMP_ALLOC_LIMBS (5*MAX_LIMBS);
+
+  for (i = 0; i < count; i++)
+    {
+      mp_size_t n;
+      mp_limb_t k;
+
+      n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS);
+
+      /* Alternate between long-run and uniform random operands.  */
+      if (i & 1)
+	mpn_random2 (ap, n);
+      else
+	mpn_random (ap, n);
+
+      /* The input must be odd, i.e. invertible mod B^n.  */
+      ap[0] |= 1;
+
+      /* Small odd exponents first, then random odd exponents >= 3.  */
+      if (i < 100)
+	k = 3 + 2*i;
+      else
+	{
+	  mpn_random (&k, 1);
+	  if (k < 3)
+	    k = 3;
+	  else
+	    k |= 1;
+	}
+      mpn_brootinv (rp, ap, n, k, scratch);
+      mpn_powlo (pp, rp, &k, 1, n, scratch);
+      mpn_mullo_n (app, ap, pp, n);
+
+      /* a * r^k must equal 1 mod B^n: low limb 1, all others zero.  */
+      if (app[0] != 1 || !(n == 1 || mpn_zero_p (app+1, n-1)))
+	{
+	  gmp_fprintf (stderr,
+		       "mpn_brootinv returned bad result: %u limbs\n",
+		       (unsigned) n);
+	  gmp_fprintf (stderr, "k     = %Mx\n", k);
+	  gmp_fprintf (stderr, "a     = %Nx\n", ap, n);
+	  gmp_fprintf (stderr, "r     = %Nx\n", rp, n);
+	  gmp_fprintf (stderr, "r^n   = %Nx\n", pp, n);
+	  gmp_fprintf (stderr, "a r^n = %Nx\n", app, n);
+	  abort ();
+	}
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-div.c b/tests/mpn/t-div.c
new file mode 100644
index 0000000..110385f
--- /dev/null
+++ b/tests/mpn/t-div.c
@@ -0,0 +1,497 @@
+/* Copyright 2006, 2007, 2009, 2010, 2013-2015, 2018 Free Software
+   Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>		/* for strtol */
+#include <stdio.h>		/* for printf */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+
+/* Print {p,n} in hex, most significant limb first; vectors longer than
+   20 limbs are abbreviated to the top four limbs, "...", and the bottom
+   four.  (Duplicated from t-bdiv.c.)  NOTE(review): "%lx" assumes
+   mp_limb_t is unsigned long — mismatches _LONG_LONG_LIMB / LLP64
+   builds; gmp_printf's %Mx would be portable — TODO confirm.  */
+static void
+dumpy (mp_srcptr p, mp_size_t n)
+{
+  mp_size_t i;
+  if (n > 20)
+    {
+      for (i = n - 1; i >= n - 4; i--)
+	{
+	  printf ("%0*lx", (int) (2 * sizeof (mp_limb_t)), p[i]);
+	  printf (" ");
+	}
+      printf ("... ");
+      for (i = 3; i >= 0; i--)
+	{
+	  printf ("%0*lx", (int) (2 * sizeof (mp_limb_t)), p[i]);
+	  printf (i == 0 ? "" : " ");
+	}
+    }
+  else
+    {
+      for (i = n - 1; i >= 0; i--)
+	{
+	  printf ("%0*lx", (int) (2 * sizeof (mp_limb_t)), p[i]);
+	  printf (i == 0 ? "" : " ");
+	}
+    }
+  puts ("");
+}
+
+/* Sequence number of the current test (negative during warm-up rounds),
+   for error reports.  */
+static signed long test;
+
+/* Verify one Euclidean division result: recompute Q*D, allow the
+   quotient to exceed the true one by at most q_allowed_err (the
+   divappr functions may return a slightly large quotient) by
+   subtracting D back, then require 0 <= N - Q*D < D and, when rp is
+   given, N - Q*D == {rp,dn}.  Aborts with a dump on failure.  */
+static void
+check_one (mp_ptr qp, mp_srcptr rp,
+	   mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
+	   const char *fname, mp_limb_t q_allowed_err)
+{
+  mp_size_t qn = nn - dn + 1;
+  mp_ptr tp;
+  const char *msg;
+  const char *tvalue;
+  mp_limb_t i;
+  TMP_DECL;
+  TMP_MARK;
+
+  tp = TMP_ALLOC_LIMBS (nn + 1);
+  /* refmpn_mul wants the larger operand first.  */
+  if (dn >= qn)
+    refmpn_mul (tp, dp, dn, qp, qn);
+  else
+    refmpn_mul (tp, qp, qn, dp, dn);
+
+  /* Undo up to q_allowed_err units of quotient overshoot.  */
+  for (i = 0; i < q_allowed_err && (tp[nn] > 0 || mpn_cmp (tp, np, nn) > 0); i++)
+    ASSERT_NOCARRY (refmpn_sub (tp, tp, nn+1, dp, dn));
+
+  if (tp[nn] > 0 || mpn_cmp (tp, np, nn) > 0)
+    {
+      msg = "q too large";
+      tvalue = "Q*D";
+    error:
+      printf ("\r*******************************************************************************\n");
+      printf ("%s failed test %ld: %s\n", fname, test, msg);
+      printf ("N=    "); dumpy (np, nn);
+      printf ("D=    "); dumpy (dp, dn);
+      printf ("Q=    "); dumpy (qp, qn);
+      if (rp)
+	{ printf ("R=    "); dumpy (rp, dn); }
+      printf ("%5s=", tvalue); dumpy (tp, nn+1);
+      printf ("nn = %ld, dn = %ld, qn = %ld\n", nn, dn, qn);
+      abort ();
+    }
+
+  /* tp now holds the implied remainder N - Q*D; it must fit in dn limbs
+     and be strictly less than D.  */
+  ASSERT_NOCARRY (refmpn_sub_n (tp, np, tp, nn));
+  tvalue = "N-Q*D";
+  if (!(nn == dn || mpn_zero_p (tp + dn, nn - dn)) || mpn_cmp (tp, dp, dn) >= 0)
+    {
+      msg = "q too small";
+      goto error;
+    }
+
+  if (rp && mpn_cmp (rp, tp, dn) != 0)
+    {
+      msg = "r incorrect";
+      goto error;
+    }
+
+  TMP_FREE;
+}
+
+
+/* These are *bit* sizes. */
+#ifndef SIZE_LOG
+#define SIZE_LOG 17
+#endif
+#define MAX_DN (1L << SIZE_LOG)
+#define MAX_NN (1L << (SIZE_LOG + 1))
+
+#define COUNT 200
+
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+  unsigned long maxnbits, maxdbits, nbits, dbits;
+  mpz_t n, d, q, r, tz, junk;
+  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
+  mp_ptr np, dup, dnp, qp, rp, junkp;
+  mp_limb_t t;
+  gmp_pi1_t dinv;
+  long count = COUNT;
+  mp_ptr scratch;
+  mp_limb_t ran;
+  mp_size_t alloc, itch;
+  mp_limb_t rran0, rran1, qran0, qran1;
+  TMP_DECL;
+
+  TESTS_REPS (count, argv, argc);
+
+  maxdbits = MAX_DN;
+  maxnbits = MAX_NN;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (n);
+  mpz_init (d);
+  mpz_init (q);
+  mpz_init (r);
+  mpz_init (tz);
+  mpz_init (junk);
+
+  maxnn = maxnbits / GMP_NUMB_BITS + 1;
+  maxdn = maxdbits / GMP_NUMB_BITS + 1;
+
+  TMP_MARK;
+
+  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+  dnp = TMP_ALLOC_LIMBS (maxdn);
+
+  alloc = 1;
+  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
+
+  for (test = -300; test < count; test++)
+    {
+      nbits = urandom () % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
+
+      if (test < 0)
+	dbits = (test + 300) % (nbits - 1) + 1;
+      else
+	dbits = urandom () % (nbits - 1) % maxdbits + 1;
+
+#if RAND_UNIFORM
+#define RANDFUNC mpz_urandomb
+#else
+#define RANDFUNC mpz_rrandomb
+#endif
+
+      do
+	RANDFUNC (d, rands, dbits);
+      while (mpz_sgn (d) == 0);
+      dn = SIZ (d);
+      dup = PTR (d);
+      MPN_COPY (dnp, dup, dn);
+      dnp[dn - 1] |= GMP_NUMB_HIGHBIT;
+
+      if (test % 2 == 0)
+	{
+	  RANDFUNC (n, rands, nbits);
+	  nn = SIZ (n);
+	  ASSERT_ALWAYS (nn >= dn);
+	}
+      else
+	{
+	  do
+	    {
+	      RANDFUNC (q, rands, urandom () % (nbits - dbits + 1));
+	      RANDFUNC (r, rands, urandom () % mpz_sizeinbase (d, 2));
+	      mpz_mul (n, q, d);
+	      mpz_add (n, n, r);
+	      nn = SIZ (n);
+	    }
+	  while (nn > maxnn || nn < dn);
+	}
+
+      ASSERT_ALWAYS (nn <= maxnn);
+      ASSERT_ALWAYS (dn <= maxdn);
+
+      mpz_urandomb (junk, rands, nbits);
+      junkp = PTR (junk);
+
+      np = PTR (n);
+
+      mpz_urandomb (tz, rands, 32);
+      t = mpz_get_ui (tz);
+
+      if (t % 17 == 0)
+	{
+	  dnp[dn - 1] = GMP_NUMB_MAX;
+	  dup[dn - 1] = GMP_NUMB_MAX;
+	}
+
+      switch ((int) t % 16)
+	{
+	case 0:
+	  clearn = urandom () % nn;
+	  for (i = clearn; i < nn; i++)
+	    np[i] = 0;
+	  break;
+	case 1:
+	  mpn_sub_1 (np + nn - dn, dnp, dn, urandom ());
+	  break;
+	case 2:
+	  mpn_add_1 (np + nn - dn, dnp, dn, urandom ());
+	  break;
+	}
+
+      if (dn >= 2)
+	invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]);
+
+      rran0 = urandom ();
+      rran1 = urandom ();
+      qran0 = urandom ();
+      qran1 = urandom ();
+
+      qp[-1] = qran0;
+      qp[nn - dn + 1] = qran1;
+      rp[-1] = rran0;
+
+      ran = urandom ();
+
+      if ((double) (nn - dn) * dn < 1e5)
+	{
+	  /* Test mpn_sbpi1_div_qr */
+	  if (dn > 2)
+	    {
+	      MPN_COPY (rp, np, nn);
+	      if (nn > dn)
+		MPN_COPY (qp, junkp, nn - dn);
+	      qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32);
+	      check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0);
+	    }
+
+	  /* Test mpn_sbpi1_divappr_q */
+	  if (dn > 2)
+	    {
+	      MPN_COPY (rp, np, nn);
+	      if (nn > dn)
+		MPN_COPY (qp, junkp, nn - dn);
+	      qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32);
+	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1);
+	    }
+
+	  /* Test mpn_sbpi1_div_q */
+	  if (dn > 2)
+	    {
+	      MPN_COPY (rp, np, nn);
+	      if (nn > dn)
+		MPN_COPY (qp, junkp, nn - dn);
+	      qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32);
+	      check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0);
+	    }
+
+	  /* Test mpn_sec_div_qr */
+	  itch = mpn_sec_div_qr_itch (nn, dn);
+	  if (itch + 1 > alloc)
+	    {
+	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	      alloc = itch + 1;
+	    }
+	  scratch[itch] = ran;
+	  MPN_COPY (rp, np, nn);
+	  if (nn >= dn)
+	    MPN_COPY (qp, junkp, nn - dn + 1);
+	  qp[nn - dn] = mpn_sec_div_qr (qp, rp, nn, dup, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  check_one (qp, rp, np, nn, dup, dn, "mpn_sec_div_qr (unnorm)", 0);
+
+	  /* Test mpn_sec_div_r */
+	  itch = mpn_sec_div_r_itch (nn, dn);
+	  if (itch + 1 > alloc)
+	    {
+	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	      alloc = itch + 1;
+	    }
+	  scratch[itch] = ran;
+	  MPN_COPY (rp, np, nn);
+	  mpn_sec_div_r (rp, nn, dup, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  /* Note: Since check_one cannot cope with remainder-only functions, we
+	     pass qp[] from the previous function, mpn_sec_div_qr.  */
+	  check_one (qp, rp, np, nn, dup, dn, "mpn_sec_div_r (unnorm)", 0);
+
+	  /* Normalised case, mpn_sec_div_qr */
+	  itch = mpn_sec_div_qr_itch (nn, dn);
+	  scratch[itch] = ran;
+
+	  MPN_COPY (rp, np, nn);
+	  if (nn >= dn)
+	    MPN_COPY (qp, junkp, nn - dn + 1);
+	  qp[nn - dn] = mpn_sec_div_qr (qp, rp, nn, dnp, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  check_one (qp, rp, np, nn, dnp, dn, "mpn_sec_div_qr (norm)", 0);
+
+	  /* Normalised case, mpn_sec_div_r */
+	  itch = mpn_sec_div_r_itch (nn, dn);
+	  scratch[itch] = ran;
+	  MPN_COPY (rp, np, nn);
+	  mpn_sec_div_r (rp, nn, dnp, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  /* Note: Since check_one cannot cope with remainder-only functions, we
+	     pass qp[] from the previous function, mpn_sec_div_qr.  */
+	  check_one (qp, rp, np, nn, dnp, dn, "mpn_sec_div_r (norm)", 0);
+	}
+
+      /* Test mpn_dcpi1_div_qr */
+      if (dn >= 6 && nn - dn >= 3)
+	{
+	  MPN_COPY (rp, np, nn);
+	  if (nn > dn)
+	    MPN_COPY (qp, junkp, nn - dn);
+	  qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  ASSERT_ALWAYS (rp[-1] == rran0);
+	  check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0);
+	}
+
+      /* Test mpn_dcpi1_divappr_q */
+      if (dn >= 6 && nn - dn >= 3)
+	{
+	  MPN_COPY (rp, np, nn);
+	  if (nn > dn)
+	    MPN_COPY (qp, junkp, nn - dn);
+	  qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  ASSERT_ALWAYS (rp[-1] == rran0);
+	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1);
+	}
+
+      /* Test mpn_dcpi1_div_q */
+      if (dn >= 6 && nn - dn >= 3)
+	{
+	  MPN_COPY (rp, np, nn);
+	  if (nn > dn)
+	    MPN_COPY (qp, junkp, nn - dn);
+	  qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  ASSERT_ALWAYS (rp[-1] == rran0);
+	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0);
+	}
+
+     /* Test mpn_mu_div_qr */
+      if (nn - dn > 2 && dn >= 2)
+	{
+	  itch = mpn_mu_div_qr_itch (nn, dn, 0);
+	  if (itch + 1 > alloc)
+	    {
+	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	      alloc = itch + 1;
+	    }
+	  scratch[itch] = ran;
+	  MPN_COPY (qp, junkp, nn - dn);
+	  MPN_ZERO (rp, dn);
+	  rp[dn] = rran1;
+	  qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
+	  check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0);
+	}
+
+      /* Test mpn_mu_divappr_q */
+      if (nn - dn > 2 && dn >= 2)
+	{
+	  itch = mpn_mu_divappr_q_itch (nn, dn, 0);
+	  if (itch + 1 > alloc)
+	    {
+	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	      alloc = itch + 1;
+	    }
+	  scratch[itch] = ran;
+	  MPN_COPY (qp, junkp, nn - dn);
+	  qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4);
+	}
+
+      /* Test mpn_mu_div_q */
+      if (nn - dn > 2 && dn >= 2)
+	{
+	  itch = mpn_mu_div_q_itch (nn, dn, 0);
+	  if (itch + 1> alloc)
+	    {
+	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	      alloc = itch + 1;
+	    }
+	  scratch[itch] = ran;
+	  MPN_COPY (qp, junkp, nn - dn);
+	  qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0);
+	}
+
+      if (1)
+	{
+	  itch = nn + 1;
+	  if (itch + 1> alloc)
+	    {
+	      scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+	      alloc = itch + 1;
+	    }
+	  scratch[itch] = ran;
+	  MPN_COPY (qp, junkp, nn - dn + 1);
+	  mpn_div_q (qp, np, nn, dup, dn, scratch);
+	  ASSERT_ALWAYS (ran == scratch[itch]);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+	  check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0);
+	}
+
+      if (dn >= 2 && nn >= 2)
+	{
+	  mp_limb_t qh;
+
+	  /* mpn_divrem_2 */
+	  MPN_COPY (rp, np, nn);
+	  qp[nn - 2] = qp[nn-1] = qran1;
+
+	  qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2);
+	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
+	  qp[nn - 2] = qh;
+	  check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0);
+
+	  /* Missing: divrem_2 with fraction limbs. */
+
+	  /* mpn_div_qr_2 */
+	  qp[nn - 2] = qran1;
+
+	  qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2);
+	  ASSERT_ALWAYS (qp[nn - 2] == qran1);
+	  ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
+	  qp[nn - 2] = qh;
+	  check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0);
+	}
+      if (dn >= 1 && nn >= 1)
+	{
+	  /* mpn_div_qr_1 */
+	  mp_limb_t qh;
+	  qp[nn-1] = qran1;
+	  rp[0] = mpn_div_qr_1 (qp, &qh, np, nn, dnp[dn - 1]);
+	  ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - 1] == qran1);
+	  qp[nn - 1] = qh;
+	  check_one (qp, rp, np, nn,  dnp + dn - 1, 1, "mpn_div_qr_1", 0);
+	}
+    }
+
+  __GMP_FREE_FUNC_LIMBS (scratch, alloc);
+
+  TMP_FREE;
+
+  mpz_clear (n);
+  mpz_clear (d);
+  mpz_clear (q);
+  mpz_clear (r);
+  mpz_clear (tz);
+  mpz_clear (junk);
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-divrem_1.c b/tests/mpn/t-divrem_1.c
new file mode 100644
index 0000000..991b7da
--- /dev/null
+++ b/tests/mpn/t-divrem_1.c
@@ -0,0 +1,123 @@
+/* Test mpn_divrem_1 and mpn_preinv_divrem_1.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_data (void)
+{
+  static const struct {
+    mp_limb_t  n[1];
+    mp_size_t  nsize;
+    mp_limb_t  d;
+    mp_size_t  qxn;
+    mp_limb_t  want_q[5];
+    mp_limb_t  want_r;
+  } data[] = {
+    { { 0 }, 1, 1, 0,
+      { 0 }, 0},
+
+    { { 5 }, 1, 2, 0,
+      { 2 }, 1},
+
+    /* Exercises the q update in the nl == constant 0 case of
+       udiv_qrnnd_preinv3. Test case copied from t-fat.c. */
+    { { 287 }, 1, 7, 1,
+      { 0, 41 }, 0 },
+
+#if GMP_NUMB_BITS == 32
+    { { 0x3C }, 1, 0xF2, 1,
+      { 0x3F789854, 0 }, 0x98 },
+#endif
+
+#if GMP_NUMB_BITS == 64
+    { { 0x3C }, 1, 0xF2, 1,
+      { CNST_LIMB(0x3F789854A0CB1B81), 0 }, 0x0E },
+
+    /* This case exposed some wrong code generated by SGI cc on mips64 irix
+       6.5 with -n32 -O2, in the fractional loop for normalized divisor
+       using udiv_qrnnd_preinv.  A test "x>al" in one of the sub_ddmmss
+       expansions came out wrong, leading to an incorrect quotient.  */
+    { { CNST_LIMB(0x3C00000000000000) }, 1, CNST_LIMB(0xF200000000000000), 1,
+      { CNST_LIMB(0x3F789854A0CB1B81), 0 }, CNST_LIMB(0x0E00000000000000) },
+#endif
+  };
+
+  mp_limb_t  dinv, got_r, got_q[numberof(data[0].want_q)];
+  mp_size_t  qsize;
+  int        i, shift;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      qsize = data[i].nsize + data[i].qxn;
+      ASSERT_ALWAYS (qsize <= numberof (got_q));
+
+      got_r = mpn_divrem_1 (got_q, data[i].qxn, data[i].n, data[i].nsize,
+                            data[i].d);
+      if (got_r != data[i].want_r
+          || refmpn_cmp (got_q, data[i].want_q, qsize) != 0)
+        {
+          printf        ("mpn_divrem_1 wrong at data[%d]\n", i);
+        bad:
+          mpn_trace     ("  n", data[i].n, data[i].nsize);
+          printf        ("  nsize=%ld\n", (long) data[i].nsize);
+          mp_limb_trace ("  d", data[i].d);
+          printf        ("  qxn=%ld\n", (long) data[i].qxn);
+          mpn_trace     ("  want q", data[i].want_q, qsize);
+          mpn_trace     ("  got  q", got_q, qsize);
+          mp_limb_trace ("  want r", data[i].want_r);
+          mp_limb_trace ("  got  r", got_r);
+          abort ();
+        }
+
+      /* test if available */
+#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
+      shift = refmpn_count_leading_zeros (data[i].d);
+      dinv = refmpn_invert_limb (data[i].d << shift);
+      got_r = mpn_preinv_divrem_1 (got_q, data[i].qxn,
+                                   data[i].n, data[i].nsize,
+                                   data[i].d, dinv, shift);
+      if (got_r != data[i].want_r
+          || refmpn_cmp (got_q, data[i].want_q, qsize) != 0)
+        {
+          printf        ("mpn_preinv divrem_1 wrong at data[%d]\n", i);
+          printf        ("  shift=%d\n", shift);
+          mp_limb_trace ("  dinv", dinv);
+          goto bad;
+        }
+#endif
+    }
+}
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-fat.c b/tests/mpn/t-fat.c
new file mode 100644
index 0000000..4e71017
--- /dev/null
+++ b/tests/mpn/t-fat.c
@@ -0,0 +1,310 @@
+/* Test fat binary setups.
+
+Copyright 2003, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+/* In this program we're aiming to pick up certain subtle problems that
+   might creep into a fat binary.
+
+   1. We want to ensure the application entry point routines like
+      __gmpn_add_n dispatch to the correct field of __gmpn_cpuvec.
+
+      Note that these routines are not exercised as a side effect of other
+      tests (eg. the mpz routines).  Internally the fields of __gmpn_cpuvec
+      are used directly, so we need to write test code explicitly calling
+      the mpn functions, like an application will have.
+
+   2. We want to ensure the initial __gmpn_cpuvec data has the initializer
+      function pointers in the correct fields, and that those initializer
+      functions dispatch to their correct corresponding field once
+      initialization has been done.
+
+      Only one of the initializer routines executes in a normal program,
+      since that routine sets all the pointers to actual mpn functions.  We
+      forcibly reset __gmpn_cpuvec so we can run each.
+
+   In both cases for the above, the data put through the functions is
+   nothing special, just enough to verify that for instance an add_n is
+   really doing an add_n and has not for instance mistakenly gone to sub_n
+   or something.
+
+   The loop around each test will exercise the initializer routine on the
+   first iteration, and the dispatcher routine on the second.
+
+   If the dispatcher and/or initializer routines are generated mechanically
+   via macros (eg. mpn/x86/fat/fat_entry.asm) then there shouldn't be too
+   much risk of them going wrong, provided the structure layout is correctly
+   expressed.  But if they're in C then it's good to guard against typos in
+   what is rather repetitive code.  The initializer data for __gmpn_cpuvec
+   in fat.c is always done by hand and is likewise a bit repetitive.  */
+
+
+/* dummies when not a fat binary */
+#if ! WANT_FAT_BINARY
+struct cpuvec_t {
+  int  dummy;
+};
+struct cpuvec_t __gmpn_cpuvec;
+#define ITERATE_FAT_THRESHOLDS()  do { } while (0)
+#endif
+
+/* saved from program startup */
+struct cpuvec_t  initial_cpuvec;
+
+void
+check_functions (void)
+{
+  mp_limb_t  wp[2], xp[2], yp[2], r;
+  int  i;
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      yp[0] = 456;
+      mpn_add_n (wp, xp, yp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 579);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      wp[0] = 456;
+      r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
+      ASSERT_ALWAYS (wp[0] == 702);
+      ASSERT_ALWAYS (r == 0);
+    }
+
+#if HAVE_NATIVE_mpn_copyd
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      xp[1] = 456;
+      mpn_copyd (xp+1, xp, (mp_size_t) 1);
+      ASSERT_ALWAYS (xp[1] == 123);
+    }
+#endif
+
+#if HAVE_NATIVE_mpn_copyi
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      xp[1] = 456;
+      mpn_copyi (xp, xp+1, (mp_size_t) 1);
+      ASSERT_ALWAYS (xp[0] == 456);
+    }
+#endif
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 1605;
+      mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5));
+      ASSERT_ALWAYS (wp[0] == 321);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 1296;
+      r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0));
+      ASSERT_ALWAYS (wp[0] == 432);
+      ASSERT_ALWAYS (r == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 287;
+      r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7));
+      ASSERT_ALWAYS (wp[1] == 41);
+      ASSERT_ALWAYS (wp[0] == 0);
+      ASSERT_ALWAYS (r == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 12;
+      r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9));
+      ASSERT_ALWAYS (r == 3);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 0x1001;
+      mpn_lshift (wp, xp, (mp_size_t) 1, 1);
+      ASSERT_ALWAYS (wp[0] == 0x2002);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 14;
+      r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4));
+      ASSERT_ALWAYS (r == 2);
+    }
+
+#if (GMP_NUMB_BITS % 4) == 0
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      int  bits = (GMP_NUMB_BITS / 4) * 3;
+      mp_limb_t  mod = (CNST_LIMB(1) << bits) - 1;
+      mp_limb_t  want = GMP_NUMB_MAX % mod;
+      xp[0] = GMP_NUMB_MAX;
+      r = mpn_mod_34lsub1 (xp, (mp_size_t) 1);
+      ASSERT_ALWAYS (r % mod == want);
+    }
+#endif
+
+  /*   DECL_modexact_1c_odd ((*modexact_1c_odd)); */
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 14;
+      r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4));
+      ASSERT_ALWAYS (wp[0] == 56);
+      ASSERT_ALWAYS (r == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 5;
+      yp[0] = 7;
+      mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 35);
+      ASSERT_ALWAYS (wp[1] == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 5;
+      yp[0] = 7;
+      mpn_mullo_basecase (wp, xp, yp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 35);
+    }
+
+#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 0x101;
+      r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1,
+                               GMP_LIMB_HIGHBIT,
+                               refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0);
+      ASSERT_ALWAYS (wp[0] == 0x202);
+      ASSERT_ALWAYS (wp[1] == 0);
+      ASSERT_ALWAYS (r == 0);
+    }
+#endif
+
+#if GMP_NAIL_BITS == 0
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = GMP_LIMB_HIGHBIT+123;
+      r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT,
+                            refmpn_invert_limb (GMP_LIMB_HIGHBIT));
+      ASSERT_ALWAYS (r == 123);
+    }
+#endif
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 0x8008;
+      mpn_rshift (wp, xp, (mp_size_t) 1, 1);
+      ASSERT_ALWAYS (wp[0] == 0x4004);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 5;
+      mpn_sqr_basecase (wp, xp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 25);
+      ASSERT_ALWAYS (wp[1] == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 999;
+      yp[0] = 666;
+      mpn_sub_n (wp, xp, yp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 333);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      wp[0] = 456;
+      r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
+      ASSERT_ALWAYS (wp[0] == 210);
+      ASSERT_ALWAYS (r == 0);
+    }
+}
+
+/* Expect the first use of each fat threshold to invoke the necessary
+   initialization.  */
+void
+check_thresholds (void)
+{
+#define ITERATE(name,field)                                             \
+  do {                                                                  \
+    __gmpn_cpuvec_initialized = 0;					\
+    memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));   \
+    ASSERT_ALWAYS (name != 0);                                          \
+    ASSERT_ALWAYS (name == __gmpn_cpuvec.field);                        \
+    ASSERT_ALWAYS (__gmpn_cpuvec_initialized);                          \
+  } while (0)
+
+  ITERATE_FAT_THRESHOLDS ();
+}
+
+
+int
+main (void)
+{
+  memcpy (&initial_cpuvec, &__gmpn_cpuvec, sizeof (__gmpn_cpuvec));
+
+  tests_start ();
+
+  check_functions ();
+  check_thresholds ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-fib2m.c b/tests/mpn/t-fib2m.c
new file mode 100644
index 0000000..5ad3942
--- /dev/null
+++ b/tests/mpn/t-fib2m.c
@@ -0,0 +1,344 @@
+/* Test mpn_fib2m.
+
+Copyright 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define MAX_K_BITS 16
+#define MAX_K (1L << MAX_K_BITS)
+#define MIN_K 1
+
+#define MAX_MN 20
+#define MAX_KN 30
+
+#define COUNT 200
+
+static int
+test_fib2_fib2m (int count, gmp_randstate_ptr rands)
+{
+  int test;
+  mp_ptr fk, fks1, fkm, fks1m, mp, qp;
+  mp_size_t mn, fn, size, max_mn;
+  TMP_DECL;
+
+  size = MPN_FIB2_SIZE (MAX_K);
+  max_mn = size / 4 + 10;
+  ASSERT (max_mn < size);
+
+  TMP_MARK;
+  fk	 = TMP_ALLOC_LIMBS (size);
+  fks1	 = TMP_ALLOC_LIMBS (size);
+  qp	 = TMP_ALLOC_LIMBS (size);
+  mp	 = TMP_ALLOC_LIMBS (max_mn);
+  fkm	 = 1 + TMP_ALLOC_LIMBS (max_mn * 2 + 1 + 2);
+  fks1m	 = 1 + TMP_ALLOC_LIMBS (max_mn * 2 + 1 + 2);
+
+  for (test = 1; test <= count; ++test)
+    {
+      mp_limb_t fk_before, fk_after, fk1_before, fk1_after;
+      int signflip;
+      unsigned long k;
+
+      k = MIN_K +
+	gmp_urandomm_ui (rands, test < MAX_K_BITS ?
+			 MAX_K >> test : (MAX_K - MIN_K));
+
+      fn = mpn_fib2_ui (fk, fks1, k);
+      do {
+	mn = gmp_urandomm_ui (rands, MAX_K) % (fn / 4 + 10);
+      } while (mn == 0);
+      ASSERT (mn <= max_mn);
+      mpn_random2 (mp, mn);
+      ASSERT (mp [mn - 1] != 0);
+
+      if (fn >= mn)
+	{
+	  mpn_tdiv_qr (qp, fk, 0, fk, fn, mp, mn);
+	  mpn_tdiv_qr (qp, fks1, 0, fks1, fn, mp, mn);
+	}
+      else
+	{
+	  MPN_ZERO (fk + fn, mn - fn);
+	  MPN_ZERO (fks1 + fn, mn - fn);
+	}
+
+      mpn_random2 (fkm - 1, 2*mn+1+2);
+      fk_before = fkm [-1];
+      fk_after = fkm [2 * mn + 1];
+
+      mpn_random2 (fks1m - 1, 2*mn+1+2);
+      fk1_before = fks1m [-1];
+      fk1_after = fks1m [2 * mn + 1];
+
+      qp [0] = k;
+      signflip = mpn_fib2m (fkm, fks1m, qp, 1, mp, mn);
+      if (fkm [-1] != fk_before || fkm [2 * mn + 1] != fk_after
+	  || fks1m [-1] != fk1_before || fks1m [2 * mn + 1] != fk1_after)
+	{
+	  printf ("REDZONE violation in test %d, k = %lu, mn = %u\n",
+		  test, k, (unsigned) mn);
+	  if (fkm[-1] != fk_before)
+	    {
+	      printf ("before fkm:"); mpn_dump (fkm - 1, 1);
+	      printf ("keep:   "); mpn_dump (&fk_before, 1);
+	    }
+	  if (fkm[2 * mn + 1] != fk_after)
+	    {
+	      printf ("after fkm:"); mpn_dump (fkm + 2 * mn + 1, 1);
+	      printf ("keep:   "); mpn_dump (&fk_after, 1);
+	    }
+	  if (fks1m[-1] != fk1_before)
+	    {
+	      printf ("before fks1m:"); mpn_dump (fks1m - 1, 1);
+	      printf ("keep:   "); mpn_dump (&fk1_before, 1);
+	    }
+	  if (fks1m[2 * mn + 1] != fk1_after)
+	    {
+	      printf ("after fks1m:"); mpn_dump (fks1m + 2 * mn + 1, 1);
+	      printf ("keep:   "); mpn_dump (&fk1_after, 1);
+	    }
+	  abort();
+	}
+
+      if (mpn_cmp (fkm, fk, mn) != 0)
+	{
+	  if (mpn_sub_n (fk, mp, fk, mn) || mpn_cmp (fkm, fk, mn) != 0)
+	    {
+	      printf ("ERROR(k) in test %d, k = %lu, mn = %u\n",
+		      test, k, (unsigned) mn);
+	      mpn_dump (fk, mn);
+	      mpn_dump (fkm, mn);
+	      mpn_dump (mp, mn);
+	      abort();
+	    }
+	  signflip ^= 1;
+	}
+
+      if (mpn_cmp (fks1m, fks1, mn) != 0)
+	{
+	  if (mpn_sub_n (fks1, mp, fks1, mn) || mpn_cmp (fks1m, fks1, mn) != 0)
+	    {
+	      printf ("ERROR(k-1) in test %d, k = %lu, mn = %u\n",
+		      test, k, (unsigned) mn);
+	      mpn_dump (fks1, mn);
+	      mpn_dump (fks1m, mn);
+	      mpn_dump (mp, mn);
+	      abort();
+	    }
+	  signflip ^= 1;
+	}
+
+      if (signflip != 0 && ! mpn_zero_p (fks1m, mn) && ! mpn_zero_p (fkm, mn))
+	{
+	  if ((mp [0] & 1) == 0) /* Should we test only odd modulus-es? */
+	    {
+	      if (! mpn_lshift (fks1m, fks1m, mn, 1) &&
+		  mpn_cmp (mp, fks1m, mn) == 0)
+		continue;
+	      if (! mpn_lshift (fkm, fkm, mn, 1) &&
+		  mpn_cmp (mp, fkm, mn) == 0)
+		continue;
+	    }
+	  printf ("ERROR(sign) in test %d, k = %lu, mn = %u\n",
+		  test, k, (unsigned) mn);
+	  abort();
+	}
+    }
+  TMP_FREE;
+  return 0;
+}
+
+static int
+test_fib2m_2exp (int count, gmp_randstate_ptr rands)
+{
+  int test;
+  mp_ptr fka, fks1a, fkb, fks1b, mp, kp;
+  TMP_DECL;
+
+  TMP_MARK;
+  kp	 = TMP_ALLOC_LIMBS (MAX_KN);
+  mp	 = TMP_ALLOC_LIMBS (MAX_MN);
+  fka	 = 1 + TMP_ALLOC_LIMBS (MAX_MN * 2 + 1 + 2);
+  fks1a	 = 1 + TMP_ALLOC_LIMBS (MAX_MN * 2 + 1 + 2);
+  fkb	 = 1 + TMP_ALLOC_LIMBS (MAX_MN * 2 + 1 + 2);
+  fks1b	 = 1 + TMP_ALLOC_LIMBS (MAX_MN * 2 + 1 + 2);
+
+  for (test = 1; test <= count; ++test)
+    {
+      mp_limb_t fka_before, fka_after, fk1a_before, fk1a_after;
+      mp_limb_t fkb_before, fkb_after, fk1b_before, fk1b_after;
+      mp_size_t mn, kn;
+      int signflip;
+      mp_bitcnt_t exp2;
+
+      mn = gmp_urandomm_ui (rands, MAX_MN - 1) + 1;
+      mpn_random2 (mp, mn);
+
+      exp2 = MIN_K + 1 + gmp_urandomm_ui (rands, MAX_KN * GMP_NUMB_BITS - MIN_K - 1);
+
+      kn = BITS_TO_LIMBS (exp2);
+      MPN_ZERO (kp, kn - 1);
+      kp [kn - 1] = CNST_LIMB (1) << ((exp2 - 1) % GMP_NUMB_BITS);
+
+      mpn_random2 (fka - 1, 2*mn+1+2);
+      fka_before = fka [-1];
+      fka_after = fka [2 * mn + 1];
+
+      mpn_random2 (fks1a - 1, 2*mn+1+2);
+      fk1a_before = fks1a [-1];
+      fk1a_after = fks1a [2 * mn + 1];
+
+      signflip = mpn_fib2m (fka, fks1a, kp, kn, mp, mn);
+      if (fka [-1] != fka_before || fka [2 * mn + 1] != fka_after
+	  || fks1a [-1] != fk1a_before || fks1a [2 * mn + 1] != fk1a_after)
+	{
+	  printf ("REDZONE(a) violation in test %d, exp2 = %lu\n", test, exp2);
+	  if (fka[-1] != fka_before)
+	    {
+	      printf ("before fka:"); mpn_dump (fka - 1, 1);
+	      printf ("keep:   "); mpn_dump (&fka_before, 1);
+	    }
+	  if (fka[2 * mn + 1] != fka_after)
+	    {
+	      printf ("after fka:"); mpn_dump (fka + 2 * mn + 1, 1);
+	      printf ("keep:   "); mpn_dump (&fka_after, 1);
+	    }
+	  if (fks1a[-1] != fk1a_before)
+	    {
+	      printf ("before fks1a:"); mpn_dump (fks1a - 1, 1);
+	      printf ("keep:   "); mpn_dump (&fk1a_before, 1);
+	    }
+	  if (fks1a[2 * mn + 1] != fk1a_after)
+	    {
+	      printf ("after fks1a:"); mpn_dump (fks1a + 2 * mn + 1, 1);
+	      printf ("keep:   "); mpn_dump (&fk1a_after, 1);
+	    }
+	  abort();
+	}
+
+      if (signflip && ! mpn_zero_p (fks1a, mn))
+	mpn_sub_n (fks1a, mp, fks1a, mn);
+      if (mpn_sub_n (fka, fka, fks1a, mn))
+	ASSERT_CARRY (mpn_add_n (fka, fka, mp, mn));
+
+      mpn_sub_1 (kp, kp, kn, 1);
+      ASSERT (exp2 % GMP_NUMB_BITS == 1 || kp [kn - 1] != 0);
+      kn -= kp [kn - 1] == 0;
+
+      mpn_random2 (fkb - 1, 2*mn+1+2);
+      fkb_before = fkb [-1];
+      fkb_after = fkb [2 * mn + 1];
+
+      mpn_random2 (fks1b - 1, 2*mn+1+2);
+      fk1b_before = fks1b [-1];
+      fk1b_after = fks1b [2 * mn + 1];
+
+      signflip = mpn_fib2m (fkb, fks1b, kp, kn, mp, mn);
+      if (fkb [-1] != fkb_before || fkb [2 * mn + 1] != fkb_after
+	  || fks1b [-1] != fk1b_before || fks1b [2 * mn + 1] != fk1b_after)
+	{
+	  printf ("REDZONE(b) violation in test %d, exp2 = %lu\n", test, exp2);
+	  if (fkb[-1] != fkb_before)
+	    {
+	      printf ("before fkb:"); mpn_dump (fkb - 1, 1);
+	      printf ("keep:   "); mpn_dump (&fkb_before, 1);
+	    }
+	  if (fkb[2 * mn + 1] != fkb_after)
+	    {
+	      printf ("after fkb:"); mpn_dump (fkb + 2 * mn + 1, 1);
+	      printf ("keep:   "); mpn_dump (&fkb_after, 1);
+	    }
+	  if (fks1b[-1] != fk1b_before)
+	    {
+	      printf ("before fks1b:"); mpn_dump (fks1b - 1, 1);
+	      printf ("keep:   "); mpn_dump (&fk1b_before, 1);
+	    }
+	  if (fks1b[2 * mn + 1] != fk1b_after)
+	    {
+	      printf ("after fks1b:"); mpn_dump (fks1b + 2 * mn + 1, 1);
+	      printf ("keep:   "); mpn_dump (&fk1b_after, 1);
+	    }
+	  abort();
+	}
+
+      if (mpn_cmp (fks1a, fkb, mn) != 0)
+	{
+	  if (mpn_sub_n (fkb, mp, fkb, mn) || mpn_cmp (fks1a, fkb, mn) != 0)
+	    {
+	      printf ("ERROR(k) in test %d, exp2 = %lu\n", test, exp2);
+	      mpn_dump (fks1a, mn);
+	      mpn_dump (fkb, mn);
+	      mpn_dump (mp, mn);
+	      abort();
+	    }
+	  signflip ^= 1;
+	}
+
+      if (mpn_cmp (fka, fks1b, mn) != 0)
+	{
+	  if (mpn_sub_n (fks1b, mp, fks1b, mn) || mpn_cmp (fka, fks1b, mn) != 0)
+	    {
+	      printf ("ERROR(k-1) in test %d, exp2 = %lu\n", test, exp2);
+	      mpn_dump (fka, mn);
+	      mpn_dump (fks1b, mn);
+	      mpn_dump (mp, mn);
+	      abort();
+	    }
+	  signflip ^= 1;
+	}
+
+      if (signflip != 0 && ! mpn_zero_p (fks1b, mn) && ! mpn_zero_p (fkb, mn))
+	{
+	  if ((mp [0] & 1) == 0) /* Should we test only odd modulus-es? */
+	    {
+	      if (! mpn_lshift (fks1b, fks1b, mn, 1) &&
+		  mpn_cmp (mp, fks1b, mn) == 0)
+		continue;
+	      if (! mpn_lshift (fkb, fkb, mn, 1) &&
+		  mpn_cmp (mp, fkb, mn) == 0)
+		continue;
+	    }
+	  printf ("ERROR(sign) in test %d, exp2 = %lu\n",
+		  test, exp2);
+	  abort();
+	}
+    }
+  TMP_FREE;
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  int count = COUNT;
+  gmp_randstate_ptr rands;
+
+  tests_start ();
+  TESTS_REPS (count, argv, argc);
+  rands = RANDS;
+
+  test_fib2_fib2m (count / 2, rands);
+  test_fib2m_2exp (count / 2, rands);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-gcd_11.c b/tests/mpn/t-gcd_11.c
new file mode 100644
index 0000000..2b4de2f
--- /dev/null
+++ b/tests/mpn/t-gcd_11.c
@@ -0,0 +1,86 @@
+/* Test mpn_gcd_11.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef COUNT
+#define COUNT 500000
+#endif
+
+static void
+one_test (mp_limb_t a, mp_limb_t b, mp_limb_t ref)
+{
+  mp_limb_t r = mpn_gcd_11 (a, b);
+  if (r != ref)
+    {
+      gmp_fprintf (stderr,
+		   "gcd_11 (0x%Mx, 0x%Mx) failed, got: 0x%Mx, ref: 0x%Mx\n",
+		   a, b, r, ref);
+      abort();
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t a, b;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (a);
+  mpz_init (b);
+  for (test = 0; test < count; test++)
+    {
+      mp_limb_t al, bl;
+      mp_bitcnt_t asize = 1 + gmp_urandomm_ui(rands, GMP_NUMB_BITS);
+      mp_bitcnt_t bsize = 1 + gmp_urandomm_ui(rands, GMP_NUMB_BITS);
+      if (test & 1)
+	{
+	  mpz_urandomb (a, rands, asize);
+	  mpz_urandomb (b, rands, bsize);
+	}
+      else
+	{
+	  mpz_rrandomb (a, rands, asize);
+	  mpz_rrandomb (b, rands, bsize);
+	}
+
+      mpz_setbit (a, 0);
+      mpz_setbit (b, 0);
+      al = mpz_getlimbn (a, 0);
+      bl = mpz_getlimbn (b, 0);
+      one_test (al, bl, refmpn_gcd_11 (al, bl));
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-gcd_22.c b/tests/mpn/t-gcd_22.c
new file mode 100644
index 0000000..314bf18
--- /dev/null
+++ b/tests/mpn/t-gcd_22.c
@@ -0,0 +1,87 @@
+/* Test mpn_gcd_22.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef COUNT
+#define COUNT 150000
+#endif
+
+static void
+one_test (mpz_srcptr a, mpz_srcptr b, mpz_srcptr ref)
+{
+  mp_double_limb_t r = mpn_gcd_22 (mpz_getlimbn (a, 1), mpz_getlimbn (a, 0),
+				   mpz_getlimbn (b, 1), mpz_getlimbn (b, 0));
+  if (r.d0 != mpz_getlimbn (ref, 0) || r.d1 != mpz_getlimbn (ref, 1))
+    {
+      gmp_fprintf (stderr,
+		   "gcd_22 (0x%Zx, 0x%Zx) failed, got: g1 = 0x%Mx g0 = 0x%Mx, ref: 0x%Zx\n",
+                   a, b, r.d1, r.d0, ref);
+      abort();
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t a, b, ref;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (ref);
+  for (test = 0; test < count; test++)
+    {
+      mp_bitcnt_t asize = 1 + gmp_urandomm_ui(rands, 2*GMP_NUMB_BITS);
+      mp_bitcnt_t bsize = 1 + gmp_urandomm_ui(rands, 2*GMP_NUMB_BITS);
+      if (test & 1)
+	{
+	  mpz_urandomb (a, rands, asize);
+	  mpz_urandomb (b, rands, bsize);
+	}
+      else
+	{
+	  mpz_rrandomb (a, rands, asize);
+	  mpz_rrandomb (b, rands, bsize);
+	}
+
+      mpz_setbit (a, 0);
+      mpz_setbit (b, 0);
+      refmpz_gcd (ref, a, b);
+      one_test (a, b, ref);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (ref);
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-gcdext_1.c b/tests/mpn/t-gcdext_1.c
new file mode 100644
index 0000000..1e73a11
--- /dev/null
+++ b/tests/mpn/t-gcdext_1.c
@@ -0,0 +1,134 @@
+/* Test mpn_gcdext_1.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef COUNT
+#define COUNT 250000
+#endif
+
+static void
+set_signed_limb (mpz_t r, mp_limb_signed_t x)
+{
+  mpz_t t;
+  mp_limb_t abs_x = ABS_CAST(mp_limb_t, x);
+  mpz_set (r, mpz_roinit_n (t, &abs_x, 1));
+  if (x < 0)
+    mpz_neg (r, r);
+}
+
+static void
+one_test (mp_limb_t a, mp_limb_t b)
+{
+  mp_limb_signed_t s, t;
+  mp_limb_t g;
+
+  g = mpn_gcdext_1 (&s, &t, a, b);
+
+  if (g > 0)
+    {
+      mpz_t d, sz, tz, tmp;
+
+      mpz_init (d);
+      mpz_init (sz);
+      mpz_init (tz);
+
+      set_signed_limb (sz, s);
+      set_signed_limb (tz, t);
+
+      mpz_mul (d, mpz_roinit_n (tmp, &a, 1), sz);
+      mpz_addmul (d, mpz_roinit_n (tmp, &b, 1), tz);
+
+      if (mpz_cmp (d, mpz_roinit_n (tmp, &g, 1)) == 0
+	  && a % g == 0 && b % g == 0)
+	{
+	  mp_limb_t a_div_g = a / g;
+	  mp_limb_t b_div_g = b / g;
+	  mp_limb_t abs_s = ABS_CAST(mp_limb_t, s);
+	  mp_limb_t abs_t = ABS_CAST(mp_limb_t, t);
+	  mpz_mul_ui (sz, sz, 2);
+	  mpz_mul_ui (tz, tz, 2);
+	  if ((abs_s == 1 || mpz_cmpabs (sz, mpz_roinit_n (tmp, &b_div_g, 1)) < 0)
+	       && (abs_t == 1 || mpz_cmpabs (tz, mpz_roinit_n (tmp, &a_div_g, 1)) < 0))
+	    {
+	      mpz_clear (d);
+	      mpz_clear (sz);
+	      mpz_clear (tz);
+
+	      return;
+	    }
+	}
+    }
+  gmp_fprintf (stderr,
+	       "gcdext_1 (0x%Mx, 0x%Mx) failed, got: g = 0x%Mx, s = %s0x%Mx, t = %s0x%Mx\n",
+	       a, b, g,
+	       s < 0 ? "-" : "", ABS_CAST(mp_limb_t, s),
+	       t < 0 ? "-" : "", ABS_CAST(mp_limb_t, t));
+  abort();
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t a, b;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (a);
+  mpz_init (b);
+  for (test = 0; test < count; test++)
+    {
+      mp_limb_t al, bl;
+      mp_bitcnt_t asize = 1 + gmp_urandomm_ui(rands, GMP_NUMB_BITS);
+      mp_bitcnt_t bsize = 1 + gmp_urandomm_ui(rands, GMP_NUMB_BITS);
+      if (test & 1)
+	{
+	  mpz_urandomb (a, rands, asize);
+	  mpz_urandomb (b, rands, bsize);
+	}
+      else
+	{
+	  mpz_rrandomb (a, rands, asize);
+	  mpz_rrandomb (b, rands, bsize);
+	}
+
+      al = mpz_getlimbn (a, 0);
+      bl = mpz_getlimbn (b, 0);
+      al += (al == 0);
+      bl += (bl == 0);
+
+      one_test (al, bl);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-get_d.c b/tests/mpn/t-get_d.c
new file mode 100644
index 0000000..ab0e2ec
--- /dev/null
+++ b/tests/mpn/t-get_d.c
@@ -0,0 +1,497 @@
+/* Test mpn_get_d.
+
+Copyright 2002-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+
+/* Exercise various 2^n values, with various exponents and positive and
+   negative.  */
+void
+check_onebit (void)
+{
+  static const int bit_table[] = {
+    0, 1, 2, 3,
+    GMP_NUMB_BITS - 2, GMP_NUMB_BITS - 1,
+    GMP_NUMB_BITS,
+    GMP_NUMB_BITS + 1, GMP_NUMB_BITS + 2,
+    2 * GMP_NUMB_BITS - 2, 2 * GMP_NUMB_BITS - 1,
+    2 * GMP_NUMB_BITS,
+    2 * GMP_NUMB_BITS + 1, 2 * GMP_NUMB_BITS + 2,
+    3 * GMP_NUMB_BITS - 2, 3 * GMP_NUMB_BITS - 1,
+    3 * GMP_NUMB_BITS,
+    3 * GMP_NUMB_BITS + 1, 3 * GMP_NUMB_BITS + 2,
+    4 * GMP_NUMB_BITS - 2, 4 * GMP_NUMB_BITS - 1,
+    4 * GMP_NUMB_BITS,
+    4 * GMP_NUMB_BITS + 1, 4 * GMP_NUMB_BITS + 2,
+    5 * GMP_NUMB_BITS - 2, 5 * GMP_NUMB_BITS - 1,
+    5 * GMP_NUMB_BITS,
+    5 * GMP_NUMB_BITS + 1, 5 * GMP_NUMB_BITS + 2,
+    6 * GMP_NUMB_BITS - 2, 6 * GMP_NUMB_BITS - 1,
+    6 * GMP_NUMB_BITS,
+    6 * GMP_NUMB_BITS + 1, 6 * GMP_NUMB_BITS + 2,
+  };
+  static const int exp_table[] = {
+    0, -100, -10, -1, 1, 10, 100,
+  };
+
+  /* FIXME: It'd be better to base this on the float format. */
+#if defined (__vax) || defined (__vax__)
+  int     limit = 127;  /* vax fp numbers have limited range */
+#else
+  int     limit = 511;
+#endif
+
+  int        bit_i, exp_i, i;
+  double     got, want;
+  mp_size_t  nsize, sign;
+  long       bit, exp, want_bit;
+  mp_limb_t  np[20];
+
+  for (bit_i = 0; bit_i < numberof (bit_table); bit_i++)
+    {
+      bit = bit_table[bit_i];
+
+      nsize = BITS_TO_LIMBS (bit+1);
+      refmpn_zero (np, nsize);
+      np[bit/GMP_NUMB_BITS] = CNST_LIMB(1) << (bit % GMP_NUMB_BITS);
+
+      for (exp_i = 0; exp_i < numberof (exp_table); exp_i++)
+        {
+          exp = exp_table[exp_i];
+
+          want_bit = bit + exp;
+          if (want_bit >= limit || want_bit <= -limit)
+            continue;
+
+          want = 1.0;
+          for (i = 0; i < want_bit; i++)
+            want *= 2.0;
+          for (i = 0; i > want_bit; i--)
+            want *= 0.5;
+
+          for (sign = 0; sign >= -1; sign--, want = -want)
+            {
+              got = mpn_get_d (np, nsize, sign, exp);
+              if (got != want)
+                {
+                  printf    ("mpn_get_d wrong on 2^n\n");
+                  printf    ("   bit      %ld\n", bit);
+                  printf    ("   exp      %ld\n", exp);
+                  printf    ("   want_bit %ld\n", want_bit);
+                  printf    ("   sign     %ld\n", (long) sign);
+                  mpn_trace ("   n        ", np, nsize);
+                  printf    ("   nsize    %ld\n", (long) nsize);
+                  d_trace   ("   want     ", want);
+                  d_trace   ("   got      ", got);
+                  abort();
+                }
+            }
+        }
+    }
+}
+
+
+/* Exercise values 2^n+1, while such a value fits the mantissa of a double. */
+void
+check_twobit (void)
+{
+  int        i, mant_bits;
+  double     got, want;
+  mp_size_t  nsize, sign;
+  mp_ptr     np;
+
+  mant_bits = tests_dbl_mant_bits ();
+  if (mant_bits == 0)
+    return;
+
+  np = refmpn_malloc_limbs (BITS_TO_LIMBS (mant_bits));
+  want = 3.0;
+  for (i = 1; i < mant_bits; i++)
+    {
+      nsize = BITS_TO_LIMBS (i+1);
+      refmpn_zero (np, nsize);
+      np[i/GMP_NUMB_BITS] = CNST_LIMB(1) << (i % GMP_NUMB_BITS);
+      np[0] |= 1;
+
+      for (sign = 0; sign >= -1; sign--)
+        {
+          got = mpn_get_d (np, nsize, sign, 0);
+          if (got != want)
+            {
+              printf    ("mpn_get_d wrong on 2^%d + 1\n", i);
+              printf    ("   sign     %ld\n", (long) sign);
+              mpn_trace ("   n        ", np, nsize);
+              printf    ("   nsize    %ld\n", (long) nsize);
+              d_trace   ("   want     ", want);
+              d_trace   ("   got      ", got);
+              abort();
+            }
+          want = -want;
+        }
+
+      want = 2.0 * want - 1.0;
+    }
+
+  free (np);
+}
+
+
+/* Expect large negative exponents to underflow to 0.0.
+   Some systems might have hardware traps for such an underflow (though
+   usually it's not the default), so watch out for SIGFPE. */
+void
+check_underflow (void)
+{
+  static const long exp_table[] = {
+    -999999L, LONG_MIN,
+  };
+  static const mp_limb_t  np[1] = { 1 };
+
+  static long exp;
+  mp_size_t  nsize, sign;
+  double     got;
+  int        exp_i;
+
+  nsize = numberof (np);
+
+  if (tests_setjmp_sigfpe() == 0)
+    {
+      for (exp_i = 0; exp_i < numberof (exp_table); exp_i++)
+        {
+          exp = exp_table[exp_i];
+
+          for (sign = 0; sign >= -1; sign--)
+            {
+              got = mpn_get_d (np, nsize, sign, exp);
+              if (got != 0.0)
+                {
+                  printf  ("mpn_get_d wrong, didn't get 0.0 on underflow\n");
+                  printf  ("  nsize    %ld\n", (long) nsize);
+                  printf  ("  exp      %ld\n", exp);
+                  printf  ("  sign     %ld\n", (long) sign);
+                  d_trace ("  got      ", got);
+                  abort ();
+                }
+            }
+        }
+    }
+  else
+    {
+      printf ("Warning, underflow to zero tests skipped due to SIGFPE (exp=%ld)\n", exp);
+    }
+  tests_sigfpe_done ();
+}
+
+
+/* Expect large values to result in +/-inf, on IEEE systems. */
+void
+check_inf (void)
+{
+  static const long exp_table[] = {
+    999999L, LONG_MAX,
+  };
+  static const mp_limb_t  np[4] = { 1, 1, 1, 1 };
+  long       exp;
+  mp_size_t  nsize, sign, got_sign;
+  double     got;
+  int        exp_i;
+
+  if (! _GMP_IEEE_FLOATS)
+    return;
+
+  for (nsize = 1; nsize <= numberof (np); nsize++)
+    {
+      for (exp_i = 0; exp_i < numberof (exp_table); exp_i++)
+        {
+          exp = exp_table[exp_i];
+
+          for (sign = 0; sign >= -1; sign--)
+            {
+              got = mpn_get_d (np, nsize, sign, exp);
+              got_sign = (got >= 0 ? 0 : -1);
+              if (! tests_isinf (got))
+                {
+                  printf  ("mpn_get_d wrong, didn't get infinity\n");
+                bad:
+                  printf  ("  nsize    %ld\n", (long) nsize);
+                  printf  ("  exp      %ld\n", exp);
+                  printf  ("  sign     %ld\n", (long) sign);
+                  d_trace ("  got      ", got);
+                  printf  ("  got sign %ld\n", (long) got_sign);
+                  abort ();
+                }
+              if (got_sign != sign)
+                {
+                  printf  ("mpn_get_d wrong sign on infinity\n");
+                  goto bad;
+                }
+            }
+        }
+    }
+}
+
+/* Check values 2^n approaching and into IEEE denorm range.
+   Some systems might not support denorms, or might have traps setup, so
+   watch out for SIGFPE.  */
+void
+check_ieee_denorm (void)
+{
+  static long exp;
+  mp_limb_t  n = 1;
+  long       i;
+  mp_size_t  sign;
+  double     want, got;
+
+  if (! _GMP_IEEE_FLOATS)
+    return;
+
+  if (tests_setjmp_sigfpe() == 0)
+    {
+      exp = -1020;
+      want = 1.0;
+      for (i = 0; i > exp; i--)
+        want *= 0.5;
+
+      for ( ; exp > -1500 && want != 0.0; exp--)
+        {
+          for (sign = 0; sign >= -1; sign--)
+            {
+              got = mpn_get_d (&n, (mp_size_t) 1, sign, exp);
+              if (got != want)
+                {
+                  printf  ("mpn_get_d wrong on denorm\n");
+                  printf  ("  n=1\n");
+                  printf  ("  exp   %ld\n", exp);
+                  printf  ("  sign  %ld\n", (long) sign);
+                  d_trace ("  got   ", got);
+                  d_trace ("  want  ", want);
+                  abort ();
+                }
+              want = -want;
+            }
+          want *= 0.5;
+          FORCE_DOUBLE (want);
+        }
+    }
+  else
+    {
+      printf ("Warning, IEEE denorm tests skipped due to SIGFPE (exp=%ld)\n", exp);
+    }
+  tests_sigfpe_done ();
+}
+
+
+/* Check values 2^n approaching exponent overflow.
+   Some systems might trap on overflow, so watch out for SIGFPE.  */
+void
+check_ieee_overflow (void)
+{
+  static long exp;
+  mp_limb_t  n = 1;
+  long       i;
+  mp_size_t  sign;
+  double     want, got;
+
+  if (! _GMP_IEEE_FLOATS)
+    return;
+
+  if (tests_setjmp_sigfpe() == 0)
+    {
+      exp = 1010;
+      want = 1.0;
+      for (i = 0; i < exp; i++)
+        want *= 2.0;
+
+      for ( ; exp < 1050; exp++)
+        {
+          for (sign = 0; sign >= -1; sign--)
+            {
+              got = mpn_get_d (&n, (mp_size_t) 1, sign, exp);
+              if (got != want)
+                {
+                  printf  ("mpn_get_d wrong on overflow\n");
+                  printf  ("  n=1\n");
+                  printf  ("  exp   %ld\n", exp);
+                  printf  ("  sign  %ld\n", (long) sign);
+                  d_trace ("  got   ", got);
+                  d_trace ("  want  ", want);
+                  abort ();
+                }
+              want = -want;
+            }
+          want *= 2.0;
+          FORCE_DOUBLE (want);
+        }
+    }
+  else
+    {
+      printf ("Warning, IEEE overflow tests skipped due to SIGFPE (exp=%ld)\n", exp);
+    }
+  tests_sigfpe_done ();
+}
+
+
+/* ARM gcc 2.95.4 was seen generating bad code for ulong->double
+   conversions, resulting in for instance 0x81c25113 incorrectly converted.
+   This test exercises that value, to see mpn_get_d has avoided the
+   problem.  */
+void
+check_0x81c25113 (void)
+{
+#if GMP_NUMB_BITS >= 32
+  double     want = 2176995603.0;
+  double     got;
+  mp_limb_t  np[4];
+  mp_size_t  nsize;
+  long       exp;
+
+  if (tests_dbl_mant_bits() < 32)
+    return;
+
+  for (nsize = 1; nsize <= numberof (np); nsize++)
+    {
+      refmpn_zero (np, nsize-1);
+      np[nsize-1] = CNST_LIMB(0x81c25113);
+      exp = - (nsize-1) * GMP_NUMB_BITS;
+      got = mpn_get_d (np, nsize, (mp_size_t) 0, exp);
+      if (got != want)
+        {
+          printf  ("mpn_get_d wrong on 2176995603 (0x81c25113)\n");
+          printf  ("  nsize  %ld\n", (long) nsize);
+          printf  ("  exp    %ld\n", exp);
+          d_trace ("  got    ", got);
+          d_trace ("  want   ", want);
+          abort ();
+        }
+    }
+#endif
+}
+
+
+void
+check_rand (void)
+{
+  gmp_randstate_ptr rands = RANDS;
+  int            rep, i;
+  unsigned long  mant_bits;
+  long           exp, exp_min, exp_max;
+  double         got, want, d;
+  mp_size_t      nalloc, nsize, sign;
+  mp_limb_t      nhigh_mask;
+  mp_ptr         np;
+
+  mant_bits = tests_dbl_mant_bits ();
+  if (mant_bits == 0)
+    return;
+
+  /* Allow for vax D format with exponent 127 to -128 only.
+     FIXME: Do something to probe for a valid exponent range.  */
+  exp_min = -100 - mant_bits;
+  exp_max =  100 - mant_bits;
+
+  /* space for mant_bits */
+  nalloc = BITS_TO_LIMBS (mant_bits);
+  np = refmpn_malloc_limbs (nalloc);
+  nhigh_mask = MP_LIMB_T_MAX
+    >> (GMP_NAIL_BITS + nalloc * GMP_NUMB_BITS - mant_bits);
+
+  for (rep = 0; rep < 200; rep++)
+    {
+      /* random exp_min to exp_max, inclusive */
+      exp = exp_min + (long) gmp_urandomm_ui (rands, exp_max - exp_min + 1);
+
+      /* mant_bits worth of random at np */
+      if (rep & 1)
+        mpn_random (np, nalloc);
+      else
+        mpn_random2 (np, nalloc);
+      nsize = nalloc;
+      np[nsize-1] &= nhigh_mask;
+      MPN_NORMALIZE (np, nsize);
+      if (nsize == 0)
+        continue;
+
+      sign = (mp_size_t) gmp_urandomb_ui (rands, 1L) - 1;
+
+      /* want = {np,nsize}, converting one bit at a time */
+      want = 0.0;
+      for (i = 0, d = 1.0; i < mant_bits; i++, d *= 2.0)
+        if (np[i/GMP_NUMB_BITS] & (CNST_LIMB(1) << (i%GMP_NUMB_BITS)))
+          want += d;
+      if (sign < 0)
+        want = -want;
+
+      /* want = want * 2^exp */
+      for (i = 0; i < exp; i++)
+        want *= 2.0;
+      for (i = 0; i > exp; i--)
+        want *= 0.5;
+
+      got = mpn_get_d (np, nsize, sign, exp);
+
+      if (got != want)
+        {
+          printf    ("mpn_get_d wrong on random data\n");
+          printf    ("   sign     %ld\n", (long) sign);
+          mpn_trace ("   n        ", np, nsize);
+          printf    ("   nsize    %ld\n", (long) nsize);
+          printf    ("   exp      %ld\n", exp);
+          d_trace   ("   want     ", want);
+          d_trace   ("   got      ", got);
+          abort();
+        }
+    }
+
+  free (np);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_onebit ();
+  check_twobit ();
+  check_inf ();
+  check_underflow ();
+  check_ieee_denorm ();
+  check_ieee_overflow ();
+  check_0x81c25113 ();
+#if ! (defined (__vax) || defined (__vax__))
+  check_rand ();
+#endif
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-hgcd.c b/tests/mpn/t-hgcd.c
new file mode 100644
index 0000000..07f83e9
--- /dev/null
+++ b/tests/mpn/t-hgcd.c
@@ -0,0 +1,406 @@
+/* Test mpn_hgcd.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+static mp_size_t one_test (mpz_t, mpz_t, int);
+static void debug_mp (mpz_t, int);
+
+#define MIN_OPERAND_SIZE 2
+
+/* Fixed values, for regression testing of mpn_hgcd. */
+struct value { int res; const char *a; const char *b; };
+static const struct value hgcd_values[] = {
+#if GMP_NUMB_BITS == 32
+  { 5,
+    "0x1bddff867272a9296ac493c251d7f46f09a5591fe",
+    "0xb55930a2a68a916450a7de006031068c5ddb0e5c" },
+  { 4,
+    "0x2f0ece5b1ee9c15e132a01d55768dc13",
+    "0x1c6f4fd9873cdb24466e6d03e1cc66e7" },
+  { 3, "0x7FFFFC003FFFFFFFFFC5", "0x3FFFFE001FFFFFFFFFE3"},
+#endif
+  { -1, NULL, NULL }
+};
+
+struct hgcd_ref
+{
+  mpz_t m[2][2];
+};
+
+static void hgcd_ref_init (struct hgcd_ref *);
+static void hgcd_ref_clear (struct hgcd_ref *);
+static int hgcd_ref (struct hgcd_ref *, mpz_t, mpz_t);
+static int hgcd_ref_equal (const struct hgcd_matrix *, const struct hgcd_ref *);
+
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2, temp1, temp2;
+  int i, j, chain_len;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (temp1);
+  mpz_init (temp2);
+
+  for (i = 0; hgcd_values[i].res >= 0; i++)
+    {
+      mp_size_t res;
+
+      mpz_set_str (op1, hgcd_values[i].a, 0);
+      mpz_set_str (op2, hgcd_values[i].b, 0);
+
+      res = one_test (op1, op2, -1-i);
+      if (res != hgcd_values[i].res)
+	{
+	  fprintf (stderr, "ERROR in test %d\n", -1-i);
+	  fprintf (stderr, "Bad return code from hgcd\n");
+	  fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+	  fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+	  fprintf (stderr, "expected: %d\n", hgcd_values[i].res);
+	  fprintf (stderr, "hgcd:     %d\n", (int) res);
+	  abort ();
+	}
+    }
+
+  for (i = 0; i < 15; i++)
+    {
+      /* Generate plain operands with unknown gcd.  These types of operands
+	 have proven to trigger certain bugs in development versions of the
+	 gcd code. */
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 13 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+
+      if (mpz_cmp (op1, op2) < 0)
+	mpz_swap (op1, op2);
+
+      if (mpz_size (op1) > 0)
+	one_test (op1, op2, i);
+
+      /* Generate a division chain backwards, allowing otherwise
+	 unlikely huge quotients.  */
+
+      mpz_set_ui (op1, 0);
+      mpz_urandomb (bs, rands, 32);
+      mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs));
+      mpz_add_ui (op2, op2, 1);
+
+#if 0
+      chain_len = 1000000;
+#else
+      mpz_urandomb (bs, rands, 32);
+      chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * GCD_DC_THRESHOLD / 256);
+#endif
+
+      for (j = 0; j < chain_len; j++)
+	{
+	  mpz_urandomb (bs, rands, 32);
+	  mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+	  mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+	  mpz_add_ui (temp2, temp2, 1);
+	  mpz_mul (temp1, op2, temp2);
+	  mpz_add (op1, op1, temp1);
+
+	  /* Don't generate overly huge operands.  */
+	  if (SIZ (op1) > 3 * GCD_DC_THRESHOLD)
+	    break;
+
+	  mpz_urandomb (bs, rands, 32);
+	  mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+	  mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+	  mpz_add_ui (temp2, temp2, 1);
+	  mpz_mul (temp1, op1, temp2);
+	  mpz_add (op2, op2, temp1);
+
+	  /* Don't generate overly huge operands.  */
+	  if (SIZ (op2) > 3 * GCD_DC_THRESHOLD)
+	    break;
+	}
+      if (mpz_cmp (op1, op2) < 0)
+	mpz_swap (op1, op2);
+
+      if (mpz_size (op1) > 0)
+	one_test (op1, op2, i);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (temp1);
+  mpz_clear (temp2);
+
+  tests_end ();
+  exit (0);
+}
+
+static void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
+
+static int
+mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize);
+
+static mp_size_t
+one_test (mpz_t a, mpz_t b, int i)
+{
+  struct hgcd_matrix hgcd;
+  struct hgcd_ref ref;
+
+  mpz_t ref_r0;
+  mpz_t ref_r1;
+  mpz_t hgcd_r0;
+  mpz_t hgcd_r1;
+
+  mp_size_t res[2];
+  mp_size_t asize;
+  mp_size_t bsize;
+
+  mp_size_t hgcd_init_scratch;
+  mp_size_t hgcd_scratch;
+
+  mp_ptr hgcd_init_tp;
+  mp_ptr hgcd_tp;
+
+  asize = a->_mp_size;
+  bsize = b->_mp_size;
+
+  ASSERT (asize >= bsize);
+
+  hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (asize);
+  hgcd_init_tp = refmpn_malloc_limbs (hgcd_init_scratch);
+  mpn_hgcd_matrix_init (&hgcd, asize, hgcd_init_tp);
+
+  hgcd_scratch = mpn_hgcd_itch (asize);
+  hgcd_tp = refmpn_malloc_limbs (hgcd_scratch);
+
+#if 0
+  fprintf (stderr,
+	   "one_test: i = %d asize = %d, bsize = %d\n",
+	   i, a->_mp_size, b->_mp_size);
+
+  gmp_fprintf (stderr,
+	       "one_test: i = %d\n"
+	       "  a = %Zx\n"
+	       "  b = %Zx\n",
+	       i, a, b);
+#endif
+  hgcd_ref_init (&ref);
+
+  mpz_init_set (ref_r0, a);
+  mpz_init_set (ref_r1, b);
+  res[0] = hgcd_ref (&ref, ref_r0, ref_r1);
+
+  mpz_init_set (hgcd_r0, a);
+  mpz_init_set (hgcd_r1, b);
+  if (bsize < asize)
+    {
+      _mpz_realloc (hgcd_r1, asize);
+      MPN_ZERO (hgcd_r1->_mp_d + bsize, asize - bsize);
+    }
+  res[1] = mpn_hgcd (hgcd_r0->_mp_d,
+		     hgcd_r1->_mp_d,
+		     asize,
+		     &hgcd, hgcd_tp);
+
+  if (res[0] != res[1])
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "Different return value from hgcd and hgcd_ref\n");
+      fprintf (stderr, "op1=");                 debug_mp (a, -16);
+      fprintf (stderr, "op2=");                 debug_mp (b, -16);
+      fprintf (stderr, "hgcd_ref: %ld\n", (long) res[0]);
+      fprintf (stderr, "mpn_hgcd: %ld\n", (long) res[1]);
+      abort ();
+    }
+  if (res[0] > 0)
+    {
+      if (!hgcd_ref_equal (&hgcd, &ref)
+	  || !mpz_mpn_equal (ref_r0, hgcd_r0->_mp_d, res[1])
+	  || !mpz_mpn_equal (ref_r1, hgcd_r1->_mp_d, res[1]))
+	{
+	  fprintf (stderr, "ERROR in test %d\n", i);
+	  fprintf (stderr, "mpn_hgcd and hgcd_ref returned different values\n");
+	  fprintf (stderr, "op1=");                 debug_mp (a, -16);
+	  fprintf (stderr, "op2=");                 debug_mp (b, -16);
+	  abort ();
+	}
+    }
+
+  refmpn_free_limbs (hgcd_init_tp);
+  refmpn_free_limbs (hgcd_tp);
+  hgcd_ref_clear (&ref);
+  mpz_clear (ref_r0);
+  mpz_clear (ref_r1);
+  mpz_clear (hgcd_r0);
+  mpz_clear (hgcd_r1);
+
+  return res[0];
+}
+
+static void
+hgcd_ref_init (struct hgcd_ref *hgcd)
+{
+  unsigned i;
+  for (i = 0; i<2; i++)
+    {
+      unsigned j;
+      for (j = 0; j<2; j++)
+	mpz_init (hgcd->m[i][j]);
+    }
+}
+
+static void
+hgcd_ref_clear (struct hgcd_ref *hgcd)
+{
+  unsigned i;
+  for (i = 0; i<2; i++)
+    {
+      unsigned j;
+      for (j = 0; j<2; j++)
+	mpz_clear (hgcd->m[i][j]);
+    }
+}
+
+
+static int
+sdiv_qr (mpz_t q, mpz_t r, mp_size_t s, const mpz_t a, const mpz_t b)
+{
+  mpz_fdiv_qr (q, r, a, b);
+  if (mpz_size (r) <= s)
+    {
+      mpz_add (r, r, b);
+      mpz_sub_ui (q, q, 1);
+    }
+
+  return (mpz_sgn (q) > 0);
+}
+
+static int
+hgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b)
+{
+  mp_size_t n = MAX (mpz_size (a), mpz_size (b));
+  mp_size_t s = n/2 + 1;
+  mp_size_t asize;
+  mp_size_t bsize;
+  mpz_t q;
+  int res;
+
+  if (mpz_size (a) <= s || mpz_size (b) <= s)
+    return 0;
+
+  res = mpz_cmp (a, b);
+  if (res < 0)
+    {
+      mpz_sub (b, b, a);
+      if (mpz_size (b) <= s)
+	return 0;
+
+      mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 0);
+      mpz_set_ui (hgcd->m[1][0], 1); mpz_set_ui (hgcd->m[1][1], 1);
+    }
+  else if (res > 0)
+    {
+      mpz_sub (a, a, b);
+      if (mpz_size (a) <= s)
+	return 0;
+
+      mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 1);
+      mpz_set_ui (hgcd->m[1][0], 0); mpz_set_ui (hgcd->m[1][1], 1);
+    }
+  else
+    return 0;
+
+  mpz_init (q);
+
+  for (;;)
+    {
+      ASSERT (mpz_size (a) > s);
+      ASSERT (mpz_size (b) > s);
+
+      if (mpz_cmp (a, b) > 0)
+	{
+	  if (!sdiv_qr (q, a, s, a, b))
+	    break;
+	  mpz_addmul (hgcd->m[0][1], q, hgcd->m[0][0]);
+	  mpz_addmul (hgcd->m[1][1], q, hgcd->m[1][0]);
+	}
+      else
+	{
+	  if (!sdiv_qr (q, b, s, b, a))
+	    break;
+	  mpz_addmul (hgcd->m[0][0], q, hgcd->m[0][1]);
+	  mpz_addmul (hgcd->m[1][0], q, hgcd->m[1][1]);
+	}
+    }
+
+  mpz_clear (q);
+
+  asize = mpz_size (a);
+  bsize = mpz_size (b);
+  return MAX (asize, bsize);
+}
+
+static int
+mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize)
+{
+  mp_srcptr ap = a->_mp_d;
+  mp_size_t asize = a->_mp_size;
+
+  MPN_NORMALIZE (bp, bsize);
+  return asize == bsize && mpn_cmp (ap, bp, asize) == 0;
+}
+
+static int
+hgcd_ref_equal (const struct hgcd_matrix *hgcd, const struct hgcd_ref *ref)
+{
+  unsigned i;
+
+  for (i = 0; i<2; i++)
+    {
+      unsigned j;
+
+      for (j = 0; j<2; j++)
+	if (!mpz_mpn_equal (ref->m[i][j], hgcd->p[i][j], hgcd->n))
+	  return 0;
+    }
+
+  return 1;
+}
diff --git a/tests/mpn/t-hgcd_appr.c b/tests/mpn/t-hgcd_appr.c
new file mode 100644
index 0000000..f9e2ea8
--- /dev/null
+++ b/tests/mpn/t-hgcd_appr.c
@@ -0,0 +1,563 @@
+/* Test mpn_hgcd_appr.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2004, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+static mp_size_t one_test (mpz_t, mpz_t, int);
+static void debug_mp (mpz_t, int);
+
+#define MIN_OPERAND_SIZE 2
+
+/* Reference counterpart of struct hgcd_matrix: a 2x2 matrix of mpz
+   cofactors accumulated by the naive hgcd_ref computation below.  */
+struct hgcd_ref
+{
+  mpz_t m[2][2];
+};
+
+static void hgcd_ref_init (struct hgcd_ref *hgcd);
+static void hgcd_ref_clear (struct hgcd_ref *hgcd);
+static int hgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b);
+static int hgcd_ref_equal (const struct hgcd_ref *, const struct hgcd_ref *);
+static int hgcd_appr_valid_p (mpz_t, mpz_t, mp_size_t, struct hgcd_ref *,
+			      mpz_t, mpz_t, mp_size_t, struct hgcd_matrix *);
+
+static int verbose_flag = 0;
+
+/* Test driver.  Two kinds of operand pairs are exercised: plain random
+   operands of random size, and operands built backwards from a random
+   division chain (which produces otherwise unlikely huge quotients).
+   Each pair is handed to one_test.  Pass -v for verbose diagnostics.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2, temp1, temp2;
+  int i, j, chain_len;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long size_range;
+
+  if (argc > 1)
+    {
+      if (strcmp (argv[1], "-v") == 0)
+	verbose_flag = 1;
+      else
+	{
+	  fprintf (stderr, "Invalid argument.\n");
+	  return 1;
+	}
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (temp1);
+  mpz_init (temp2);
+
+  for (i = 0; i < 15; i++)
+    {
+      /* Generate plain operands with unknown gcd.  These types of operands
+	 have proven to trigger certain bugs in development versions of the
+	 gcd code. */
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 13 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_urandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+      mpz_urandomb (bs, rands, size_range);
+      mpz_urandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+
+      /* one_test requires op1 >= op2.  */
+      if (mpz_cmp (op1, op2) < 0)
+	mpz_swap (op1, op2);
+
+      if (mpz_size (op1) > 0)
+	one_test (op1, op2, i);
+
+      /* Generate a division chain backwards, allowing otherwise
+	 unlikely huge quotients.  */
+
+      mpz_set_ui (op1, 0);
+      mpz_urandomb (bs, rands, 32);
+      mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs));
+      mpz_add_ui (op2, op2, 1);
+
+#if 0
+      chain_len = 1000000;
+#else
+      mpz_urandomb (bs, rands, 32);
+      chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * GCD_DC_THRESHOLD / 256);
+#endif
+
+      for (j = 0; j < chain_len; j++)
+	{
+	  mpz_urandomb (bs, rands, 32);
+	  mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+	  mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+	  mpz_add_ui (temp2, temp2, 1);
+	  mpz_mul (temp1, op2, temp2);
+	  mpz_add (op1, op1, temp1);
+
+	  /* Don't generate overly huge operands.  */
+	  if (SIZ (op1) > 3 * GCD_DC_THRESHOLD)
+	    break;
+
+	  mpz_urandomb (bs, rands, 32);
+	  mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+	  mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+	  mpz_add_ui (temp2, temp2, 1);
+	  mpz_mul (temp1, op1, temp2);
+	  mpz_add (op2, op2, temp1);
+
+	  /* Don't generate overly huge operands.  */
+	  if (SIZ (op2) > 3 * GCD_DC_THRESHOLD)
+	    break;
+	}
+      if (mpz_cmp (op1, op2) < 0)
+	mpz_swap (op1, op2);
+
+      if (mpz_size (op1) > 0)
+	one_test (op1, op2, i);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (temp1);
+  mpz_clear (temp2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Dump X to stderr in the given base, followed by a newline.  */
+static void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
+
+/* Run mpn_hgcd_appr on (a, b) and validate the result against the mpz
+   reference implementation hgcd_ref.  Requires a >= b (asize >= bsize
+   is asserted).  Both scratch areas are surrounded by random marker
+   limbs so that any out-of-bounds write by the functions under test is
+   detected.  Aborts on any failure; returns the reference result.  */
+static mp_size_t
+one_test (mpz_t a, mpz_t b, int i)
+{
+  struct hgcd_matrix hgcd;
+  struct hgcd_ref ref;
+
+  mpz_t ref_r0;
+  mpz_t ref_r1;
+  mpz_t hgcd_r0;
+  mpz_t hgcd_r1;
+
+  int res[2];
+  mp_size_t asize;
+  mp_size_t bsize;
+
+  mp_size_t hgcd_init_scratch;
+  mp_size_t hgcd_scratch;
+
+  mp_ptr hgcd_init_tp;
+  mp_ptr hgcd_tp;
+  mp_limb_t marker[4];
+
+  asize = a->_mp_size;
+  bsize = b->_mp_size;
+
+  ASSERT (asize >= bsize);
+
+  /* The +2/+1 below reserve one guard limb on each side of the
+     scratch areas, filled with marker values and re-checked after the
+     call.  */
+  hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (asize);
+  hgcd_init_tp = refmpn_malloc_limbs (hgcd_init_scratch + 2) + 1;
+  mpn_hgcd_matrix_init (&hgcd, asize, hgcd_init_tp);
+
+  hgcd_scratch = mpn_hgcd_appr_itch (asize);
+  hgcd_tp = refmpn_malloc_limbs (hgcd_scratch + 2) + 1;
+
+  mpn_random (marker, 4);
+
+  hgcd_init_tp[-1] = marker[0];
+  hgcd_init_tp[hgcd_init_scratch] = marker[1];
+  hgcd_tp[-1] = marker[2];
+  hgcd_tp[hgcd_scratch] = marker[3];
+
+#if 0
+  fprintf (stderr,
+	   "one_test: i = %d asize = %d, bsize = %d\n",
+	   i, a->_mp_size, b->_mp_size);
+
+  gmp_fprintf (stderr,
+	       "one_test: i = %d\n"
+	       "  a = %Zx\n"
+	       "  b = %Zx\n",
+	       i, a, b);
+#endif
+  hgcd_ref_init (&ref);
+
+  mpz_init_set (ref_r0, a);
+  mpz_init_set (ref_r1, b);
+  res[0] = hgcd_ref (&ref, ref_r0, ref_r1);
+
+  mpz_init_set (hgcd_r0, a);
+  mpz_init_set (hgcd_r1, b);
+  /* mpn_hgcd_appr gets both operands as asize limbs; zero-pad b.  */
+  if (bsize < asize)
+    {
+      _mpz_realloc (hgcd_r1, asize);
+      MPN_ZERO (hgcd_r1->_mp_d + bsize, asize - bsize);
+    }
+  res[1] = mpn_hgcd_appr (hgcd_r0->_mp_d,
+			  hgcd_r1->_mp_d,
+			  asize,
+			  &hgcd, hgcd_tp);
+
+  /* Verify that neither guard limb around either scratch area was
+     clobbered.  */
+  if (hgcd_init_tp[-1] != marker[0]
+      || hgcd_init_tp[hgcd_init_scratch] != marker[1]
+      || hgcd_tp[-1] != marker[2]
+      || hgcd_tp[hgcd_scratch] != marker[3])
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "scratch space overwritten!\n");
+
+      if (hgcd_init_tp[-1] != marker[0])
+	gmp_fprintf (stderr,
+		     "before init_tp: %Mx\n"
+		     "expected: %Mx\n",
+		     hgcd_init_tp[-1], marker[0]);
+      if (hgcd_init_tp[hgcd_init_scratch] != marker[1])
+	gmp_fprintf (stderr,
+		     "after init_tp: %Mx\n"
+		     "expected: %Mx\n",
+		     hgcd_init_tp[hgcd_init_scratch], marker[1]);
+      if (hgcd_tp[-1] != marker[2])
+	gmp_fprintf (stderr,
+		     "before tp: %Mx\n"
+		     "expected: %Mx\n",
+		     hgcd_tp[-1], marker[2]);
+      if (hgcd_tp[hgcd_scratch] != marker[3])
+	gmp_fprintf (stderr,
+		     "after tp: %Mx\n"
+		     "expected: %Mx\n",
+		     hgcd_tp[hgcd_scratch], marker[3]);
+
+      abort ();
+    }
+
+  if (!hgcd_appr_valid_p (a, b, res[0], &ref, ref_r0, ref_r1,
+			  res[1], &hgcd))
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "Invalid results for hgcd and hgcd_ref\n");
+      fprintf (stderr, "op1=");                 debug_mp (a, -16);
+      fprintf (stderr, "op2=");                 debug_mp (b, -16);
+      fprintf (stderr, "hgcd_ref: %ld\n", (long) res[0]);
+      fprintf (stderr, "mpn_hgcd_appr: %ld\n", (long) res[1]);
+      abort ();
+    }
+
+  /* -1 undoes the +1 guard-limb offset applied at allocation.  */
+  refmpn_free_limbs (hgcd_init_tp - 1);
+  refmpn_free_limbs (hgcd_tp - 1);
+  hgcd_ref_clear (&ref);
+  mpz_clear (ref_r0);
+  mpz_clear (ref_r1);
+  mpz_clear (hgcd_r0);
+  mpz_clear (hgcd_r1);
+
+  return res[0];
+}
+
+/* Initialize all four mpz entries of the reference matrix.  */
+static void
+hgcd_ref_init (struct hgcd_ref *hgcd)
+{
+  unsigned i;
+  for (i = 0; i<2; i++)
+    {
+      unsigned j;
+      for (j = 0; j<2; j++)
+	mpz_init (hgcd->m[i][j]);
+    }
+}
+
+/* Free all four mpz entries of the reference matrix.  */
+static void
+hgcd_ref_clear (struct hgcd_ref *hgcd)
+{
+  unsigned i;
+  for (i = 0; i<2; i++)
+    {
+      unsigned j;
+      for (j = 0; j<2; j++)
+	mpz_clear (hgcd->m[i][j]);
+    }
+}
+
+/* Division step keeping the remainder large: compute q, r with
+   a = q*b + r, 0 <= r < b, then if r has at most s limbs back up one
+   step (r += b, q -= 1) so the remainder stays above s limbs.
+   Return non-zero iff the adjusted quotient is still positive, i.e.
+   iff an actual reduction step was performed.  */
+static int
+sdiv_qr (mpz_t q, mpz_t r, mp_size_t s, const mpz_t a, const mpz_t b)
+{
+  mpz_fdiv_qr (q, r, a, b);
+  if (mpz_size (r) <= s)
+    {
+      mpz_add (r, r, b);
+      mpz_sub_ui (q, q, 1);
+    }
+
+  return (mpz_sgn (q) > 0);
+}
+
+/* Reference "half gcd": reduce (a, b) in place with subtraction and
+   sdiv_qr division steps while both operands stay larger than
+   s = n/2 + 1 limbs, accumulating the quotients into the 2x2 cofactor
+   matrix hgcd->m.  Return 0 if no reduction step is possible at all,
+   otherwise 1.  */
+static int
+hgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b)
+{
+  mp_size_t n = MAX (mpz_size (a), mpz_size (b));
+  mp_size_t s = n/2 + 1;
+  mpz_t q;
+  int res;
+
+  if (mpz_size (a) <= s || mpz_size (b) <= s)
+    return 0;
+
+  /* First step is a plain subtraction of the smaller from the larger;
+     the matrix is seeded with the corresponding elementary matrix.  */
+  res = mpz_cmp (a, b);
+  if (res < 0)
+    {
+      mpz_sub (b, b, a);
+      if (mpz_size (b) <= s)
+	return 0;
+
+      mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 0);
+      mpz_set_ui (hgcd->m[1][0], 1); mpz_set_ui (hgcd->m[1][1], 1);
+    }
+  else if (res > 0)
+    {
+      mpz_sub (a, a, b);
+      if (mpz_size (a) <= s)
+	return 0;
+
+      mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 1);
+      mpz_set_ui (hgcd->m[1][0], 0); mpz_set_ui (hgcd->m[1][1], 1);
+    }
+  else
+    return 0;
+
+  mpz_init (q);
+
+  /* Keep dividing the larger operand by the smaller, folding each
+     quotient into the matrix, until sdiv_qr reports no further step is
+     possible without shrinking an operand to <= s limbs.  */
+  for (;;)
+    {
+      ASSERT (mpz_size (a) > s);
+      ASSERT (mpz_size (b) > s);
+
+      if (mpz_cmp (a, b) > 0)
+	{
+	  if (!sdiv_qr (q, a, s, a, b))
+	    break;
+	  mpz_addmul (hgcd->m[0][1], q, hgcd->m[0][0]);
+	  mpz_addmul (hgcd->m[1][1], q, hgcd->m[1][0]);
+	}
+      else
+	{
+	  if (!sdiv_qr (q, b, s, b, a))
+	    break;
+	  mpz_addmul (hgcd->m[0][0], q, hgcd->m[0][1]);
+	  mpz_addmul (hgcd->m[1][0], q, hgcd->m[1][1]);
+	}
+    }
+
+  mpz_clear (q);
+
+  return 1;
+}
+
+/* Return 1 iff the two reference matrices are entry-wise equal.  */
+static int
+hgcd_ref_equal (const struct hgcd_ref *A, const struct hgcd_ref *B)
+{
+  unsigned i;
+
+  for (i = 0; i<2; i++)
+    {
+      unsigned j;
+
+      for (j = 0; j<2; j++)
+	if (mpz_cmp (A->m[i][j], B->m[i][j]) != 0)
+	  return 0;
+    }
+
+  return 1;
+}
+
+/* Validate the mpn_hgcd_appr result against the reference computation.
+   (res0, ref, ref_r0, ref_r1) describe the reference result; (res1,
+   hgcd) the mpn_hgcd_appr result for the same (a, b).  Checks that:
+   the reference remainders are larger than s = n/2 + 1 limbs with
+   |r0 - r1| at most s limbs; the approximate matrix reduces (a, b) to
+   positive remainders larger than s limbs; the approximate |r0 - r1|
+   exceeds the minimal value by at most a computed margin; and that
+   continuing the reduction with exact division steps reaches exactly
+   the reference remainders and matrix.  Returns 1 if valid.  */
+static int
+hgcd_appr_valid_p (mpz_t a, mpz_t b, mp_size_t res0,
+		   struct hgcd_ref *ref, mpz_t ref_r0, mpz_t ref_r1,
+		   mp_size_t res1, struct hgcd_matrix *hgcd)
+{
+  mp_size_t n = MAX (mpz_size (a), mpz_size (b));
+  mp_size_t s = n/2 + 1;
+
+  mp_bitcnt_t dbits, abits, margin;
+  mpz_t appr_r0, appr_r1, t, q;
+  struct hgcd_ref appr;
+
+  if (!res0)
+    {
+      if (!res1)
+	return 1;
+
+      fprintf (stderr, "mpn_hgcd_appr returned 1 when no reduction possible.\n");
+      return 0;
+    }
+
+  /* NOTE: No *_clear calls on error return, since we're going to
+     abort anyway. */
+  mpz_init (t);
+  mpz_init (q);
+  hgcd_ref_init (&appr);
+  mpz_init (appr_r0);
+  mpz_init (appr_r1);
+
+  if (mpz_size (ref_r0) <= s)
+    {
+      fprintf (stderr, "ref_r0 too small!!!: "); debug_mp (ref_r0, 16);
+      return 0;
+    }
+  if (mpz_size (ref_r1) <= s)
+    {
+      fprintf (stderr, "ref_r1 too small!!!: "); debug_mp (ref_r1, 16);
+      return 0;
+    }
+
+  mpz_sub (t, ref_r0, ref_r1);
+  dbits = mpz_sizeinbase (t, 2);
+  if (dbits > s*GMP_NUMB_BITS)
+    {
+      fprintf (stderr, "ref |r0 - r1| too large!!!: "); debug_mp (t, 16);
+      return 0;
+    }
+
+  if (!res1)
+    {
+      /* mpn_hgcd_appr made no progress; the "approximate" state is just
+	 the original operands with an implicit identity matrix.  */
+      mpz_set (appr_r0, a);
+      mpz_set (appr_r1, b);
+    }
+  else
+    {
+      unsigned i;
+
+      /* Convert the mpn matrix entries to mpz, then apply the inverse
+	 matrix to (a, b) to recover the approximate remainders.  */
+      for (i = 0; i<2; i++)
+	{
+	  unsigned j;
+
+	  for (j = 0; j<2; j++)
+	    {
+	      mp_size_t mn = hgcd->n;
+	      MPN_NORMALIZE (hgcd->p[i][j], mn);
+	      mpz_realloc (appr.m[i][j], mn);
+	      MPN_COPY (PTR (appr.m[i][j]), hgcd->p[i][j], mn);
+	      SIZ (appr.m[i][j]) = mn;
+	    }
+	}
+      mpz_mul (appr_r0, appr.m[1][1], a);
+      mpz_mul (t, appr.m[0][1], b);
+      mpz_sub (appr_r0, appr_r0, t);
+      if (mpz_sgn (appr_r0) <= 0
+	  || mpz_size (appr_r0) <= s)
+	{
+	  fprintf (stderr, "appr_r0 too small: "); debug_mp (appr_r0, 16);
+	  return 0;
+	}
+
+      mpz_mul (appr_r1, appr.m[1][0], a);
+      mpz_mul (t, appr.m[0][0], b);
+      mpz_sub (appr_r1, t, appr_r1);
+      if (mpz_sgn (appr_r1) <= 0
+	  || mpz_size (appr_r1) <= s)
+	{
+	  fprintf (stderr, "appr_r1 too small: "); debug_mp (appr_r1, 16);
+	  return 0;
+	}
+    }
+
+  mpz_sub (t, appr_r0, appr_r1);
+  abits = mpz_sizeinbase (t, 2);
+  if (abits < dbits)
+    {
+      fprintf (stderr, "|r0 - r1| too small: "); debug_mp (t, 16);
+      return 0;
+    }
+
+  /* We lose one bit each time we discard the least significant limbs.
+     For the lehmer code, that can happen at most s * (GMP_NUMB_BITS)
+     / (GMP_NUMB_BITS - 1) times. For the dc code, we lose an entire
+     limb (or more?) for each level of recursion. */
+
+  margin = (n/2+1) * GMP_NUMB_BITS / (GMP_NUMB_BITS - 1);
+  {
+    mp_size_t rn;
+    for (rn = n; ABOVE_THRESHOLD (rn, HGCD_APPR_THRESHOLD); rn = (rn + 1)/2)
+      margin += GMP_NUMB_BITS;
+  }
+
+  if (verbose_flag && abits > dbits)
+    fprintf (stderr, "n = %u: sbits = %u: ref #(r0-r1): %u, appr #(r0-r1): %u excess: %d, margin: %u\n",
+	     (unsigned) n, (unsigned) s*GMP_NUMB_BITS,
+	     (unsigned) dbits, (unsigned) abits,
+	     (int) (abits - s * GMP_NUMB_BITS), (unsigned) margin);
+
+  if (abits > s*GMP_NUMB_BITS + margin)
+    {
+      fprintf (stderr, "appr |r0 - r1| much larger than minimal (by %u bits, margin = %u bits)\n",
+	       (unsigned) (abits - s*GMP_NUMB_BITS), (unsigned) margin);
+      return 0;
+    }
+
+  /* Continue reducing the approximate remainders with exact division
+     steps; this must converge to exactly the reference state.  */
+  while (mpz_cmp (appr_r0, ref_r0) > 0 || mpz_cmp (appr_r1, ref_r1) > 0)
+    {
+      ASSERT (mpz_size (appr_r0) > s);
+      ASSERT (mpz_size (appr_r1) > s);
+
+      if (mpz_cmp (appr_r0, appr_r1) > 0)
+	{
+	  if (!sdiv_qr (q, appr_r0, s, appr_r0, appr_r1))
+	    break;
+	  mpz_addmul (appr.m[0][1], q, appr.m[0][0]);
+	  mpz_addmul (appr.m[1][1], q, appr.m[1][0]);
+	}
+      else
+	{
+	  if (!sdiv_qr (q, appr_r1, s, appr_r1, appr_r0))
+	    break;
+	  mpz_addmul (appr.m[0][0], q, appr.m[0][1]);
+	  mpz_addmul (appr.m[1][0], q, appr.m[1][1]);
+	}
+    }
+
+  if (mpz_cmp (appr_r0, ref_r0) != 0
+      || mpz_cmp (appr_r1, ref_r1) != 0
+      || !hgcd_ref_equal (ref, &appr))
+    {
+      fprintf (stderr, "appr_r0: "); debug_mp (appr_r0, 16);
+      fprintf (stderr, "ref_r0: "); debug_mp (ref_r0, 16);
+
+      fprintf (stderr, "appr_r1: "); debug_mp (appr_r1, 16);
+      fprintf (stderr, "ref_r1: "); debug_mp (ref_r1, 16);
+
+      return 0;
+    }
+  mpz_clear (t);
+  mpz_clear (q);
+  hgcd_ref_clear (&appr);
+  mpz_clear (appr_r0);
+  mpz_clear (appr_r1);
+
+  return 1;
+}
diff --git a/tests/mpn/t-instrument.c b/tests/mpn/t-instrument.c
new file mode 100644
index 0000000..694e171
--- /dev/null
+++ b/tests/mpn/t-instrument.c
@@ -0,0 +1,415 @@
+/* Test assembler support for --enable-profiling=instrument.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+#if WANT_PROFILING_INSTRUMENT
+
+/* This program exercises each mpn routine that might be implemented in
+   assembler.  It ensures the __cyg_profile_func_enter and exit calls have
+   come out right, and that in the x86 code "ret_internal" is correctly used
+   for PIC setups.  */
+
+
+/* Changes to enter_seen done by __cyg_profile_func_enter are essentially
+   unknown to the optimizer, so must use volatile.  */
+volatile int  enter_seen;
+
+/* Dummy used to stop various calls going dead. */
+unsigned long  notdead;
+
+const char     *name = "<none>";
+int  old_ncall;
+
+struct {
+  void  *this_fn;
+  void  *call_site;
+} call[100];
+int  ncall;
+
+
+void __cyg_profile_func_enter (void *, void *)
+  __attribute__ ((no_instrument_function));
+
+/* Instrumentation hook invoked on every function entry: push the
+   (this_fn, call_site) pair onto the recording stack and flag that an
+   enter was seen.  Aborts if the recording stack would overflow.  */
+void
+__cyg_profile_func_enter (void *this_fn, void *call_site)
+{
+#if 0
+  printf ("%24s %p %p\n", name, this_fn, call_site);
+#endif
+  ASSERT_ALWAYS (ncall >= 0);
+  ASSERT_ALWAYS (ncall <= numberof (call));
+
+  if (ncall >= numberof (call))
+    {
+      printf ("__cyg_profile_func_enter: oops, call stack full, from %s\n", name);
+      abort ();
+    }
+
+  enter_seen = 1;
+  call[ncall].this_fn = this_fn;
+  call[ncall].call_site = call_site;
+  ncall++;
+}
+
+void __cyg_profile_func_exit (void *, void *)
+  __attribute__ ((no_instrument_function));
+
+/* Instrumentation hook invoked on every function exit: pop the top of
+   the recording stack and verify it matches the (this_fn, call_site)
+   recorded at entry.  Aborts on an empty stack or a mismatch.  */
+void
+__cyg_profile_func_exit  (void *this_fn, void *call_site)
+{
+  ASSERT_ALWAYS (ncall >= 0);
+  ASSERT_ALWAYS (ncall <= numberof (call));
+
+  if (ncall == 0)
+    {
+      printf ("__cyg_profile_func_exit: call stack empty, from %s\n", name);
+      abort ();
+    }
+
+  ncall--;
+  if (this_fn != call[ncall].this_fn || call_site != call[ncall].call_site)
+    {
+      printf ("__cyg_profile_func_exit: unbalanced this_fn/call_site from %s\n", name);
+      printf ("  this_fn got  %p\n", this_fn);
+      printf ("          want %p\n", call[ncall].this_fn);
+      printf ("  call_site got  %p\n", call_site);
+      printf ("            want %p\n", call[ncall].call_site);
+      abort ();
+    }
+}
+
+
+/* Start exercising the routine named STR: reset the enter-seen flag
+   and remember the current call depth so post() can check balance.  */
+void
+pre (const char *str)
+{
+  name = str;
+  enter_seen = 0;
+  old_ncall = ncall;
+}
+
+/* Verify the routine exercised since the last pre() triggered
+   __cyg_profile_func_enter and left enter/exit calls balanced.  */
+void
+post (void)
+{
+  if (! enter_seen)
+    {
+      printf ("did not reach __cyg_profile_func_enter from %s\n", name);
+      abort ();
+    }
+
+  if (ncall != old_ncall)
+    {
+      printf ("unbalance enter/exit calls from %s\n", name);
+      printf ("  ncall     %d\n", ncall);
+      printf ("  old_ncall %d\n", old_ncall);
+      abort ();
+    }
+}
+
+/* Exercise each potentially-assembler mpn routine on zeroed operands,
+   bracketing every call with pre()/post() so the instrumentation hooks
+   verify proper enter/exit behaviour.  Optional routines are guarded
+   by the corresponding HAVE_NATIVE_* / configuration macros.  */
+void
+check (void)
+{
+  mp_limb_t  wp[100], xp[100], yp[100];
+  mp_size_t  size = 100;
+
+  refmpn_zero (xp, size);
+  refmpn_zero (yp, size);
+  refmpn_zero (wp, size);
+
+  pre ("mpn_add_n");
+  mpn_add_n (wp, xp, yp, size);
+  post ();
+
+#if HAVE_NATIVE_mpn_add_nc
+  pre ("mpn_add_nc");
+  mpn_add_nc (wp, xp, yp, size, CNST_LIMB(0));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+  pre ("mpn_addlsh1_n");
+  mpn_addlsh1_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_and_n
+  pre ("mpn_and_n");
+  mpn_and_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_andn_n
+  pre ("mpn_andn_n");
+  mpn_andn_n (wp, xp, yp, size);
+  post ();
+#endif
+
+  pre ("mpn_addmul_1");
+  mpn_addmul_1 (wp, xp, size, yp[0]);
+  post ();
+
+#if HAVE_NATIVE_mpn_addmul_1c
+  pre ("mpn_addmul_1c");
+  mpn_addmul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_com
+  pre ("mpn_com");
+  mpn_com (wp, xp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_copyd
+  pre ("mpn_copyd");
+  mpn_copyd (wp, xp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_copyi
+  pre ("mpn_copyi");
+  mpn_copyi (wp, xp, size);
+  post ();
+#endif
+
+  pre ("mpn_divexact_1");
+  mpn_divexact_1 (wp, xp, size, CNST_LIMB(123));
+  post ();
+
+  pre ("mpn_divexact_by3c");
+  mpn_divexact_by3c (wp, xp, size, CNST_LIMB(0));
+  post ();
+
+  pre ("mpn_divrem_1");
+  mpn_divrem_1 (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123));
+  post ();
+
+#if HAVE_NATIVE_mpn_divrem_1c
+  pre ("mpn_divrem_1c");
+  mpn_divrem_1c (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123), CNST_LIMB(122));
+  post ();
+#endif
+
+  pre ("mpn_gcd_1");
+  /* ensure the operand is odd, hence nonzero */
+  xp[0] |= 1;
+  notdead += (unsigned long) mpn_gcd_1 (xp, size, CNST_LIMB(123));
+  post ();
+
+  pre ("mpn_hamdist");
+  notdead += mpn_hamdist (xp, yp, size);
+  post ();
+
+#if HAVE_NATIVE_mpn_ior_n
+  pre ("mpn_ior_n");
+  mpn_ior_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_iorn_n
+  pre ("mpn_iorn_n");
+  mpn_iorn_n (wp, xp, yp, size);
+  post ();
+#endif
+
+  pre ("mpn_lshift");
+  mpn_lshift (wp, xp, size, 1);
+  post ();
+
+  pre ("mpn_mod_1");
+  notdead += mpn_mod_1 (xp, size, CNST_LIMB(123));
+  post ();
+
+#if HAVE_NATIVE_mpn_mod_1c
+  pre ("mpn_mod_1c");
+  notdead += mpn_mod_1c (xp, size, CNST_LIMB(123), CNST_LIMB(122));
+  post ();
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0
+  pre ("mpn_mod_34lsub1");
+  notdead += mpn_mod_34lsub1 (xp, size);
+  post ();
+#endif
+
+  pre ("mpn_modexact_1_odd");
+  notdead += mpn_modexact_1_odd (xp, size, CNST_LIMB(123));
+  post ();
+
+  pre ("mpn_modexact_1c_odd");
+  notdead += mpn_modexact_1c_odd (xp, size, CNST_LIMB(123), CNST_LIMB(456));
+  post ();
+
+  pre ("mpn_mul_1");
+  mpn_mul_1 (wp, xp, size, yp[0]);
+  post ();
+
+#if HAVE_NATIVE_mpn_mul_1c
+  pre ("mpn_mul_1c");
+  mpn_mul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_mul_2
+  pre ("mpn_mul_2");
+  mpn_mul_2 (wp, xp, size-1, yp);
+  post ();
+#endif
+
+  pre ("mpn_mul_basecase");
+  mpn_mul_basecase (wp, xp, (mp_size_t) 3, yp, (mp_size_t) 3);
+  post ();
+
+#if HAVE_NATIVE_mpn_nand_n
+  pre ("mpn_nand_n");
+  mpn_nand_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_nior_n
+  pre ("mpn_nior_n");
+  mpn_nior_n (wp, xp, yp, size);
+  post ();
+#endif
+
+  pre ("mpn_popcount");
+  notdead += mpn_popcount (xp, size);
+  post ();
+
+  pre ("mpn_preinv_mod_1");
+  notdead += mpn_preinv_mod_1 (xp, size, GMP_NUMB_MAX,
+                               refmpn_invert_limb (GMP_NUMB_MAX));
+  post ();
+
+#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
+  pre ("mpn_preinv_divrem_1");
+  mpn_preinv_divrem_1 (wp, (mp_size_t) 0, xp, size, GMP_NUMB_MAX,
+                       refmpn_invert_limb (GMP_NUMB_MAX), 0);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_rsh1add_n
+  pre ("mpn_rsh1add_n");
+  mpn_rsh1add_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_rsh1sub_n
+  pre ("mpn_rsh1sub_n");
+  mpn_rsh1sub_n (wp, xp, yp, size);
+  post ();
+#endif
+
+  pre ("mpn_rshift");
+  mpn_rshift (wp, xp, size, 1);
+  post ();
+
+  pre ("mpn_sqr_basecase");
+  mpn_sqr_basecase (wp, xp, (mp_size_t) 3);
+  post ();
+
+  pre ("mpn_submul_1");
+  mpn_submul_1 (wp, xp, size, yp[0]);
+  post ();
+
+#if HAVE_NATIVE_mpn_submul_1c
+  pre ("mpn_submul_1c");
+  mpn_submul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
+  post ();
+#endif
+
+  pre ("mpn_sub_n");
+  mpn_sub_n (wp, xp, yp, size);
+  post ();
+
+#if HAVE_NATIVE_mpn_sub_nc
+  pre ("mpn_sub_nc");
+  mpn_sub_nc (wp, xp, yp, size, CNST_LIMB(0));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh1_n
+  pre ("mpn_sublsh1_n");
+  mpn_sublsh1_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_udiv_qrnnd
+  pre ("mpn_udiv_qrnnd");
+  mpn_udiv_qrnnd (&wp[0], CNST_LIMB(122), xp[0], CNST_LIMB(123));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_udiv_qrnnd_r
+  pre ("mpn_udiv_qrnnd_r");
+  /* Fixed: call the _r variant (remainder pointer last), matching the
+     feature test above.  The non-_r function takes the remainder
+     pointer as its FIRST argument, so the previous call of
+     mpn_udiv_qrnnd with a limb constant there was a type error.  */
+  mpn_udiv_qrnnd_r (CNST_LIMB(122), xp[0], CNST_LIMB(123), &wp[0]);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_umul_ppmm
+  pre ("mpn_umul_ppmm");
+  mpn_umul_ppmm (&wp[0], xp[0], yp[0]);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_umul_ppmm_r
+  pre ("mpn_umul_ppmm_r");
+  mpn_umul_ppmm_r (&wp[0], xp[0], yp[0]);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_xor_n
+  pre ("mpn_xor_n");
+  mpn_xor_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_xnor_n
+  pre ("mpn_xnor_n");
+  mpn_xnor_n (wp, xp, yp, size);
+  post ();
+#endif
+}
+
+
+/* Run the instrumentation checks under the standard test harness.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check ();
+
+  tests_end ();
+  exit (0);
+}
+
+
+#else /* ! WANT_PROFILING_INSTRUMENT */
+
+/* Instrumented profiling not enabled in this build; nothing to test.  */
+int
+main (void)
+{
+  exit (0);
+}
+
+#endif
diff --git a/tests/mpn/t-invert.c b/tests/mpn/t-invert.c
new file mode 100644
index 0000000..1493467
--- /dev/null
+++ b/tests/mpn/t-invert.c
@@ -0,0 +1,151 @@
+/* Test for mpn_invert function.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 12
+#endif
+
+#ifndef COUNT
+#define COUNT 1000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N 1
+
+
+/* Validate a computed inverse.  With B = 2^GMP_NUMB_BITS, checks that
+   ip*dp + B^n*dp fits in 2n limbs without carry, while adding dp once
+   more does carry out, i.e.
+     B^n*dp + ip*dp <= B^2n - 1 < B^n*dp + (ip+1)*dp.
+   Returns non-zero iff both conditions hold.  */
+static int
+invert_valid (mp_srcptr ip, mp_srcptr dp, mp_size_t n)
+{
+  mp_ptr tp;
+  int cy;
+  TMP_DECL;
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (2*n);
+
+  refmpn_mul (tp, ip, n, dp, n);
+  cy  = refmpn_add_n (tp + n, tp + n, dp, n); /* This must not give a carry. */
+  cy -= refmpn_add (tp, tp, 2*n, dp, n); /* This must give a carry. */
+  TMP_FREE;
+
+  return (cy == -1);
+}
+
+/*
+  Check the result of the mpn_invert function in the library.
+*/
+
+/* Test driver: for COUNT random sizes n, invert a random normalized
+   divisor and validate the result.  The result and scratch areas get
+   one random guard limb on each side (the +1 pointer offsets below),
+   re-checked after the call to detect out-of-bounds writes.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr ip, dp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  dp = TMP_ALLOC_LIMBS (MAX_N);
+  ip = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_invert_itch (MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t n;
+      mp_size_t itch;
+      mp_limb_t i_before, i_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+	;
+
+      /* We generate n in the range MIN_N <= n <= (1 << size_range). */
+      size_range = size_min
+	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      n = MIN_N
+	+ gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+
+      mpn_random2 (dp, n);
+
+      /* Fill ip (including its guard limbs) with random data and
+	 record the guard values.  */
+      mpn_random2 (ip-1, n + 2);
+      i_before = ip[-1];
+      i_after = ip[n];
+
+      itch = mpn_invert_itch (n);
+      ASSERT_ALWAYS (itch <= mpn_invert_itch (MAX_N));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      /* mpn_invert requires a normalized divisor (high bit set).  */
+      dp[n-1] |= GMP_NUMB_HIGHBIT;
+      mpn_invert (ip, dp, n, scratch);
+      if (ip[-1] != i_before || ip[n] != i_after
+	  || scratch[-1] != s_before || scratch[itch] != s_after
+	  || ! invert_valid(ip, dp, n))
+	{
+	  printf ("ERROR in test %d, n = %d\n",
+		  test, (int) n);
+	  if (ip[-1] != i_before)
+	    {
+	      printf ("before ip:"); mpn_dump (ip -1, 1);
+	      printf ("keep:   "); mpn_dump (&i_before, 1);
+	    }
+	  if (ip[n] != i_after)
+	    {
+	      printf ("after ip:"); mpn_dump (ip + n, 1);
+	      printf ("keep:   "); mpn_dump (&i_after, 1);
+	    }
+	  if (scratch[-1] != s_before)
+	    {
+	      printf ("before scratch:"); mpn_dump (scratch-1, 1);
+	      printf ("keep:   "); mpn_dump (&s_before, 1);
+	    }
+	  if (scratch[itch] != s_after)
+	    {
+	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+	      printf ("keep:   "); mpn_dump (&s_after, 1);
+	    }
+	  mpn_dump (dp, n);
+	  mpn_dump (ip, n);
+
+	  abort();
+	}
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-iord_u.c b/tests/mpn/t-iord_u.c
new file mode 100644
index 0000000..2b27713
--- /dev/null
+++ b/tests/mpn/t-iord_u.c
@@ -0,0 +1,220 @@
+/* Test MPN_INCR_U and MPN_DECR_U.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* The i386 MPN_INCR_U and MPN_DECR_U have special cases for "n" being a
+   compile-time constant 1, so that's exercised explicitly.  */
+
+
+#define M     GMP_NUMB_MAX
+#define SIZE  ((mp_size_t) 10)
+
+
+/* Compare GOT against WANT (SIZE limbs); on mismatch dump the source
+   operand, increment and both results, then abort.  */
+void
+check_one (const char *name, int i,
+           mp_srcptr src, mp_limb_t n,
+           mp_srcptr got, mp_srcptr want, mp_size_t size)
+{
+  if (! refmpn_equal_anynail (got, want, size))
+    {
+      printf ("Wrong at %s i=%d\n", name, i);
+      mpn_trace ("  src", src,  size);
+      mpn_trace ("    n", &n,   (mp_size_t) 1);
+      mpn_trace ("  got", got,  size);
+      mpn_trace (" want", want, size);
+      abort ();
+    }
+}
+
+
+/* Table-driven MPN_INCR_U tests: each entry gives an increment n, a
+   source operand and the expected result, covering carry propagation
+   through up to four limbs of all-ones.  When n == 1 the case is also
+   run with a compile-time constant 1, since (per the note at the top
+   of this file) i386 has a special path for that.  */
+void
+check_incr_data (void)
+{
+  static const struct {
+    mp_limb_t        n;
+    const mp_limb_t  src[SIZE];
+    const mp_limb_t  want[SIZE];
+  } data[] = {
+    { 1, { 0 },   { 1 } },
+    { 1, { 123 }, { 124 } },
+    { 2, { 0 },   { 2 } },
+    { 2, { 123 }, { 125 } },
+    { M, { 0 },   { M } },
+
+    { 1, { M, 0 },   { 0,   1 } },
+    { 1, { M, 123 }, { 0,   124 } },
+    { 2, { M, 0 },   { 1,   1 } },
+    { 2, { M, 123 }, { 1,   124 } },
+    { M, { M, 0 },   { M-1, 1 } },
+    { M, { M, 123 }, { M-1, 124 } },
+
+    { 1, { M, M, 0 },   { 0,   0, 1 } },
+    { 1, { M, M, 123 }, { 0,   0, 124 } },
+    { 2, { M, M, 0 },   { 1,   0, 1 } },
+    { 2, { M, M, 123 }, { 1,   0, 124 } },
+    { M, { M, M, 0 },   { M-1, 0, 1 } },
+    { M, { M, M, 123 }, { M-1, 0, 124 } },
+
+    { 1, { M, M, M, 0 },   { 0,   0, 0, 1 } },
+    { 1, { M, M, M, 123 }, { 0,   0, 0, 124 } },
+    { 2, { M, M, M, 0 },   { 1,   0, 0, 1 } },
+    { 2, { M, M, M, 123 }, { 1,   0, 0, 124 } },
+    { M, { M, M, M, 0 },   { M-1, 0, 0, 1 } },
+    { M, { M, M, M, 123 }, { M-1, 0, 0, 124 } },
+
+    { 1, { M, M, M, M, 0 },   { 0,   0, 0, 0, 1 } },
+    { 1, { M, M, M, M, 123 }, { 0,   0, 0, 0, 124 } },
+    { 2, { M, M, M, M, 0 },   { 1,   0, 0, 0, 1 } },
+    { 2, { M, M, M, M, 123 }, { 1,   0, 0, 0, 124 } },
+    { M, { M, M, M, M, 0 },   { M-1, 0, 0, 0, 1 } },
+    { M, { M, M, M, M, 123 }, { M-1, 0, 0, 0, 124
+#if defined (__hpux) && ! defined (__GNUC__)
+    /* Some versions (at least HP92453-01 B.11.11.23709.GP) of the
+       HP C compilers fail to zero-fill aggregates as the ISO C standard
+       requires (cf 6.5.7 Initialization).  Compensate here:  */
+				, 0, 0, 0, 0, 0
+#endif
+    } }
+  };
+
+  mp_limb_t  got[SIZE];
+  int   i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      refmpn_copyi (got, data[i].src, SIZE);
+      MPN_INCR_U (got, SIZE, data[i].n);
+      check_one ("check_incr (general)", i,
+                 data[i].src, data[i].n,
+                 got, data[i].want, SIZE);
+
+      if (data[i].n == 1)
+        {
+          refmpn_copyi (got, data[i].src, SIZE);
+          MPN_INCR_U (got, SIZE, CNST_LIMB(1));
+          check_one ("check_incr (const 1)", i,
+                     data[i].src, data[i].n,
+                     got, data[i].want, SIZE);
+        }
+    }
+}
+
+/* Table-driven MPN_DECR_U tests, mirror of check_incr_data: each entry
+   gives a decrement n, a source operand and the expected result,
+   covering borrow propagation through up to five limbs of zeros.  When
+   n == 1 the case is also run with a compile-time constant 1 to hit
+   the i386 special path noted at the top of this file.  */
+void
+check_decr_data (void)
+{
+  static const struct {
+    mp_limb_t        n;
+    const mp_limb_t  src[SIZE];
+    const mp_limb_t  want[SIZE];
+  } data[] = {
+    { 1,   { 1 },   { 0   } },
+    { 1,   { 123 }, { 122 } },
+    { 1,   { M },   { M-1 } },
+    { 2,   { 2 },   { 0   } },
+    { 2,   { 123 }, { 121 } },
+    { M,   { M },   { 0   } },
+    { M-1, { M },   { 1   } },
+
+    { 1,   { 0,   1   }, { M,   0   } },
+    { 1,   { 0,   123 }, { M,   122 } },
+    { 1,   { 0,   M   }, { M,   M-1 } },
+    { 2,   { 0,   123 }, { M-1, 122 } },
+    { 2,   { 1,   123 }, { M,   122 } },
+    { M,   { 0,   123 }, { 1,   122 } },
+    { M,   { M-1, M   }, { M,   M-1 } },
+
+    { 1,   { 0,   0, 1   }, { M,   M, 0   } },
+    { 1,   { 0,   0, 123 }, { M,   M, 122 } },
+    { 1,   { 0,   0, M   }, { M,   M, M-1 } },
+    { 2,   { 0,   0, 123 }, { M-1, M, 122 } },
+    { 2,   { 1,   0, 123 }, { M,   M, 122 } },
+    { M,   { 0,   0, 123 }, { 1,   M, 122 } },
+    { M,   { M-1, 0, M   }, { M,   M, M-1 } },
+
+    { 1,   { 0,   0, 0, 1   }, { M,   M, M, 0   } },
+    { 1,   { 0,   0, 0, 123 }, { M,   M, M, 122 } },
+    { 1,   { 0,   0, 0, M   }, { M,   M, M, M-1 } },
+    { 2,   { 0,   0, 0, 123 }, { M-1, M, M, 122 } },
+    { 2,   { 1,   0, 0, 123 }, { M,   M, M, 122 } },
+    { M,   { 0,   0, 0, 123 }, { 1,   M, M, 122 } },
+    { M,   { M-1, 0, 0, M   }, { M,   M, M, M-1 } },
+
+    { 1,   { 0,   0, 0, 0, 1   }, { M,   M, M, M, 0   } },
+    { 1,   { 0,   0, 0, 0, 123 }, { M,   M, M, M, 122 } },
+    { 1,   { 0,   0, 0, 0, M   }, { M,   M, M, M, M-1 } },
+    { 2,   { 0,   0, 0, 0, 123 }, { M-1, M, M, M, 122 } },
+    { 2,   { 1,   0, 0, 0, 123 }, { M,   M, M, M, 122 } },
+    { M,   { 0,   0, 0, 0, 123 }, { 1,   M, M, M, 122 } },
+    { M,   { M-1, 0, 0, 0, M   }, { M,   M, M, M, M-1 } },
+
+    { 1,   { 0,   0, 0, 0, 0, 1   }, { M,   M, M, M, M, 0   } },
+    { 1,   { 0,   0, 0, 0, 0, 123 }, { M,   M, M, M, M, 122 } },
+    { 1,   { 0,   0, 0, 0, 0, M   }, { M,   M, M, M, M, M-1 } },
+    { 2,   { 0,   0, 0, 0, 0, 123 }, { M-1, M, M, M, M, 122 } },
+    { 2,   { 1,   0, 0, 0, 0, 123 }, { M,   M, M, M, M, 122 } },
+    { M,   { 0,   0, 0, 0, 0, 123 }, { 1,   M, M, M, M, 122 } },
+    { M,   { M-1, 0, 0, 0, 0, M   }, { M,   M, M, M, M, M-1
+#if defined (__hpux) && ! defined (__GNUC__)
+    /* For explanation of this garbage, see previous function.  */
+				       , 0, 0, 0, 0
+#endif
+    } }
+  };
+
+  mp_limb_t  got[SIZE];
+  int   i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      refmpn_copyi (got, data[i].src, SIZE);
+      MPN_DECR_U (got, SIZE, data[i].n);
+      check_one ("check_decr_data", i,
+                 data[i].src, data[i].n,
+                 got, data[i].want, SIZE);
+
+      if (data[i].n == 1)
+        {
+          refmpn_copyi (got, data[i].src, SIZE);
+          MPN_DECR_U (got, SIZE, CNST_LIMB(1));
+          check_one ("check_decr (const 1)", i,
+                     data[i].src, data[i].n,
+                     got, data[i].want, SIZE);
+        }
+    }
+}
+
+
+/* Run both table-driven checks under the standard test harness,
+   tracing in hex (negative base means upper case per mp_trace).  */
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_incr_data ();
+  check_decr_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-matrix22.c b/tests/mpn/t-matrix22.c
new file mode 100644
index 0000000..1fa1e3f
--- /dev/null
+++ b/tests/mpn/t-matrix22.c
@@ -0,0 +1,206 @@
+/* Tests matrix22_mul.
+
+Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
/* A 2x2 matrix with mpn (limb vector) entries.  All four entries
   have the same current size n; see matrix_init for the storage
   layout.  */
struct matrix {
  mp_size_t alloc;		/* limbs available per entry */
  mp_size_t n;			/* current size of each entry, in limbs */
  mp_ptr e00, e01, e10, e11;	/* the four entries, row major */
};
+
+static void
+matrix_init (struct matrix *M, mp_size_t n)
+{
+  mp_ptr p = refmpn_malloc_limbs (4*(n+1));
+  M->e00 = p; p += n+1;
+  M->e01 = p; p += n+1;
+  M->e10 = p; p += n+1;
+  M->e11 = p;
+  M->alloc = n + 1;
+  M->n = 0;
+}
+
/* Release M's storage.  All four entries live in the single block
   allocated in matrix_init, whose base address is e00, so freeing
   that one pointer suffices.  */
static void
matrix_clear (struct matrix *M)
{
  refmpn_free_limbs (M->e00);
}
+
+static void
+matrix_copy (struct matrix *R, const struct matrix *M)
+{
+  R->n = M->n;
+  MPN_COPY (R->e00, M->e00, M->n);
+  MPN_COPY (R->e01, M->e01, M->n);
+  MPN_COPY (R->e10, M->e10, M->n);
+  MPN_COPY (R->e11, M->e11, M->n);
+}
+
+/* Used with same size, so no need for normalization. */
+static int
+matrix_equal_p (const struct matrix *A, const struct matrix *B)
+{
+  return (A->n == B->n
+	  && mpn_cmp (A->e00, B->e00, A->n) == 0
+	  && mpn_cmp (A->e01, B->e01, A->n) == 0
+	  && mpn_cmp (A->e10, B->e10, A->n) == 0
+	  && mpn_cmp (A->e11, B->e11, A->n) == 0);
+}
+
+static void
+matrix_random(struct matrix *M, mp_size_t n, gmp_randstate_ptr rands)
+{
+  M->n = n;
+  mpn_random (M->e00, n);
+  mpn_random (M->e01, n);
+  mpn_random (M->e10, n);
+  mpn_random (M->e11, n);
+}
+
/* MUL(rp, ap,an, bp,bn): rp = {ap,an} * {bp,bn}, swapping the
   operands when necessary so mpn_mul sees the larger operand first,
   as its interface requires.  */
#define MUL(rp, ap, an, bp, bn) do { \
    if (an > bn)		     \
      mpn_mul (rp, ap, an, bp, bn);  \
    else			     \
      mpn_mul (rp, bp, bn, ap, an);  \
  } while(0)
+
/* Reference R = A * B for 2x2 matrices, entry by entry with plain
   mpn_mul calls.  Each entry of R gets A->n + B->n + 1 limbs, the
   top limb holding the carry from the cross-product addition.  tp
   is scratch of at least A->n + B->n limbs.  */
static void
ref_matrix22_mul (struct matrix *R,
		  const struct matrix *A,
		  const struct matrix *B, mp_ptr tp)
{
  mp_size_t an, bn, n;
  mp_ptr r00, r01, r10, r11, a00, a01, a10, a11, b00, b01, b10, b11;

  /* mpn_mul wants its first operand to be the longer one.  If B is
     longer, compute B^T * A^T instead and transpose the result by
     swapping the off-diagonal destinations, using (A B)^T = B^T A^T.  */
  if (A->n >= B->n)
    {
      r00 = R->e00; a00 = A->e00; b00 = B->e00;
      r01 = R->e01; a01 = A->e01; b01 = B->e01;
      r10 = R->e10; a10 = A->e10; b10 = B->e10;
      r11 = R->e11; a11 = A->e11; b11 = B->e11;
      an = A->n, bn = B->n;
    }
  else
    {
      /* Transpose */
      r00 = R->e00; a00 = B->e00; b00 = A->e00;
      r01 = R->e10; a01 = B->e10; b01 = A->e10;
      r10 = R->e01; a10 = B->e01; b10 = A->e01;
      r11 = R->e11; a11 = B->e11; b11 = A->e11;
      an = B->n, bn = A->n;
    }
  n = an + bn;
  R->n = n + 1;

  /* r00 = a00*b00 + a01*b10, etc.; the mpn_add_n carry lands in the
     extra top limb of each entry.  */
  mpn_mul (r00, a00, an, b00, bn);
  mpn_mul (tp, a01, an, b10, bn);
  r00[n] = mpn_add_n (r00, r00, tp, n);

  mpn_mul (r01, a00, an, b01, bn);
  mpn_mul (tp, a01, an, b11, bn);
  r01[n] = mpn_add_n (r01, r01, tp, n);

  mpn_mul (r10, a10, an, b00, bn);
  mpn_mul (tp, a11, an, b10, bn);
  r10[n] = mpn_add_n (r10, r10, tp, n);

  mpn_mul (r11, a10, an, b01, bn);
  mpn_mul (tp, a11, an, b11, bn);
  r11[n] = mpn_add_n (r11, r11, tp, n);
}
+
/* Check mpn_matrix22_mul against the reference implementation:
   compute A*B both ways and abort with a full dump of operands and
   both products on mismatch.  i is the test index, used only in the
   error message.  */
static void
one_test (const struct matrix *A, const struct matrix *B, int i)
{
  struct matrix R;
  struct matrix P;
  mp_ptr tp;

  matrix_init (&R, A->n + B->n + 1);
  matrix_init (&P, A->n + B->n + 1);

  tp = refmpn_malloc_limbs (mpn_matrix22_mul_itch (A->n, B->n));

  ref_matrix22_mul (&R, A, B, tp);
  /* mpn_matrix22_mul works in place on its first operand, so run it
     on a copy of A.  */
  matrix_copy (&P, A);
  mpn_matrix22_mul (P.e00, P.e01, P.e10, P.e11, A->n,
		    B->e00, B->e01, B->e10, B->e11, B->n, tp);
  P.n = A->n + B->n + 1;
  if (!matrix_equal_p (&R, &P))
    {
      fprintf (stderr, "ERROR in test %d\n", i);
      gmp_fprintf (stderr, "A = (%Nx, %Nx\n      %Nx, %Nx)\n"
		   "B = (%Nx, %Nx\n      %Nx, %Nx)\n"
		   "R = (%Nx, %Nx (expected)\n      %Nx, %Nx)\n"
		   "P = (%Nx, %Nx (incorrect)\n      %Nx, %Nx)\n",
		   A->e00, A->n, A->e01, A->n, A->e10, A->n, A->e11, A->n,
		   B->e00, B->n, B->e01, B->n, B->e10, B->n, B->e11, B->n,
		   R.e00, R.n, R.e01, R.n, R.e10, R.n, R.e11, R.n,
		   P.e00, P.n, P.e01, P.n, P.e10, P.n, P.e11, P.n);
      abort();
    }
  refmpn_free_limbs (tp);
  matrix_clear (&R);
  matrix_clear (&P);
}
+
+#define MAX_SIZE (2+2*MATRIX22_STRASSEN_THRESHOLD)
+
/* Driver: 1000 random matrix products with entry sizes from 1 up to
   MAX_SIZE limbs, chosen to span both the basecase and the Strassen
   range of mpn_matrix22_mul.  */
int
main (int argc, char **argv)
{
  struct matrix A;
  struct matrix B;

  gmp_randstate_ptr rands;
  mpz_t bs;
  int i;

  tests_start ();
  rands = RANDS;

  matrix_init (&A, MAX_SIZE);
  matrix_init (&B, MAX_SIZE);
  mpz_init (bs);

  for (i = 0; i < 1000; i++)
    {
      mp_size_t an, bn;
      /* Independent random sizes 1 <= an, bn <= MAX_SIZE.  */
      mpz_urandomb (bs, rands, 32);
      an = 1 + mpz_get_ui (bs) % MAX_SIZE;
      mpz_urandomb (bs, rands, 32);
      bn = 1 + mpz_get_ui (bs) % MAX_SIZE;

      matrix_random (&A, an, rands);
      matrix_random (&B, bn, rands);

      one_test (&A, &B, i);
    }
  mpz_clear (bs);
  matrix_clear (&A);
  matrix_clear (&B);

  tests_end ();
  return 0;
}
diff --git a/tests/mpn/t-minvert.c b/tests/mpn/t-minvert.c
new file mode 100644
index 0000000..ca5690f
--- /dev/null
+++ b/tests/mpn/t-minvert.c
@@ -0,0 +1,167 @@
+/* Copyright 2013-2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>		/* for strtol */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_SIZE 50
+
+#define COUNT 200
+
/* Store the value of b into the fixed-size area {ap,an}, zero
   padding the high limbs.  Aborts if b does not fit in an limbs.  */
static void
mpz_to_mpn (mp_ptr ap, mp_size_t an, const mpz_t b)
{
  mp_size_t bn = mpz_size (b);
  ASSERT_ALWAYS (bn <= an);
  MPN_COPY_INCR (ap, mpz_limbs_read (b), bn);
  MPN_ZERO (ap + bn, an - bn);
}
+
+int
+mpz_eq_mpn (mp_ptr ap, mp_size_t an, const mpz_t b)
+{
+  mp_size_t bn = mpz_size (b);
+
+  return (bn >= 0 && bn <= an
+	  && mpn_cmp (ap, mpz_limbs_read (b), bn) == 0
+	  && (an == bn || mpn_zero_p (ap + bn, an - bn)));
+}
+
/* Return the bit size of {xp,n}, i.e. the position of its most
   significant set bit, or 0 if the value is zero.  Note that
   MPN_NORMALIZE updates the local copy of n in place.  */
static mp_bitcnt_t
bit_size (mp_srcptr xp, mp_size_t n)
{
  MPN_NORMALIZE (xp, n);
  return n > 0 ? mpn_sizeinbase (xp, n, 2) : 0;
}
+
/* Driver: cross-check mpn_sec_invert against mpz_invert on random
   odd moduli, also verifying that the scratch area is not written
   past its declared size.  */
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  long count = COUNT;
  mp_ptr mp;
  mp_ptr ap;
  mp_ptr tp;
  mp_ptr scratch;
  mpz_t m, a, r, g;
  int test;
  mp_limb_t ran;
  mp_size_t itch;
  TMP_DECL;

  tests_start ();
  rands = RANDS;


  TMP_MARK;
  mpz_init (m);
  mpz_init (a);
  mpz_init (r);
  mpz_init (g);

  TESTS_REPS (count, argv, argc);

  mp = TMP_ALLOC_LIMBS (MAX_SIZE);
  ap = TMP_ALLOC_LIMBS (MAX_SIZE);
  tp = TMP_ALLOC_LIMBS (MAX_SIZE);
  /* One extra limb beyond the declared scratch need, used as a
     canary to detect out-of-bounds writes.  */
  scratch = TMP_ALLOC_LIMBS (mpn_sec_invert_itch (MAX_SIZE) + 1);

  for (test = 0; test < count; test++)
    {
      mp_bitcnt_t bits;
      int rres, tres;
      mp_size_t n;

      bits = urandom () % (GMP_NUMB_BITS * MAX_SIZE) + 1;

      /* Alternate between rrandomb (long runs of 0s and 1s) and
	 uniformly random operands, per the low bits of test.  */
      if (test & 1)
	mpz_rrandomb (m, rands, bits);
      else
	mpz_urandomb (m, rands, bits);
      if (test & 2)
	mpz_rrandomb (a, rands, bits);
      else
	mpz_urandomb (a, rands, bits);

      mpz_setbit (m, 0);	/* make the modulus odd */
      if (test & 4)
	{
	  /* Ensure it really is invertible */
	  if (mpz_sgn (a) == 0)
	    mpz_set_ui (a, 1);
	  else
	    for (;;)
	      {
		mpz_gcd (g, a, m);
		if (mpz_cmp_ui (g, 1) == 0)
		  break;
		mpz_remove (a, a, g);	/* strip the common factor */
	      }
	}

      rres = mpz_invert (r, a, m);
      if ( (test & 4) && !rres)
	{
	  gmp_fprintf (stderr, "test %d: Not invertible!\n"
		       "m = %Zd\n"
		       "a = %Zd\n", test, m, a);
	  abort ();
	}
      ASSERT_ALWAYS (! (test & 4) || rres);

      n = (bits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
      ASSERT_ALWAYS (n <= MAX_SIZE);
      itch = mpn_sec_invert_itch (n);
      scratch[itch] = ran = urandom ();	/* plant the canary */

      mpz_to_mpn (ap, n, a);
      mpz_to_mpn (mp, n, m);
      tres = mpn_sec_invert (tp, ap, mp, n,
			     bit_size (ap, n) + bit_size (mp, n),
			     scratch);

      /* Fail if the two inverters disagree on invertibility or on the
	 inverse itself, or if the canary limb was clobbered.  */
      if (rres != tres || (rres == 1 && !mpz_eq_mpn (tp, n, r)) || ran != scratch[itch])
	{
	  gmp_fprintf (stderr, "Test %d failed.\n"
		       "m = %Zd\n"
		       "a = %Zd\n", test, m, a);
	  fprintf (stderr, "ref ret: %d\n"
		  "got ret: %d\n", rres, tres);
	  if (rres)
	    gmp_fprintf (stderr, "ref: %Zd\n", r);
	  if (tres)
	    gmp_fprintf (stderr, "got: %Nd\n", tp, n);
	  if (ran != scratch[itch])
	    fprintf (stderr, "scratch[itch] changed.\n");
	  abort ();
	}
    }

  TMP_FREE;

  mpz_clear (m);
  mpz_clear (a);
  mpz_clear (r);
  mpz_clear (g);

  tests_end ();
  return 0;
}
diff --git a/tests/mpn/t-mod_1.c b/tests/mpn/t-mod_1.c
new file mode 100644
index 0000000..5b9570d
--- /dev/null
+++ b/tests/mpn/t-mod_1.c
@@ -0,0 +1,127 @@
+/* Test mpn_mod_1 variants.
+
+Copyright 2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
/* Compute {ap,n} mod b with every applicable mpn_mod_1 variant and
   compare each remainder against refmpn_mod_1, aborting with a dump
   of the operands on any mismatch.  */
static void
check_one (mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  mp_limb_t r_ref = refmpn_mod_1 (ap, n, b);
  mp_limb_t r;

  /* mpn_mod_1_1p: exercised only for n >= 2.  The _cps call
     precomputes into pre[]; pre[1] holds the normalization shift
     count, hence the pre-shifted divisor b << pre[1] below.  */
  if (n >= 2)
    {
      mp_limb_t pre[4];
      mpn_mod_1_1p_cps (pre, b);
      r = mpn_mod_1_1p (ap, n, b << pre[1], pre);
      if (r != r_ref)
	{
	  printf ("mpn_mod_1_1p failed\n");
	  goto fail;
	}
    }
  /* mpn_mod_1s_2p: exercised only for divisors with the high bit
     clear.  */
  if ((b & GMP_NUMB_HIGHBIT) == 0)
    {
      mp_limb_t pre[5];
      mpn_mod_1s_2p_cps (pre, b);
      r = mpn_mod_1s_2p (ap, n, b << pre[1], pre);
      if (r != r_ref)
	{
	  printf ("mpn_mod_1s_2p failed\n");
	  goto fail;
	}
    }
  /* mpn_mod_1s_3p: exercised only for b <= (B-1)/3.  */
  if (b <= GMP_NUMB_MASK / 3)
    {
      mp_limb_t pre[6];
      mpn_mod_1s_3p_cps (pre, b);
      r = mpn_mod_1s_3p (ap, n, b << pre[1], pre);
      if (r != r_ref)
	{
	  printf ("mpn_mod_1s_3p failed\n");
	  goto fail;
	}
    }
  /* mpn_mod_1s_4p: exercised only for b <= (B-1)/4.  */
  if (b <= GMP_NUMB_MASK / 4)
    {
      mp_limb_t pre[7];
      mpn_mod_1s_4p_cps (pre, b);
      r = mpn_mod_1s_4p (ap, n, b << pre[1], pre);
      if (r != r_ref)
	{
	  printf ("mpn_mod_1s_4p failed\n");
	  goto fail;
	}
    }
  /* Finally the dispatching mpn_mod_1 itself.  */
  r = mpn_mod_1 (ap, n, b);
  if (r != r_ref)
    {
      printf ("mpn_mod_1 failed\n");
    fail:
      /* Shared failure path: dump operands and both remainders.  */
      printf ("an = %d, a: ", (int) n); mpn_dump (ap, n);
      printf ("b           : "); mpn_dump (&b, 1);
      printf ("r (expected): "); mpn_dump (&r_ref, 1);
      printf ("r (bad)     : "); mpn_dump (&r, 1);
      abort();
    }
}
+
/* Driver: random dividends of up to 1000 bits against random
   single-limb divisors.  */
int
main (int argc, char **argv)
{
  gmp_randstate_ptr rands;
  int i;
  unsigned a_bits;
  unsigned b_bits;
  mpz_t a;
  mpz_t b;

  tests_start ();
  rands = RANDS;
  mpz_init (a);
  mpz_init (b);

  for (i = 0; i < 300; i++)
    {
      mp_size_t asize;
      a_bits = 1 + gmp_urandomm_ui (rands, 1000);
      b_bits = 1 + gmp_urandomm_ui (rands, GMP_NUMB_BITS);

      /* rrandomb gives long runs of 0s and 1s, good corner cases.  */
      mpz_rrandomb (a, rands, a_bits);
      mpz_rrandomb (b, rands, b_bits);

      asize = SIZ(a);
      if (!asize)
	asize = 1;		/* treat zero as a one-limb operand */
      if (mpz_sgn (b) == 0)
	mpz_set_ui (b, 1);	/* avoid a zero divisor */

      check_one (PTR(a), asize, PTR(b)[0]);
    }

  mpz_clear (a);
  mpz_clear (b);

  tests_end ();
  return 0;
}
diff --git a/tests/mpn/t-mp_bases.c b/tests/mpn/t-mp_bases.c
new file mode 100644
index 0000000..deb7f5c
--- /dev/null
+++ b/tests/mpn/t-mp_bases.c
@@ -0,0 +1,104 @@
+/* Check mp_bases values.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
/* Check the compiled-in mp_bases conversion table, and the base-10
   constants that are additionally exported as macros, against values
   recomputed by the reference code.  */
int
main (int argc, char *argv[])
{
  mp_limb_t  want_bb, want_bb_inv;
  int        base, want_chars_per_limb;

  /* First the standalone base-10 macros.  */
  want_chars_per_limb = refmpn_chars_per_limb (10);
  if (MP_BASES_CHARS_PER_LIMB_10 != want_chars_per_limb)
    {
      printf ("MP_BASES_CHARS_PER_LIMB_10 wrong\n");
      abort ();
    }

  want_bb = refmpn_big_base (10);
  if (MP_BASES_BIG_BASE_10 != want_bb)
    {
      printf ("MP_BASES_BIG_BASE_10 wrong\n");
      abort ();
    }

  /* The inverted big base corresponds to the normalized
     (left-shifted) big base.  */
  want_bb_inv = refmpn_invert_limb
    (want_bb << refmpn_count_leading_zeros (want_bb));
  if (MP_BASES_BIG_BASE_INVERTED_10 != want_bb_inv)
    {
      printf ("MP_BASES_BIG_BASE_INVERTED_10 wrong\n");
      abort ();
    }

  if (MP_BASES_NORMALIZATION_STEPS_10
      != refmpn_count_leading_zeros (MP_BASES_BIG_BASE_10))
    {
      printf ("MP_BASES_NORMALIZATION_STEPS_10 wrong\n");
      abort ();
    }

  /* Now every entry of the table.  */
  for (base = 2; base < numberof (mp_bases); base++)
    {
      want_chars_per_limb = refmpn_chars_per_limb (base);
      if (mp_bases[base].chars_per_limb != want_chars_per_limb)
        {
          printf ("mp_bases[%d].chars_per_limb wrong\n", base);
          printf ("  got  %d\n", mp_bases[base].chars_per_limb);
          printf ("  want %d\n", want_chars_per_limb);
          abort ();
        }

      if (POW2_P (base))
        {
          /* For power-of-2 bases the big_base field instead holds
             log2(base).  */
          want_bb = refmpn_count_trailing_zeros ((mp_limb_t) base);
          if (mp_bases[base].big_base != want_bb)
            {
              printf ("mp_bases[%d].big_base (log2 of base) wrong\n", base);
              abort ();
            }
        }
      else
        {
          want_bb = refmpn_big_base (base);
          if (mp_bases[base].big_base != want_bb)
            {
              printf ("mp_bases[%d].big_base wrong\n", base);
              abort ();
            }

#if USE_PREINV_DIVREM_1
          want_bb_inv = refmpn_invert_limb
            (want_bb << refmpn_count_leading_zeros (want_bb));
          if (mp_bases[base].big_base_inverted != want_bb_inv)
            {
              printf ("mp_bases[%d].big_base_inverted wrong\n", base);
              abort ();
            }
#endif
        }
    }

  exit (0);
}
diff --git a/tests/mpn/t-mul.c b/tests/mpn/t-mul.c
new file mode 100644
index 0000000..40f6de7
--- /dev/null
+++ b/tests/mpn/t-mul.c
@@ -0,0 +1,97 @@
+/* Test mpn_mul function for all sizes up to a selected limit.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
/* Integer square root: return floor(sqrt(t)).

   An initial estimate guaranteed to be >= the true root is derived
   from the bit length of t, then refined by Newton's method.  With
   such a starting point the iteration decreases monotonically to
   floor(sqrt(t)), so it stops as soon as it would no longer shrink.  */
static unsigned
isqrt (unsigned t)
{
  unsigned est, quot, bits;

  /* bits = bit length of t (1 for t = 0, harmless below).  */
  bits = 1;
  est = t;
  while ((est >>= 1) != 0)
    bits++;

  /* est = 2^ceil(bits/2), reduced by a quarter when bits is odd;
     always >= sqrt(t) since t < 2^bits.  */
  est = 1u << (bits >> 1);
  if (bits & 1)
    est += est >> 1;

  for (;;)
    {
      quot = t / est;
      est = (est + quot) >> 1;
      if (quot >= est)
	break;
    }

  return est;
}
+
/* Exhaustively test mpn_mul against refmpn_mul for all operand size
   combinations up to a limit derived from the reps argument.  */
int
main (int argc, char **argv)
{
  mp_ptr ap, bp, rp, refp;
  mp_size_t max_n, an, bn, rn;
  int reps;
  TMP_DECL;
  TMP_MARK;

  reps = 1;

  tests_start ();
  TESTS_REPS (reps, argv, argc);

  /* Re-interpret reps argument as a size argument.  */
  max_n = isqrt (reps * 25000);

  /* One extra limb per operand: mpn_random2 below fills an+1 and
     bn+1 limbs (the extra limb presumably guards against reads past
     the operand — confirm).  */
  ap = TMP_ALLOC_LIMBS (max_n + 1);
  bp = TMP_ALLOC_LIMBS (max_n + 1);
  rp = TMP_ALLOC_LIMBS (2 * max_n);
  refp = TMP_ALLOC_LIMBS (2 * max_n);

  /* All combinations with 1 <= bn <= an <= max_n; mpn_mul requires
     its first operand to be at least as large as the second.  */
  for (an = 1; an <= max_n; an += 1)
    {
      for (bn = 1; bn <= an; bn += 1)
	{
	  mpn_random2 (ap, an + 1);
	  mpn_random2 (bp, bn + 1);

	  refmpn_mul (refp, ap, an, bp, bn);
	  mpn_mul (rp, ap, an, bp, bn);

	  rn = an + bn;
	  if (mpn_cmp (refp, rp, rn))
	    {
	      printf ("ERROR, an = %d, bn = %d, rn = %d\n",
		      (int) an, (int) bn, (int) rn);
	      printf ("a: "); mpn_dump (ap, an);
	      printf ("b: "); mpn_dump (bp, bn);
	      printf ("r:   "); mpn_dump (rp, rn);
	      printf ("ref: "); mpn_dump (refp, rn);
	      abort();
	    }
	}
    }
  TMP_FREE;
  tests_end ();
  return 0;
}
diff --git a/tests/mpn/t-mullo.c b/tests/mpn/t-mullo.c
new file mode 100644
index 0000000..75a0f01
--- /dev/null
+++ b/tests/mpn/t-mullo.c
@@ -0,0 +1,132 @@
+/* Test for mullo function.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 10
+#endif
+
+#ifndef COUNT
+#define COUNT 10000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N (1)
+
/* Random tests of mpn_mullo_n against the low half of a full
   mpn_mul_n product, with canary limbs around both the product and
   the scratch areas to catch out-of-bounds writes.  */
int
main (int argc, char **argv)
{
  mp_ptr ap, bp, refp, pp, scratch;
  int count = COUNT;
  int test;
  gmp_randstate_ptr rands;
  TMP_DECL;
  TMP_MARK;

  TESTS_REPS (count, argv, argc);

  tests_start ();
  rands = RANDS;

/* mpn_mullo_n takes no scratch argument; a zero itch keeps the
   scratch-canary machinery below in place without effect.  */
#define mpn_mullo_itch(n) (0)

  ap = TMP_ALLOC_LIMBS (MAX_N);
  bp = TMP_ALLOC_LIMBS (MAX_N);
  refp = TMP_ALLOC_LIMBS (MAX_N * 2);
  /* Offset by one limb so pp[-1] is addressable; pp[-1] and pp[n]
     serve as before/after canaries.  */
  pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
  scratch
    = 1+TMP_ALLOC_LIMBS (mpn_mullo_itch (MAX_N) + 2);

  for (test = 0; test < count; test++)
    {
      unsigned size_min;
      unsigned size_range;
      mp_size_t n;
      mp_size_t itch;
      mp_limb_t p_before, p_after, s_before, s_after;

      /* Smallest size_min with 2^size_min >= MIN_N.  */
      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
	;

      /* We generate a random n with MIN_N <= n <= (1 << size_range);
	 drawing size_range first biases toward small sizes.  */
      size_range = size_min
	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);

      n = MIN_N
	+ gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);

      mpn_random2 (ap, n);
      mpn_random2 (bp, n);
      /* Randomize the product area including both canaries, then
	 remember the canary values.  */
      mpn_random2 (pp-1, n + 2);
      p_before = pp[-1];
      p_after = pp[n];

      itch = mpn_mullo_itch (n);
      ASSERT_ALWAYS (itch <= mpn_mullo_itch (MAX_N));
      mpn_random2 (scratch-1, itch+2);
      s_before = scratch[-1];
      s_after = scratch[itch];

      mpn_mullo_n (pp, ap, bp, n);
      mpn_mul_n (refp, ap, bp, n);
      /* Low n limbs must match the full product; no canary may have
	 been clobbered.  */
      if (pp[-1] != p_before || pp[n] != p_after
	  || scratch[-1] != s_before || scratch[itch] != s_after
	  || mpn_cmp (refp, pp, n) != 0)
	{
	  printf ("ERROR in test %d, n = %d",
		  test, (int) n);
	  if (pp[-1] != p_before)
	    {
	      printf ("before pp:"); mpn_dump (pp -1, 1);
	      printf ("keep:   "); mpn_dump (&p_before, 1);
	    }
	  if (pp[n] != p_after)
	    {
	      printf ("after pp:"); mpn_dump (pp + n, 1);
	      printf ("keep:   "); mpn_dump (&p_after, 1);
	    }
	  if (scratch[-1] != s_before)
	    {
	      printf ("before scratch:"); mpn_dump (scratch-1, 1);
	      printf ("keep:   "); mpn_dump (&s_before, 1);
	    }
	  if (scratch[itch] != s_after)
	    {
	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
	      printf ("keep:   "); mpn_dump (&s_after, 1);
	    }
	  mpn_dump (ap, n);
	  mpn_dump (bp, n);
	  mpn_dump (pp, n);
	  mpn_dump (refp, n);

	  abort();
	}
    }
  TMP_FREE;
  tests_end ();
  return 0;
}
diff --git a/tests/mpn/t-mulmid.c b/tests/mpn/t-mulmid.c
new file mode 100644
index 0000000..9491b0e
--- /dev/null
+++ b/tests/mpn/t-mulmid.c
@@ -0,0 +1,92 @@
+/* Test for mulmid function.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 9
+#endif
+
+#ifndef COUNT
+#define COUNT 5000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+
/* Random tests of mpn_mulmid against refmpn_mulmid with operand
   sizes up to 2^SIZE_LOG limbs, the distribution biased toward
   small sizes.  */
int
main (int argc, char **argv)
{
  mp_ptr ap, bp, rp, refp;
  gmp_randstate_ptr rands;
  int test;
  TMP_DECL;
  TMP_MARK;

  tests_start ();
  rands = RANDS;

  ap = TMP_ALLOC_LIMBS (MAX_N);
  bp = TMP_ALLOC_LIMBS (MAX_N);
  /* The middle product has an + 3 - bn <= an + 2 limbs.  */
  rp = TMP_ALLOC_LIMBS (MAX_N + 2);
  refp = TMP_ALLOC_LIMBS (MAX_N + 2);

  for (test = 0; test < COUNT; test++)
    {
      mp_size_t an, bn, rn;
      unsigned size_log;

      /* Drawing size_log first biases the sizes toward small ones.  */
      size_log = 1 + gmp_urandomm_ui (rands, SIZE_LOG);
      an = 1 + gmp_urandomm_ui(rands, 1L << size_log);

      size_log = 1 + gmp_urandomm_ui (rands, SIZE_LOG);
      bn = 1 + gmp_urandomm_ui(rands, 1L << size_log);

      /* Make sure an >= bn */
      if (an < bn)
	MP_SIZE_T_SWAP (an, bn);

      mpn_random2 (ap, an);
      mpn_random2 (bp, bn);

      refmpn_mulmid (refp, ap, an, bp, bn);
      mpn_mulmid (rp, ap, an, bp, bn);

      rn = an + 3 - bn;		/* middle product size */
      if (mpn_cmp (refp, rp, rn))
	{
	  printf ("ERROR in test %d, an = %d, bn = %d, rn = %d\n",
		  test, (int) an, (int) bn, (int) rn);
	  printf("a: "); mpn_dump (ap, an);
	  printf("b: "); mpn_dump (bp, bn);
	  printf("r:   "); mpn_dump (rp, rn);
	  printf("ref: "); mpn_dump (refp, rn);

	  abort();
	}
    }
  TMP_FREE;
  tests_end ();
  return 0;
}
diff --git a/tests/mpn/t-mulmod_bknp1.c b/tests/mpn/t-mulmod_bknp1.c
new file mode 100644
index 0000000..93da1eb
--- /dev/null
+++ b/tests/mpn/t-mulmod_bknp1.c
@@ -0,0 +1,202 @@
+/* Test for mulmod_bknp1 function.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2009, 2020-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if MOD_BKNP1_USE11
+#define USE11 11,
+#else
+#define USE11
+#endif
+
+
+#if GMP_NUMB_BITS % 32 == 0
+#define MAX_K 17
+#define SUPPORTED_K {3, 5, 7, 13, USE11 MAX_K}
+#else
+#if GMP_NUMB_BITS % 16 == 0
+#define MAX_K 13
+#define SUPPORTED_K {3, 5, 7, USE11 MAX_K}
+#else
+#if GMP_NUMB_BITS % 8 == 0
+#define MAX_K 7
+#define SUPPORTED_K {3, USE11 MAX_K}
+#else
+#define SUPPORTED_K {USE11} /* Supported ? */
+#endif /* GMP_NUMB_BITS % 8 == 0 */
+#endif /* GMP_NUMB_BITS % 16 == 0 */
+#endif /* GMP_NUMB_BITS % 32 == 0 */
+
+#if MOD_BKNP1_ONLY3
+#undef SUPPORTED_K
+#undef MAX_K
+#define MAX_K 3
+#define SUPPORTED_K {3}
+#endif
+
+/* Sizes are up to MAX_K * 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 7
+#endif
+
+#ifndef COUNT
+#define COUNT 5000
+#endif
+
+#define MAX_N (MAX_K << SIZE_LOG)
+#define MIN_N 1
+
/*
  Reference function for multiplication modulo B^rn+1 (the caller
  passes rn = k*n).  Operands and result occupy rn+1 limbs.
*/

static void
ref_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn)
{
  mp_limb_t cy;

  /* Compute the full 2*(rn+1)-limb product, then fold it using
     B^rn == -1: add back the limb of weight B^2rn (== 1), subtract
     the middle limbs of weight B^rn (== -1), and propagate the net
     carry/borrow into the final top limb.  */
  mpn_mul_n (rp, ap, bp, rn + 1);
  cy = rp[2 * rn];
  MPN_INCR_U (rp, 2 * rn + 1, rp[2 * rn]);
  cy = rp[2 * rn] - cy + mpn_sub_n (rp, rp, rp + rn, rn);
  rp[rn] = 0;
  MPN_INCR_U (rp, rn + 1, cy);
}
+
/*
  Compare the result of the mpn_mulmod_bknp1 function in the library
  with the reference function above.
*/

/* The k values exercised by the test; SUPPORTED_K is chosen above
   according to the limb size.  */
unsigned supported_k[] = SUPPORTED_K;
+
/* Random tests of mpn_mulmod_bknp1 against the reference above, for
   each supported k, with canary limbs around the product and scratch
   areas to catch out-of-bounds writes.  */
int
main (int argc, char **argv)
{
  mp_ptr ap, bp, refp, pp, scratch;
  int count = COUNT;
  int test;
  gmp_randstate_ptr rands;
  TMP_DECL;
  TMP_MARK;

  TESTS_REPS (count, argv, argc);

  tests_start ();
  rands = RANDS;

  ap = TMP_ALLOC_LIMBS (MAX_N + 1);
  bp = TMP_ALLOC_LIMBS (MAX_N + 1);
  refp = TMP_ALLOC_LIMBS (MAX_N * 2 + 2);
  /* Offset by one limb so pp[-1] is addressable; pp[-1] and
     pp[rn + 1] serve as before/after canaries.  */
  pp = 1 + TMP_ALLOC_LIMBS (MAX_N + 3);
  scratch
    = 1 + TMP_ALLOC_LIMBS (mpn_mulmod_bknp1_itch (MAX_N) + 2);

  for (test = 0; test < count; test++)
    {
      unsigned size_min;
      unsigned size_range;
      unsigned k;
      mp_size_t rn, n;
      mp_size_t itch;
      mp_limb_t p_before, p_after, s_before, s_after;

      /* Smallest size_min with 2^size_min >= MIN_N.  */
      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
	;

      /* We generate a random n with MIN_N <= n <= (1 << size_range),
	 biased toward small sizes.  */
      size_range = size_min
	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);

      k = supported_k[test % numberof (supported_k)];
      n = MIN_N
	+ gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
      rn = k * n;
      /* NOTE(review): when k does not divide GMP_NUMB_MAX, retry the
	 same rn with k = 3 if possible — presumably a divisibility
	 requirement of the algorithm; confirm against mulmod_bknp1.  */
      if ((GMP_NUMB_MAX % k != 0) && (rn % 3 == 0))
	n = rn / (k = 3);

      if (test == 0)
	{
	  /* First iteration uses a specially constructed operand
	     (see the inline comment below).  */
	  mpn_random2 (ap, n);
	  mpn_add_1 (ap + n, ap, n, 1); /* {ap,an} = -1 mod B+1 */
	  MPN_ZERO (ap + 2 * n, rn - 2 * n + 1);
	}
      else
	mpn_random2 (ap, rn + 1);
      mpn_random2 (bp, rn + 1);

      /* Operands are semi-normalized: the top limb is reduced to 0 or 1.  */
      bp [rn] &= 1;
      ap [rn] &= 1;

      /* Randomize the product area including both canaries, then
	 remember the canary values.  */
      mpn_random2 (pp-1, rn + 3);
      p_before = pp[-1];
      p_after = pp[rn + 1];

      itch = mpn_mulmod_bknp1_itch (rn);
      ASSERT_ALWAYS (itch <= mpn_mulmod_bknp1_itch (MAX_N));
      mpn_random2 (scratch - 1, itch + 2);
      s_before = scratch[-1];
      s_after = scratch[itch];

      mpn_mulmod_bknp1 ( pp, ap, bp, n, k, scratch);
      ref_mulmod_bnp1 (refp, ap, bp, rn);
      /* Results must agree on all rn+1 limbs, and no canary limb may
	 have been clobbered.  */
      if (pp[-1] != p_before || pp[rn + 1] != p_after
	  || scratch[-1] != s_before || scratch[itch] != s_after
	  || mpn_cmp (refp, pp, rn + 1) != 0)
	{
	  printf ("ERROR in test %d, rn = %d, n = %d, k = %d\n",
		  test, (int) rn, (int) n, (int) k);
	  if (pp[-1] != p_before)
	    {
	      printf ("before pp:"); mpn_dump (pp - 1, 1);
	      printf ("keep:   "); mpn_dump (&p_before, 1);
	    }
	  if (pp[rn + 1] != p_after)
	    {
	      printf ("after pp:"); mpn_dump (pp + rn + 1, 1);
	      printf ("keep:   "); mpn_dump (&p_after, 1);
	    }
	  if (scratch[-1] != s_before)
	    {
	      printf ("before scratch:"); mpn_dump (scratch - 1, 1);
	      printf ("keep:   "); mpn_dump (&s_before, 1);
	    }
	  if (scratch[itch] != s_after)
	    {
	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
	      printf ("keep:   "); mpn_dump (&s_after, 1);
	    }
	  mpn_dump (ap, rn + 1);
	  mpn_dump (bp, rn + 1);
	  mpn_dump (pp, rn + 1);
	  mpn_dump (refp, rn + 1);

	  abort();
	}
    }
  TMP_FREE;
  tests_end ();
  return 0;
}
diff --git a/tests/mpn/t-mulmod_bnm1.c b/tests/mpn/t-mulmod_bnm1.c
new file mode 100644
index 0000000..08f59bb
--- /dev/null
+++ b/tests/mpn/t-mulmod_bnm1.c
@@ -0,0 +1,210 @@
+
+/* Test for mulmod_bnm1 function.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2009, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 11
+#endif
+
+#ifndef COUNT
+#define COUNT 5000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N 1
+
+/*
+  Reference function for multiplication modulo B^rn-1.
+
+  The result is expected to be ZERO if and only if one of the operand
+  already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+  B^rn-1. This should not be a problem if mulmod_bnm1 is used to
+  combine results and obtain a natural number when one knows in
+  advance that the final value is less than (B^rn-1).
+*/
+
+static void
+ref_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < an && an <= rn);
+  ASSERT (0 < bn && bn <= rn);
+
+  /* refmpn_mul wants the larger operand first.  */
+  if (an >= bn)
+    refmpn_mul (rp, ap, an, bp, bn);
+  else
+    refmpn_mul (rp, bp, bn, ap, an);
+  an += bn;
+  /* Reduce mod B^rn - 1: since B^rn == 1, fold the limbs above position
+     rn back onto the low part.  */
+  if (an > rn) {
+    cy = mpn_add (rp, rp, rn, rp + rn, an - rn);
+    /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+     * be no overflow when adding in the carry. */
+    MPN_INCR_U (rp, rn, cy);
+  }
+}
+
+/*
+  Compare the result of the mpn_mulmod_bnm1 function in the library
+  with the reference function above.
+*/
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  ASSERT_ALWAYS (mpn_mulmod_bnm1_next_size (MAX_N) == MAX_N);
+
+  /* pp and scratch are offset by one limb so that pp[-1] and scratch[-1]
+     are valid guard limbs for the overwrite checks below.  */
+  ap = TMP_ALLOC_LIMBS (MAX_N);
+  bp = TMP_ALLOC_LIMBS (MAX_N);
+  refp = TMP_ALLOC_LIMBS (MAX_N * 4);
+  pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (MAX_N, MAX_N, MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t an,bn,rn,n;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+	;
+
+      /* Generate the modulus size n, with MIN_N <= n <= (1 << size_range). */
+      size_range = size_min
+	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      n = MIN_N
+	+ gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+      n = mpn_mulmod_bnm1_next_size (n);
+
+      if ( (test & 1) || n == 1) {
+	/* Half of the tests are done with the main scenario in mind:
+	   both an and bn >= rn/2 */
+	an = ((n+1) >> 1) + gmp_urandomm_ui (rands, (n+1) >> 1);
+	bn = ((n+1) >> 1) + gmp_urandomm_ui (rands, (n+1) >> 1);
+      } else {
+	/* Second half of the tests are done using mulmod to compute a
+	   full product with n/2 < an+bn <= n. */
+	an = 1 + gmp_urandomm_ui (rands, n - 1);
+	if (an >= n/2)
+	  bn = 1 + gmp_urandomm_ui (rands, n - an);
+	else
+	  bn = n/2 + 1 - an + gmp_urandomm_ui (rands, (n+1)/2);
+      }
+
+      /* Make sure an >= bn */
+      if (an < bn)
+	MP_SIZE_T_SWAP (an, bn);
+
+      mpn_random2 (ap, an);
+      mpn_random2 (bp, bn);
+
+      /* Sometime trigger the borderline conditions
+	 A = -1,0,+1 or B = -1,0,+1 or A*B == -1,0,1 Mod(B^{n/2}+1).
+	 This only makes sense if there is at least a split, i.e. n is even. */
+      if ((test & 0x1f) == 1 && (n & 1) == 0) {
+	mp_size_t x;
+	MPN_COPY (ap, ap + (n >> 1), an - (n >> 1));
+	MPN_ZERO (ap + an - (n >> 1) , n - an);
+	MPN_COPY (bp, bp + (n >> 1), bn - (n >> 1));
+	MPN_ZERO (bp + bn - (n >> 1) , n - bn);
+	x = 0;
+	/* x = (n == an) ? 0 : gmp_urandomm_ui (rands, n - an); */
+	ap[x] += gmp_urandomm_ui (rands, 3) - 1;
+	/* x = (n >> 1) - x % (n >> 1); */
+	bp[x] += gmp_urandomm_ui (rands, 3) - 1;
+	/* We don't propagate carry, this means that the desired condition
+	   is not triggered all the times. A few times are enough anyway. */
+      }
+      /* Significant part of the result: an an+bn limb product reduced
+	 mod B^n-1 has at most MIN (n, an+bn) limbs.  */
+      rn = MIN(n, an + bn);
+      mpn_random2 (pp-1, rn + 2);
+      p_before = pp[-1];
+      p_after = pp[rn];
+
+      itch = mpn_mulmod_bnm1_itch (n, an, bn);
+      ASSERT_ALWAYS (itch <= mpn_mulmod_bnm1_itch (MAX_N, MAX_N, MAX_N));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      /* Run the function under test, then check it neither wrote outside
+	 {pp, rn} / {scratch, itch} nor disagreed with the reference.  */
+      mpn_mulmod_bnm1 (  pp, n, ap, an, bp, bn, scratch);
+      ref_mulmod_bnm1 (refp, n, ap, an, bp, bn);
+      if (pp[-1] != p_before || pp[rn] != p_after
+	  || scratch[-1] != s_before || scratch[itch] != s_after
+	  || mpn_cmp (refp, pp, rn) != 0)
+	{
+	  printf ("ERROR in test %d, an = %d, bn = %d, n = %d\n",
+		  test, (int) an, (int) bn, (int) n);
+	  if (pp[-1] != p_before)
+	    {
+	      printf ("before pp:"); mpn_dump (pp -1, 1);
+	      printf ("keep:   "); mpn_dump (&p_before, 1);
+	    }
+	  if (pp[rn] != p_after)
+	    {
+	      printf ("after pp:"); mpn_dump (pp + rn, 1);
+	      printf ("keep:   "); mpn_dump (&p_after, 1);
+	    }
+	  if (scratch[-1] != s_before)
+	    {
+	      printf ("before scratch:"); mpn_dump (scratch-1, 1);
+	      printf ("keep:   "); mpn_dump (&s_before, 1);
+	    }
+	  if (scratch[itch] != s_after)
+	    {
+	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+	      printf ("keep:   "); mpn_dump (&s_after, 1);
+	    }
+	  mpn_dump (ap, an);
+	  mpn_dump (bp, bn);
+	  mpn_dump (pp, rn);
+	  mpn_dump (refp, rn);
+
+	  abort();
+	}
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-perfsqr.c b/tests/mpn/t-perfsqr.c
new file mode 100644
index 0000000..b65ee8b
--- /dev/null
+++ b/tests/mpn/t-perfsqr.c
@@ -0,0 +1,116 @@
+/* Test mpn_perfect_square_p data.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include "mpn/perfsqr.h"
+
+
+#define PERFSQR_MOD_MASK   ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
+
+/* Verify one generated residue table from perfsqr.h: d is the modulus,
+   inv its multiplicative inverse mod 2^PERFSQR_MOD_BITS, and got_hi:got_lo
+   the bitmask whose bit idx claims whether the corresponding remainder is
+   a square mod d.  Recompute the quadratic residues and compare.  */
+void
+check_mod_2 (mp_limb_t d, mp_limb_t inv, mp_limb_t got_hi, mp_limb_t got_lo)
+{
+  int        want[2*GMP_LIMB_BITS], got;
+  unsigned   r, idx;
+  mp_limb_t  q;
+
+  ASSERT_ALWAYS (d <= numberof (want));
+  ASSERT_ALWAYS (((inv * d) & PERFSQR_MOD_MASK) == 1);
+  ASSERT_ALWAYS (MP_LIMB_T_MAX / d >= PERFSQR_MOD_MASK);
+
+  /* the squares mod d */
+  for (r = 0; r < d; r++)
+    want[r] = 0;
+  for (r = 0; r < d; r++)
+    want[(r*r)%d] = 1;
+
+  /* for each remainder mod d, expect the table data to correctly identify
+     it as a residue or non-residue */
+  for (r = 0; r < d; r++)
+    {
+      /* as per PERFSQR_MOD_IDX */
+      q = ((r) * (inv)) & PERFSQR_MOD_MASK;
+      idx = (q * (d)) >> PERFSQR_MOD_BITS;
+
+      /* the mask spans two limbs; select the limb holding bit idx */
+      if (idx >= GMP_LIMB_BITS)
+        got = (got_hi >> (idx - GMP_LIMB_BITS)) & 1;
+      else
+        got = (got_lo >> idx) & 1;
+
+      if (got != want[r])
+        {
+          printf ("Wrong generated data\n");
+          printf ("  d=%u\n", (unsigned) d);
+          printf ("  r=%u\n", r);
+          printf ("  idx=%u\n", idx);
+          printf ("  got  %d\n", got);
+          printf ("  want %d\n", want[r]);
+          abort ();
+        }
+    }
+}
+
+/* Check the generated data in perfsqr.h. */
+void
+check_mod (void)
+{
+/* Expand PERFSQR_MOD_TEST so that each one-limb (MOD_1) and two-limb
+   (MOD_2) table entry is routed to check_mod_2; the MOD_34 and MOD_PP
+   hooks carry no table data and are stubbed out.  */
+#define PERFSQR_MOD_34(r, up, usize)       { r = 0; } /* so r isn't unused */
+#define PERFSQR_MOD_PP(r, up, usize)       { r = 0; }
+#define PERFSQR_MOD_1(r, d, inv, mask)     check_mod_2 (d, inv, CNST_LIMB(0), mask)
+#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) check_mod_2 (d, inv, mhi, mlo)
+
+  PERFSQR_MOD_TEST (dummy, dummy);
+}
+
+/* Check PERFSQR_PP, if in use. */
+void
+check_pp (void)
+{
+#ifdef PERFSQR_PP
+  ASSERT_ALWAYS_LIMB (PERFSQR_PP);
+  ASSERT_ALWAYS_LIMB (PERFSQR_PP_NORM);
+  ASSERT_ALWAYS_LIMB (PERFSQR_PP_INVERTED);
+
+  /* preinv stuff only for nails==0 */
+  if (GMP_NAIL_BITS == 0)
+    {
+      /* PERFSQR_PP_NORM must be PERFSQR_PP shifted so its top bit is set,
+         and PERFSQR_PP_INVERTED the matching precomputed inverse.  */
+      ASSERT_ALWAYS (PERFSQR_PP_NORM
+                     == PERFSQR_PP << refmpn_count_leading_zeros (PERFSQR_PP));
+      ASSERT_ALWAYS (PERFSQR_PP_INVERTED
+                     == refmpn_invert_limb (PERFSQR_PP_NORM));
+    }
+#endif
+}
+
+/* Entry point: validate the generated perfsqr.h residue tables and the
+   PERFSQR_PP constants.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_mod ();
+  check_pp ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-scan.c b/tests/mpn/t-scan.c
new file mode 100644
index 0000000..ec25b95
--- /dev/null
+++ b/tests/mpn/t-scan.c
@@ -0,0 +1,144 @@
+/* Test mpn_scan0 and mpn_scan1.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+
+#include "tests.h"
+
+
+#define SIZE  ((mp_size_t) 3)
+mp_limb_t  x[SIZE+1];
+
+/* Run mpn_scan1 and mpn_scan0 over {x, SIZE} for every starting bit
+   position, comparing each result against the reference implementation.
+   A sentinel limb at x[SIZE] (1 for scan1, 0 for scan0) guarantees each
+   scan finds a matching bit no later than that limb.
+
+   Fix: the original assigned the reference result to `got' and the
+   library result to `want', mislabelling the failure diagnostics; the
+   function under test now provides `got'.  */
+void
+check (void)
+{
+  unsigned long  i, got, want;
+
+  x[SIZE] = 1;
+  for (i = 0; i < SIZE*GMP_NUMB_BITS; i++)
+    {
+      got = mpn_scan1 (x, i);      /* function under test */
+      want = refmpn_scan1 (x, i);  /* reference */
+      if (got != want)
+        {
+          printf ("mpn_scan1\n");
+          printf ("  i     %lu\n", i);
+          printf ("  got   %lu\n", got);
+          printf ("  want  %lu\n", want);
+          mpn_trace ("  x    ", x, SIZE);
+          abort ();
+        }
+    }
+
+  x[SIZE] = 0;
+  for (i = 0; i < SIZE*GMP_NUMB_BITS; i++)
+    {
+      got = mpn_scan0 (x, i);      /* function under test */
+      want = refmpn_scan0 (x, i);  /* reference */
+      if (got != want)
+        {
+          printf ("mpn_scan0\n");
+          printf ("  i     %lu\n", i);
+          printf ("  got   %lu\n", got);
+          printf ("  want  %lu\n", want);
+          mpn_trace ("  x    ", x, SIZE);
+          abort ();
+        }
+    }
+}
+
+/* Drive check() over hand-picked two-bit operands: bits adjacent within a
+   limb, at opposite ends of a limb, and straddling limb boundaries.  */
+void
+check_twobits (void)
+{
+#define TWOBITS(a, b) \
+  ((CNST_LIMB(1) << (a)) | (CNST_LIMB(1) << (b)))
+
+  refmpn_zero (x, SIZE);
+  x[0] = TWOBITS (1, 0);
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[0] = TWOBITS (GMP_NUMB_BITS-1, 1);
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[0] = CNST_LIMB(1);
+  x[1] = CNST_LIMB(1);
+  check ();
+
+  /* bits in adjacent positions but different limbs */
+  refmpn_zero (x, SIZE);
+  x[0] = CNST_LIMB(1) << (GMP_NUMB_BITS-1);
+  x[1] = CNST_LIMB(1);
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[1] = TWOBITS (1, 0);
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[1] = CNST_LIMB(1);
+  x[2] = CNST_LIMB(1);
+  check ();
+}
+
+/* Exhaustive two-bit coverage: every pair of set-bit positions within
+   {x, SIZE}.  Not called from main -- the quadratic number of cases takes
+   too long, especially on 64-bit systems.  */
+void
+check_twobits_exhaustive (void)
+{
+  unsigned long  bit1, bit2;
+
+  for (bit1 = 0; bit1 < GMP_NUMB_BITS * SIZE; bit1++)
+    for (bit2 = 0; bit2 < GMP_NUMB_BITS * SIZE; bit2++)
+      {
+        refmpn_zero (x, SIZE);
+        refmpn_setbit (x, bit1);
+        refmpn_setbit (x, bit2);
+        check ();
+      }
+}
+
+/* Run check() on 100 random operands.  */
+void
+check_rand (void)
+{
+  int  remaining = 100;
+
+  while (remaining-- > 0)
+    {
+      refmpn_random2 (x, SIZE);
+      check ();
+    }
+}
+
+/* Entry point for the mpn_scan0/mpn_scan1 tests.  */
+int
+main (void)
+{
+  mp_trace_base = -16;	/* base-16 trace dumps (see tests/trace.c) */
+  tests_start ();
+
+  check_twobits ();
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-sizeinbase.c b/tests/mpn/t-sizeinbase.c
new file mode 100644
index 0000000..f34714a
--- /dev/null
+++ b/tests/mpn/t-sizeinbase.c
@@ -0,0 +1,98 @@
+/* Test for sizeinbase function.
+
+Copyright 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Exponents up to 2^SIZE_LOG */
+#ifndef SIZE_LOG
+#define SIZE_LOG 13
+#endif
+
+#ifndef COUNT
+#define COUNT 30
+#endif
+
+#define MAX_N (1<<SIZE_LOG)
+
+int
+main (int argc, char **argv)
+{
+  mp_limb_t a;
+  mp_ptr pp, scratch;
+  mp_limb_t max_b;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  TMP_MARK;
+  rands = RANDS;
+
+  pp = TMP_ALLOC_LIMBS (MAX_N);
+  scratch = TMP_ALLOC_LIMBS (MAX_N);
+  max_b = numberof (mp_bases);	/* one past the largest supported base */
+
+  ASSERT_ALWAYS (max_b > 62);
+  ASSERT_ALWAYS (max_b < GMP_NUMB_MAX);
+
+  /* For each supported base a, check mpn_sizeinbase on a^exp and on
+     a^exp - 1: a^exp has exactly exp+1 digits in base a, and a^exp - 1
+     exactly exp digits (for exp > 0).  mpn_sizeinbase is allowed to
+     return a result one too large, hence the two-value windows below.  */
+  for (a = 2; a < max_b; ++a)
+    for (test = 0; test < count; ++test)
+      {
+	mp_size_t pn;
+	mp_limb_t exp;
+	mp_bitcnt_t res;
+
+	exp = gmp_urandomm_ui (rands, MAX_N);
+
+	pn = mpn_pow_1 (pp, &a, 1, exp, scratch);
+
+	res = mpn_sizeinbase (pp, pn, a) - 1;
+
+	if ((res < exp) || (res > exp + 1))
+	  {
+	    printf ("ERROR in test %d, base = %d, exp = %d, res = %d\n",
+		    test, (int) a, (int) exp, (int) res);
+	    abort();
+	  }
+
+	/* Now a^exp - 1, dropping the top limb if it became zero.  */
+	mpn_sub_1 (pp, pp, pn, CNST_LIMB(1));
+	pn -= pp[pn-1] == 0;
+
+	res = mpn_sizeinbase (pp, pn, a);
+
+	if ((res < exp) || (res - 1 > exp))
+	  {
+	    printf ("ERROR in -1 test %d, base = %d, exp = %d, res = %d\n",
+		    test, (int) a, (int) exp, (int) res);
+	    abort();
+	  }
+      }
+
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-sqrlo.c b/tests/mpn/t-sqrlo.c
new file mode 100644
index 0000000..e9ab1e0
--- /dev/null
+++ b/tests/mpn/t-sqrlo.c
@@ -0,0 +1,129 @@
+/* Test for sqrlo function.
+
+Copyright 2009, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 10
+#endif
+
+#ifndef COUNT
+#define COUNT 10000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N (1)
+
+/* Compare mpn_sqrlo (low half of a square) against the low n limbs of a
+   full mpn_sqr, for random operands and sizes, while checking that no
+   limb just outside the destination or scratch areas is clobbered.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  /* mpn_sqrlo currently takes no user-supplied scratch.  */
+#define mpn_sqrlo_itch(n) (0)
+
+  /* pp and scratch are offset by one limb so that pp[-1] and scratch[-1]
+     are valid guard limbs for the overwrite checks below.  */
+  ap = TMP_ALLOC_LIMBS (MAX_N);
+  refp = TMP_ALLOC_LIMBS (MAX_N * 2);
+  pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_sqrlo_itch (MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t n;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+	;
+
+      /* Generate the operand size n, with MIN_N <= n <= (1 << size_range). */
+      size_range = size_min
+	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      n = MIN_N
+	+ gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+
+      mpn_random2 (ap, n);
+      mpn_random2 (pp-1, n + 2);
+      p_before = pp[-1];
+      p_after = pp[n];
+
+      itch = mpn_sqrlo_itch (n);
+      ASSERT_ALWAYS (itch <= mpn_sqrlo_itch (MAX_N));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      mpn_sqrlo (pp, ap, n);
+      mpn_sqr (refp, ap, n);
+      if (pp[-1] != p_before || pp[n] != p_after
+	  || scratch[-1] != s_before || scratch[itch] != s_after
+	  || mpn_cmp (refp, pp, n) != 0)
+	{
+	  /* Fix: the header line previously lacked the trailing newline
+	     and ran into the detail dumps below.  */
+	  printf ("ERROR in test %d, n = %d\n",
+		  test, (int) n);
+	  if (pp[-1] != p_before)
+	    {
+	      printf ("before pp:"); mpn_dump (pp -1, 1);
+	      printf ("keep:   "); mpn_dump (&p_before, 1);
+	    }
+	  if (pp[n] != p_after)
+	    {
+	      printf ("after pp:"); mpn_dump (pp + n, 1);
+	      printf ("keep:   "); mpn_dump (&p_after, 1);
+	    }
+	  if (scratch[-1] != s_before)
+	    {
+	      printf ("before scratch:"); mpn_dump (scratch-1, 1);
+	      printf ("keep:   "); mpn_dump (&s_before, 1);
+	    }
+	  if (scratch[itch] != s_after)
+	    {
+	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+	      printf ("keep:   "); mpn_dump (&s_after, 1);
+	    }
+	  mpn_dump (ap, n);
+	  mpn_dump (pp, n);
+	  mpn_dump (refp, n);
+
+	  abort();
+	}
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-sqrmod_bknp1.c b/tests/mpn/t-sqrmod_bknp1.c
new file mode 100644
index 0000000..1620925
--- /dev/null
+++ b/tests/mpn/t-sqrmod_bknp1.c
@@ -0,0 +1,251 @@
+/* Test for sqrmod_bknp1 (and mulmod_bknp1) functions.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2009, 2020-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#if MOD_BKNP1_USE11
+#define USE11 11,
+#else
+#define USE11
+#endif
+
+#if GMP_NUMB_BITS % 32 == 0
+#define MAX_K 17
+#define SUPPORTED_K {3, 5, 7, 13, USE11 MAX_K}
+#else
+#if GMP_NUMB_BITS % 16 == 0
+#define MAX_K 13
+#define SUPPORTED_K {3, 5, 7, USE11 MAX_K}
+#else
+#if GMP_NUMB_BITS % 8 == 0
+#define MAX_K 7
+#define SUPPORTED_K {3, USE11 MAX_K}
+#else
+#define SUPPORTED_K {USE11} /* Supported ? */
+#endif /* GMP_NUMB_BITS % 8 == 0 */
+#endif /* GMP_NUMB_BITS % 16 == 0 */
+#endif /* GMP_NUMB_BITS % 32 == 0 */
+
+#if MOD_BKNP1_ONLY3
+#undef SUPPORTED_K
+#undef MAX_K
+#define MAX_K 3
+#define SUPPORTED_K {3}
+#endif
+
+/* Sizes are up to MAX_K * 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 7
+#endif
+
+#ifndef COUNT
+#define COUNT 5000
+#endif
+
+#define MAX_N (MAX_K << SIZE_LOG)
+#define MIN_N 1
+
+/*
+  Reference function for squaring modulo B^rn+1 (here rn = k*n).
+*/
+
+static void
+ref_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn)
+{
+  mp_limb_t cy;
+
+  /* Full square of the (rn+1)-limb operand; the caller keeps ap[rn] at
+     0 or 1.  NOTE(review): assumes the limbs above position 2*rn need no
+     separate folding for the operand ranges used here -- TODO confirm.  */
+  mpn_sqr (rp, ap, rn + 1);
+  cy = rp[2 * rn];
+  /* B^{2rn} == 1 (mod B^rn+1): wrap limb 2*rn back onto position 0.  */
+  MPN_INCR_U (rp, 2 * rn + 1, rp[2 * rn]);
+  /* B^rn == -1: subtract the high half; cy accumulates the net
+     wrap-around adjustment (change at limb 2*rn plus the borrow).  */
+  cy = rp[2 * rn] - cy + mpn_sub_n (rp, rp, rp + rn, rn);
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn + 1, cy);
+}
+
+/*
+  Compare the results of the mpn_sqrmod_bknp1 and mpn_mulmod_bknp1
+  functions in the library with the reference function above.
+*/
+unsigned supported_k[] = SUPPORTED_K;
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  /* pp and scratch are offset by one limb so that pp[-1] and scratch[-1]
+     are valid guard limbs for the overwrite checks below.  */
+  ap = TMP_ALLOC_LIMBS (MAX_N + 1);
+  refp = TMP_ALLOC_LIMBS (MAX_N * 2 + 2);
+  pp = 1 + TMP_ALLOC_LIMBS (MAX_N + 3);
+  scratch
+    = 1 + TMP_ALLOC_LIMBS (mpn_mulmod_bknp1_itch (MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      unsigned k;
+      mp_size_t rn, n;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+	;
+
+      /* Pick the size range; the modulus is B^rn + 1 with rn = k * n. */
+      size_range = size_min
+	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      k = supported_k[test % numberof (supported_k)];
+      if (test < numberof (supported_k))
+	{
+	  /* First pass over the supported k values: a crafted operand
+	     built from x ~ B/k (presumably targeting corner cases of the
+	     mod B^k+1 splitting -- kept as in the original).  */
+	  n = 1;
+	  rn = k;
+	  ap [rn] = 0;
+	  mp_limb_t x = GMP_NUMB_MAX / k + 1;
+	  ap [0] = x;
+	  for (int i = 1; i < k; i += 2)
+	    {
+	      ap [i] = - x;
+	      ap [i + 1] = x - 1;
+	    }
+	}
+      else
+	{
+	  n = MIN_N
+	    + gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+	  rn = k * n;
+	  /* If k does not divide GMP_NUMB_MAX (= B-1), switch to k = 3
+	     when rn allows it.  */
+	  if ((GMP_NUMB_MAX % k != 0) && (rn % 3 == 0))
+	    n = rn / (k = 3);
+
+	  mpn_random2 (ap, rn + 1);
+
+	  ap [rn] &= 1;	/* semi-normalise: top limb 0 or 1 */
+	}
+
+      mpn_random2 (pp-1, rn + 3);
+      p_before = pp[-1];
+      p_after = pp[rn + 1];
+
+      itch = mpn_sqrmod_bknp1_itch (rn);
+      ASSERT_ALWAYS (itch <= mpn_mulmod_bknp1_itch (MAX_N));
+      mpn_random2 (scratch - 1, itch + 2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      /* First check the dedicated squaring entry point.  */
+      mpn_sqrmod_bknp1 (  pp, ap, n, k, scratch);
+      ref_sqrmod_bnp1 (refp, ap, rn);
+      if (pp[-1] != p_before || pp[rn + 1] != p_after
+	  || scratch[-1] != s_before || scratch[itch] != s_after
+	  || mpn_cmp (refp, pp, rn + 1) != 0)
+	{
+	  printf ("ERROR in test %d(sqr), rn = %d, n = %d, k = %d\n",
+		  test, (int) rn, (int) n, (int) k);
+	  if (pp[-1] != p_before)
+	    {
+	      printf ("before pp:"); mpn_dump (pp - 1, 1);
+	      printf ("keep:   "); mpn_dump (&p_before, 1);
+	    }
+	  if (pp[rn + 1] != p_after)
+	    {
+	      printf ("after pp:"); mpn_dump (pp + rn + 1, 1);
+	      printf ("keep:   "); mpn_dump (&p_after, 1);
+	    }
+	  if (scratch[-1] != s_before)
+	    {
+	      printf ("before scratch:"); mpn_dump (scratch - 1, 1);
+	      printf ("keep:   "); mpn_dump (&s_before, 1);
+	    }
+	  if (scratch[itch] != s_after)
+	    {
+	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+	      printf ("keep:   "); mpn_dump (&s_after, 1);
+	    }
+	  mpn_dump (ap, rn + 1);
+	  mpn_dump (pp, rn + 1);
+	  mpn_dump (refp, rn + 1);
+
+	  abort();
+	}
+
+      mpn_random2 (pp-1, rn + 3);
+      p_before = pp[-1];
+      p_after = pp[rn + 1];
+
+      itch = mpn_mulmod_bknp1_itch (rn);
+      ASSERT_ALWAYS (itch <= mpn_mulmod_bknp1_itch (MAX_N));
+      mpn_random2 (scratch - 1, itch + 2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      /* Then cross-check mulmod with both operands equal.  */
+      mpn_mulmod_bknp1 (  pp, ap, ap, n, k, scratch);
+      if (pp[-1] != p_before || pp[rn + 1] != p_after
+	  || scratch[-1] != s_before || scratch[itch] != s_after
+	  || mpn_cmp (refp, pp, rn + 1) != 0)
+	{
+	  printf ("ERROR in test %d(mul), rn = %d, n = %d, k = %d\n",
+		  test, (int) rn, (int) n, (int) k);
+	  if (pp[-1] != p_before)
+	    {
+	      printf ("before pp:"); mpn_dump (pp - 1, 1);
+	      printf ("keep:   "); mpn_dump (&p_before, 1);
+	    }
+	  if (pp[rn + 1] != p_after)
+	    {
+	      printf ("after pp:"); mpn_dump (pp + rn + 1, 1);
+	      printf ("keep:   "); mpn_dump (&p_after, 1);
+	    }
+	  if (scratch[-1] != s_before)
+	    {
+	      printf ("before scratch:"); mpn_dump (scratch - 1, 1);
+	      printf ("keep:   "); mpn_dump (&s_before, 1);
+	    }
+	  if (scratch[itch] != s_after)
+	    {
+	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+	      printf ("keep:   "); mpn_dump (&s_after, 1);
+	    }
+	  mpn_dump (ap, rn + 1);
+	  mpn_dump (pp, rn + 1);
+	  mpn_dump (refp, rn + 1);
+
+	  abort();
+	}
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-sqrmod_bnm1.c b/tests/mpn/t-sqrmod_bnm1.c
new file mode 100644
index 0000000..27e5f38
--- /dev/null
+++ b/tests/mpn/t-sqrmod_bnm1.c
@@ -0,0 +1,182 @@
+/* Test for sqrmod_bnm1 function.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2009, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 12
+#endif
+
+#ifndef COUNT
+#define COUNT 3000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N 1
+
+/*
+  Reference function for squaring modulo B^rn-1.
+
+  The result is expected to be ZERO if and only if one of the operand
+  already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+  B^rn-1. This should not be a problem if sqrmod_bnm1 is used to
+  combine results and obtain a natural number when one knows in
+  advance that the final value is less than (B^rn-1).
+*/
+
+/* Reference squaring modulo B^rn - 1: {rp, rn} <- {ap, an}^2 mod (B^rn-1),
+   with the class of zero represented by B^rn-1 unless the operand is zero
+   (see the comment above).  */
+static void
+ref_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < an && an <= rn);
+
+  refmpn_mul (rp, ap, an, ap, an);
+  an *= 2;
+  /* Reduce mod B^rn - 1: since B^rn == 1, fold the limbs above position
+     rn back onto the low part.  */
+  if (an > rn) {
+    cy = mpn_add (rp, rp, rn, rp + rn, an - rn);
+    /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+     * be no overflow when adding in the carry. */
+    MPN_INCR_U (rp, rn, cy);
+  }
+}
+
+/*
+  Compare the result of the mpn_sqrmod_bnm1 function in the library
+  with the reference function above.
+*/
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  ASSERT_ALWAYS (mpn_sqrmod_bnm1_next_size (MAX_N) == MAX_N);
+
+  /* pp and scratch are offset by one limb so that pp[-1] and scratch[-1]
+     are valid guard limbs for the overwrite checks below.  */
+  ap = TMP_ALLOC_LIMBS (MAX_N);
+  refp = TMP_ALLOC_LIMBS (MAX_N * 4);
+  pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (MAX_N, MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t an,rn,n;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+	;
+
+      /* Generate the modulus size n, with MIN_N <= n <= (1 << size_range). */
+      size_range = size_min
+	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      n = MIN_N
+	+ gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+      n = mpn_sqrmod_bnm1_next_size (n);
+
+      /* Operand size: at least half the modulus size, the main scenario. */
+      if (n == 1)
+	an = 1;
+      else
+	an = ((n+1) >> 1) + gmp_urandomm_ui (rands, (n+1) >> 1);
+
+      mpn_random2 (ap, an);
+
+      /* Sometime trigger the borderline conditions
+	 A = -1,0,+1 Mod(B^{n/2}+1).
+	 This only makes sense if there is at least a split, i.e. n is even. */
+      if ((test & 0x1f) == 1 && (n & 1) == 0) {
+	mp_size_t x;
+	MPN_COPY (ap, ap + (n >> 1), an - (n >> 1));
+	MPN_ZERO (ap + an - (n >> 1) , n - an);
+	x = 0;
+	/* x = (n == an) ? 0 : gmp_urandomm_ui (rands, n - an); */
+	ap[x] += gmp_urandomm_ui (rands, 3) - 1;
+      }
+      /* Significant part of the result: a square of 2*an limbs reduced
+	 mod B^n-1 has at most MIN (n, 2*an) limbs.  */
+      rn = MIN(n, 2*an);
+      mpn_random2 (pp-1, rn + 2);
+      p_before = pp[-1];
+      p_after = pp[rn];
+
+      itch = mpn_sqrmod_bnm1_itch (n, an);
+      ASSERT_ALWAYS (itch <= mpn_sqrmod_bnm1_itch (MAX_N, MAX_N));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      mpn_sqrmod_bnm1 (  pp, n, ap, an, scratch);
+      ref_sqrmod_bnm1 (refp, n, ap, an);
+      if (pp[-1] != p_before || pp[rn] != p_after
+	  || scratch[-1] != s_before || scratch[itch] != s_after
+	  || mpn_cmp (refp, pp, rn) != 0)
+	{
+	  printf ("ERROR in test %d, an = %d, n = %d\n",
+		  test, (int) an, (int) n);
+	  if (pp[-1] != p_before)
+	    {
+	      printf ("before pp:"); mpn_dump (pp -1, 1);
+	      printf ("keep:   "); mpn_dump (&p_before, 1);
+	    }
+	  if (pp[rn] != p_after)
+	    {
+	      printf ("after pp:"); mpn_dump (pp + rn, 1);
+	      printf ("keep:   "); mpn_dump (&p_after, 1);
+	    }
+	  if (scratch[-1] != s_before)
+	    {
+	      printf ("before scratch:"); mpn_dump (scratch-1, 1);
+	      printf ("keep:   "); mpn_dump (&s_before, 1);
+	    }
+	  if (scratch[itch] != s_after)
+	    {
+	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+	      printf ("keep:   "); mpn_dump (&s_after, 1);
+	    }
+	  mpn_dump (ap, an);
+	  mpn_dump (pp, rn);
+	  mpn_dump (refp, rn);
+
+	  abort();
+	}
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-toom2-sqr.c b/tests/mpn/t-toom2-sqr.c
new file mode 100644
index 0000000..1b42850
--- /dev/null
+++ b/tests/mpn/t-toom2-sqr.c
@@ -0,0 +1,86 @@
+#define mpn_toomN_sqr mpn_toom2_sqr
+#define mpn_toomN_sqr_itch mpn_toom2_sqr_itch
+#define MIN_AN MPN_TOOM2_SQR_MINSIZE
+#define MAX_AN SQR_TOOM3_THRESHOLD
+
+#define MORE_SQR_TESTS explore_unlikely_branch
+#include "toom-sqr-shared.h"
+
+void
+explore_unlikely_branch (gmp_randstate_ptr rands)
+{
+  mp_ptr ap, refp, pp, scratch;
+  mp_size_t an;
+  mp_bitcnt_t bit;
+  TMP_DECL;
+  TMP_MARK;
+
+  ap = TMP_ALLOC_LIMBS (MAX_AN);
+  refp = TMP_ALLOC_LIMBS (MAX_AN * 2);
+  pp = 1 + TMP_ALLOC_LIMBS (MAX_AN * 2 + 2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_toomN_sqr_itch (MAX_AN) + 2);
+
+  for (an = MIN_AN + (MIN_AN & 1); an < MAX_AN; an+=2)
+    {
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      bit = an / 2 * GMP_NUMB_BITS
+	+ gmp_urandomm_ui (rands, an / 2 * GMP_NUMB_BITS - 1);
+
+      mpn_zero (ap, an);
+      mpn_zero (pp, an * 2);
+      pp [an - 1] |= GMP_NUMB_HIGHBIT;
+      pp [bit / GMP_NUMB_BITS] |= CNST_LIMB (1) << (bit % GMP_NUMB_BITS);
+      mpn_sqrtrem (ap, NULL, pp, an);
+      /* We need {ap, an} such that {ap + an/2, an/2} is zero and
+	 the result {pp, 2*an} is such that the sum
+	 {pp, an/2} + {pp + an/2, an/2} gives a carry. */
+      mpn_random2 (pp-1, an * 2 + 2);
+      p_before = pp[-1];
+      p_after = pp[an * 2];
+
+      itch = mpn_toomN_sqr_itch (an);
+      ASSERT_ALWAYS (itch <= mpn_toomN_sqr_itch (MAX_AN));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      mpn_toomN_sqr (pp, ap, an, scratch);
+      refmpn_mul (refp, ap, an, ap, an);
+      if (pp[-1] != p_before || pp[an * 2] != p_after
+	  || scratch[-1] != s_before || scratch[itch] != s_after
+	  || mpn_cmp (refp, pp, an * 2) != 0)
+	{
+	  printf ("ERROR with bit %lu, an = %d\n",
+		  (unsigned long) bit, (int) an);
+	  if (pp[-1] != p_before)
+	    {
+	      printf ("before pp:"); mpn_dump (pp -1, 1);
+	      printf ("keep:   "); mpn_dump (&p_before, 1);
+	    }
+	  if (pp[an * 2] != p_after)
+	    {
+	      printf ("after pp:"); mpn_dump (pp + an * 2, 1);
+	      printf ("keep:   "); mpn_dump (&p_after, 1);
+	    }
+	  if (scratch[-1] != s_before)
+	    {
+	      printf ("before scratch:"); mpn_dump (scratch-1, 1);
+	      printf ("keep:   "); mpn_dump (&s_before, 1);
+	    }
+	  if (scratch[itch] != s_after)
+	    {
+	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+	      printf ("keep:   "); mpn_dump (&s_after, 1);
+	    }
+	  mpn_dump (ap, an);
+	  mpn_dump (pp, an * 2);
+	  mpn_dump (refp, an * 2);
+
+	  abort();
+	}
+    }
+  TMP_FREE;
+}
diff --git a/tests/mpn/t-toom22.c b/tests/mpn/t-toom22.c
new file mode 100644
index 0000000..c9beed9
--- /dev/null
+++ b/tests/mpn/t-toom22.c
@@ -0,0 +1,10 @@
+#define mpn_toomMN_mul mpn_toom22_mul
+#define mpn_toomMN_mul_itch mpn_toom22_mul_itch
+#define MIN_AN MIN(MPN_TOOM22_MUL_MINSIZE,4)
+
+#define MIN_BN(an)				\
+  ((an) >= 2*MUL_TOOM22_THRESHOLD		\
+   ? (an) + 2 - MUL_TOOM22_THRESHOLD		\
+   : ((an)+1)/2 + 1 + (an & 1))
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom3-sqr.c b/tests/mpn/t-toom3-sqr.c
new file mode 100644
index 0000000..ccc3b99
--- /dev/null
+++ b/tests/mpn/t-toom3-sqr.c
@@ -0,0 +1,6 @@
+#define mpn_toomN_sqr mpn_toom3_sqr
+#define mpn_toomN_sqr_itch mpn_toom3_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MPN_TOOM3_SQR_MINSIZE)
+#define MAX_AN SQR_TOOM4_THRESHOLD
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom32.c b/tests/mpn/t-toom32.c
new file mode 100644
index 0000000..e42745d
--- /dev/null
+++ b/tests/mpn/t-toom32.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom32_mul
+#define mpn_toomMN_mul_itch mpn_toom32_mul_itch
+
+#define MIN_AN 6
+#define MIN_BN(an) (((an) + 8) / (size_t) 3)
+#define MAX_BN(an) ((an) - 2)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom33.c b/tests/mpn/t-toom33.c
new file mode 100644
index 0000000..7de82b2
--- /dev/null
+++ b/tests/mpn/t-toom33.c
@@ -0,0 +1,11 @@
+#define mpn_toomMN_mul mpn_toom33_mul
+#define mpn_toomMN_mul_itch mpn_toom33_mul_itch
+
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom22_mul with invalid input size. */
+#define MIN_AN MUL_TOOM33_THRESHOLD
+#define MIN_BN(an) (1 + 2*(((an)+2)/(size_t) 3))
+
+#define COUNT 1000
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom4-sqr.c b/tests/mpn/t-toom4-sqr.c
new file mode 100644
index 0000000..ca14ab1
--- /dev/null
+++ b/tests/mpn/t-toom4-sqr.c
@@ -0,0 +1,6 @@
+#define mpn_toomN_sqr mpn_toom4_sqr
+#define mpn_toomN_sqr_itch mpn_toom4_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MPN_TOOM4_SQR_MINSIZE))
+#define MAX_AN SQR_TOOM6_THRESHOLD
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom42.c b/tests/mpn/t-toom42.c
new file mode 100644
index 0000000..09a4a0c
--- /dev/null
+++ b/tests/mpn/t-toom42.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom42_mul
+#define mpn_toomMN_mul_itch mpn_toom42_mul_itch
+
+#define MIN_AN 10
+#define MIN_BN(an) (((an) + 7) >> 2)
+#define MAX_BN(an) ((2*(an)-5) / (size_t) 3)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom43.c b/tests/mpn/t-toom43.c
new file mode 100644
index 0000000..224a45b
--- /dev/null
+++ b/tests/mpn/t-toom43.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom43_mul
+#define mpn_toomMN_mul_itch mpn_toom43_mul_itch
+
+#define MIN_AN 25
+#define MIN_BN(an) (1 + 2*(((an)+3) >> 2))
+#define MAX_BN(an) ((an)-3)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom44.c b/tests/mpn/t-toom44.c
new file mode 100644
index 0000000..6c627e3
--- /dev/null
+++ b/tests/mpn/t-toom44.c
@@ -0,0 +1,11 @@
+#define mpn_toomMN_mul mpn_toom44_mul
+#define mpn_toomMN_mul_itch mpn_toom44_mul_itch
+
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom22_mul or toom33_mul with invalid input size. */
+#define MIN_AN MUL_TOOM44_THRESHOLD
+#define MIN_BN(an) (1 + 3*(((an)+3)>>2))
+
+#define COUNT 1000
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom52.c b/tests/mpn/t-toom52.c
new file mode 100644
index 0000000..d3fb134
--- /dev/null
+++ b/tests/mpn/t-toom52.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom52_mul
+#define mpn_toomMN_mul_itch mpn_toom52_mul_itch
+
+#define MIN_AN 32
+#define MIN_BN(an) (((an) + 9) / (size_t) 5)
+#define MAX_BN(an) (((an) - 3) >> 1)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom53.c b/tests/mpn/t-toom53.c
new file mode 100644
index 0000000..ddbf177
--- /dev/null
+++ b/tests/mpn/t-toom53.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom53_mul
+#define mpn_toomMN_mul_itch mpn_toom53_mul_itch
+
+#define MIN_AN 17
+#define MIN_BN(an) (1 + 2*(((an) + 4) / (size_t) 5))
+#define MAX_BN(an) ((3*(an) - 11) >> 2)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom54.c b/tests/mpn/t-toom54.c
new file mode 100644
index 0000000..52a2bee
--- /dev/null
+++ b/tests/mpn/t-toom54.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom54_mul
+#define mpn_toomMN_mul_itch mpn_toom54_mul_itch
+
+#define MIN_AN 31
+#define MIN_BN(an) ((3*(an) + 32) / (size_t) 5)		/* 3/5 */
+#define MAX_BN(an) ((an) - 6)	                        /* 1/1 */
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom6-sqr.c b/tests/mpn/t-toom6-sqr.c
new file mode 100644
index 0000000..67d7a63
--- /dev/null
+++ b/tests/mpn/t-toom6-sqr.c
@@ -0,0 +1,8 @@
+#define mpn_toomN_sqr mpn_toom6_sqr
+#define mpn_toomN_sqr_itch mpn_toom6_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MAX(SQR_TOOM6_THRESHOLD,MPN_TOOM6_SQR_MINSIZE)))
+#define MAX_AN SQR_TOOM8_THRESHOLD
+
+#define COUNT 250
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom62.c b/tests/mpn/t-toom62.c
new file mode 100644
index 0000000..1cb2aab
--- /dev/null
+++ b/tests/mpn/t-toom62.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom62_mul
+#define mpn_toomMN_mul_itch mpn_toom62_mul_itch
+
+#define MIN_AN 31
+#define MIN_BN(an) (((an) + 11) / (size_t) 6)
+#define MAX_BN(an) ((2*(an) - 7) / (size_t) 5)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom63.c b/tests/mpn/t-toom63.c
new file mode 100644
index 0000000..d79165d
--- /dev/null
+++ b/tests/mpn/t-toom63.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom63_mul
+#define mpn_toomMN_mul_itch mpn_toom63_mul_itch
+
+#define MIN_AN 49
+#define MIN_BN(an) (2*(((an) + 23) / (size_t) 6))	/* 2/6 */
+#define MAX_BN(an) ((3*(an) - 23)  / (size_t) 5)	/* 3/5 */
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom6h.c b/tests/mpn/t-toom6h.c
new file mode 100644
index 0000000..5cca9fc
--- /dev/null
+++ b/tests/mpn/t-toom6h.c
@@ -0,0 +1,13 @@
+#define mpn_toomMN_mul mpn_toom6h_mul
+#define mpn_toomMN_mul_itch mpn_toom6h_mul_itch
+
+#define SIZE_LOG 11
+
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom22_mul, toom33_mul, or toom44_mul with invalid input size. */
+#define MIN_AN MUL_TOOM6H_MIN
+#define MIN_BN(an) (MAX ((an*3)>>3, 46))
+
+#define COUNT 1000
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom8-sqr.c b/tests/mpn/t-toom8-sqr.c
new file mode 100644
index 0000000..0eee605
--- /dev/null
+++ b/tests/mpn/t-toom8-sqr.c
@@ -0,0 +1,8 @@
+#define mpn_toomN_sqr mpn_toom8_sqr
+#define mpn_toomN_sqr_itch mpn_toom8_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MAX(SQR_TOOM6_THRESHOLD,MAX(SQR_TOOM8_THRESHOLD,MPN_TOOM8_SQR_MINSIZE))))
+#define MAX_AN SQR_FFT_THRESHOLD
+
+#define COUNT 250
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom8h.c b/tests/mpn/t-toom8h.c
new file mode 100644
index 0000000..b21344e
--- /dev/null
+++ b/tests/mpn/t-toom8h.c
@@ -0,0 +1,19 @@
+#define mpn_toomMN_mul mpn_toom8h_mul
+#define mpn_toomMN_mul_itch mpn_toom8h_mul_itch
+
+#define SIZE_LOG 11
+
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom{22,33,44,6h}_mul with invalid input size. */
+#define MIN_AN MUL_TOOM8H_MIN
+
+#define MIN_BN(an)			 \
+(MAX(GMP_NUMB_BITS <=  9*3 ? (an*7)/ 9 : \
+     GMP_NUMB_BITS <= 10*3 ? (an*6)/10 : \
+     GMP_NUMB_BITS <= 11*3 ? (an*5)/11 : \
+     GMP_NUMB_BITS <= 12*3 ? (an*4)/12 : \
+     (an*4)/13, 86) )
+
+#define COUNT 1000
+
+#include "toom-shared.h"
diff --git a/tests/mpn/toom-shared.h b/tests/mpn/toom-shared.h
new file mode 100644
index 0000000..8188b00
--- /dev/null
+++ b/tests/mpn/toom-shared.h
@@ -0,0 +1,148 @@
+/* Test for various Toom functions.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Main file is expected to define mpn_toomMN_mul,
+ * mpn_toomMN_mul_itch, MIN_AN, MIN_BN(an), MAX_BN(an) and then
+ * include this file. */
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 10
+#endif
+
+#ifndef COUNT
+#define COUNT 2000
+#endif
+
+#define MAX_AN (1L << SIZE_LOG)
+
+#ifndef MAX_BN
+#define MAX_BN(an) (an)
+#endif
+
+/* For general toomMN_mul, we need
+ *
+ * MIN_BN(an) = N + floor(((N-1)*an + M - N)/M)
+ *
+ * MAX_BN(an) = floor(N*(an-1)/(M-1)) - N + 1
+ */
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_AN);
+  bp = TMP_ALLOC_LIMBS (MAX_BN(MAX_AN));
+  refp = TMP_ALLOC_LIMBS (MAX_AN + MAX_BN(MAX_AN));
+  pp = 1+TMP_ALLOC_LIMBS (MAX_AN + MAX_BN(MAX_AN)+2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_toomMN_mul_itch (MAX_AN, MAX_BN(MAX_AN))
+			 + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t an, bn;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_AN; size_min++)
+	;
+
+      /* We generate an in the range MIN_AN <= an <= (1 << size_range). */
+      size_range = size_min
+	+ gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      an = MIN_AN
+	+ gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_AN);
+      bn = MIN_BN(an)
+	+ gmp_urandomm_ui (rands, MAX_BN(an) + 1 - MIN_BN(an));
+
+      mpn_random2 (ap, an);
+      mpn_random2 (bp, bn);
+      mpn_random2 (pp-1, an + bn + 2);
+      p_before = pp[-1];
+      p_after = pp[an + bn];
+
+      itch = mpn_toomMN_mul_itch (an, bn);
+      ASSERT_ALWAYS (itch <= mpn_toomMN_mul_itch (MAX_AN, MAX_BN(MAX_AN)));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      mpn_toomMN_mul (pp, ap, an, bp, bn, scratch);
+      refmpn_mul (refp, ap, an, bp, bn);
+      if (pp[-1] != p_before || pp[an + bn] != p_after
+	  || scratch[-1] != s_before || scratch[itch] != s_after
+	  || mpn_cmp (refp, pp, an + bn) != 0)
+	{
+	  printf ("ERROR in test %d, an = %d, bn = %d\n",
+		  test, (int) an, (int) bn);
+	  if (pp[-1] != p_before)
+	    {
+	      printf ("before pp:"); mpn_dump (pp -1, 1);
+	      printf ("keep:   "); mpn_dump (&p_before, 1);
+	    }
+	  if (pp[an + bn] != p_after)
+	    {
+	      printf ("after pp:"); mpn_dump (pp + an + bn, 1);
+	      printf ("keep:   "); mpn_dump (&p_after, 1);
+	    }
+	  if (scratch[-1] != s_before)
+	    {
+	      printf ("before scratch:"); mpn_dump (scratch-1, 1);
+	      printf ("keep:   "); mpn_dump (&s_before, 1);
+	    }
+	  if (scratch[itch] != s_after)
+	    {
+	      printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+	      printf ("keep:   "); mpn_dump (&s_after, 1);
+	    }
+	  mpn_dump (ap, an);
+	  mpn_dump (bp, bn);
+	  mpn_dump (pp, an + bn);
+	  mpn_dump (refp, an + bn);
+
+	  abort();
+	}
+    }
+  TMP_FREE;
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/toom-sqr-shared.h b/tests/mpn/toom-sqr-shared.h
new file mode 100644
index 0000000..8bab8e1
--- /dev/null
+++ b/tests/mpn/toom-sqr-shared.h
@@ -0,0 +1,125 @@
+/* Test for various Toom squaring functions.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Main file is expected to define mpn_toomN_sqr, mpn_toomN_sqr_itch,
+ * MIN_AN, MAX_AN and then include this file. */
+
+#ifndef COUNT
+#define COUNT 2000
+#endif
+
+#ifdef MORE_SQR_TESTS
+void MORE_SQR_TESTS (gmp_randstate_ptr);
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+
+  if (MAX_AN > MIN_AN) {
+    rands = RANDS;
+
+    ap = TMP_ALLOC_LIMBS (MAX_AN);
+    refp = TMP_ALLOC_LIMBS (MAX_AN * 2);
+    pp = 1 + TMP_ALLOC_LIMBS (MAX_AN * 2 + 2);
+    scratch
+      = 1+TMP_ALLOC_LIMBS (mpn_toomN_sqr_itch (MAX_AN) + 2);
+
+    for (test = 0; test < count; test++)
+      {
+	mp_size_t an;
+	mp_size_t itch;
+	mp_limb_t p_before, p_after, s_before, s_after;
+
+	an = MIN_AN
+	  + gmp_urandomm_ui (rands, MAX_AN - MIN_AN);
+
+	mpn_random2 (ap, an);
+	mpn_random2 (pp-1, an * 2 + 2);
+	p_before = pp[-1];
+	p_after = pp[an * 2];
+
+	itch = mpn_toomN_sqr_itch (an);
+	ASSERT_ALWAYS (itch <= mpn_toomN_sqr_itch (MAX_AN));
+	mpn_random2 (scratch-1, itch+2);
+	s_before = scratch[-1];
+	s_after = scratch[itch];
+
+	mpn_toomN_sqr (pp, ap, an, scratch);
+	refmpn_mul (refp, ap, an, ap, an);
+	if (pp[-1] != p_before || pp[an * 2] != p_after
+	    || scratch[-1] != s_before || scratch[itch] != s_after
+	    || mpn_cmp (refp, pp, an * 2) != 0)
+	  {
+	    printf ("ERROR in test %d, an = %d\n",
+		    test, (int) an);
+	    if (pp[-1] != p_before)
+	      {
+		printf ("before pp:"); mpn_dump (pp -1, 1);
+		printf ("keep:   "); mpn_dump (&p_before, 1);
+	      }
+	    if (pp[an * 2] != p_after)
+	      {
+		printf ("after pp:"); mpn_dump (pp + an * 2, 1);
+		printf ("keep:   "); mpn_dump (&p_after, 1);
+	      }
+	    if (scratch[-1] != s_before)
+	      {
+		printf ("before scratch:"); mpn_dump (scratch-1, 1);
+		printf ("keep:   "); mpn_dump (&s_before, 1);
+	      }
+	    if (scratch[itch] != s_after)
+	      {
+		printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+		printf ("keep:   "); mpn_dump (&s_after, 1);
+	      }
+	    mpn_dump (ap, an);
+	    mpn_dump (pp, an * 2);
+	    mpn_dump (refp, an * 2);
+
+	    abort();
+	  }
+      }
+    TMP_FREE;
+
+#ifdef MORE_SQR_TESTS
+  MORE_SQR_TESTS (rands);
+#endif
+  }
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpq/io.c b/tests/mpq/io.c
new file mode 100644
index 0000000..9516d49
--- /dev/null
+++ b/tests/mpq/io.c
@@ -0,0 +1,136 @@
+/* Test conversion and I/O using mpq_out_str and mpq_inp_str.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>		/* for unlink */
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define FILENAME  "/tmp/io.tmp"
+
+void
+debug_mp (mpq_t x, int base)
+{
+  mpq_out_str (stdout, base, x); fputc ('\n', stdout);
+}
+
+int
+main (int argc, char **argv)
+{
+  mpq_t  op1, op2;
+  mp_size_t size;
+  int i;
+  int reps = 10000;
+  FILE *fp;
+  int base;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  size_t nread;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpq_init (op1);
+  mpq_init (op2);
+
+  fp = fopen (FILENAME, "w+");
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs) + 2;
+      mpz_errandomb (mpq_numref(op1), rands, size);
+      mpz_errandomb_nonzero (mpq_denref(op1), rands, size);
+      mpq_canonicalize (op1);
+
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpq_neg (op1, op1);
+
+      mpz_urandomb (bs, rands, 16);
+      bsi = mpz_get_ui (bs);
+      base = bsi % 36 + 1;
+      if (base == 1)
+	base = 0;
+
+      rewind (fp);
+      if (mpq_out_str (fp, base, op1) == 0
+	  || putc (' ', fp) == EOF
+	  || fflush (fp) != 0)
+	{
+	  printf ("mpq_out_str write error\n");
+	  abort ();
+	}
+
+      rewind (fp);
+      nread = mpq_inp_str (op2, fp, base);
+      if (nread == 0)
+	{
+	  if (ferror (fp))
+	    printf ("mpq_inp_str stream read error\n");
+	  else
+	    printf ("mpq_inp_str data conversion error\n");
+	  abort ();
+	}
+
+      if (nread != ftell(fp))
+	{
+	  printf ("mpq_inp_str nread doesn't match ftell\n");
+	  printf ("  nread  %lu\n", (unsigned long) nread);
+	  printf ("  ftell  %ld\n", ftell(fp));
+	  abort ();
+	}
+
+      if (mpq_cmp (op1, op2))
+	{
+	  printf ("ERROR\n");
+	  printf ("op1  = "); debug_mp (op1, -16);
+	  printf ("op2  = "); debug_mp (op2, -16);
+	  printf ("base = %d\n", base);
+	  abort ();
+	}
+    }
+
+  fclose (fp);
+
+  unlink (FILENAME);
+
+  mpz_clear (bs);
+  mpq_clear (op1);
+  mpq_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/reuse.c b/tests/mpq/reuse.c
new file mode 100644
index 0000000..b724ffc
--- /dev/null
+++ b/tests/mpq/reuse.c
@@ -0,0 +1,245 @@
+/* Test that routines allow reusing a source variable as destination.
+
+Copyright 1996, 2000-2002, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMP_LIBGMP_DLL
+
+/* FIXME: When linking to a DLL libgmp, mpq_add etc can't be used as
+   initializers for global variables because they're effectively global
+   variables (function pointers) themselves.  Perhaps calling a test
+   function successively with mpq_add etc would be better.  */
+
+int
+main (void)
+{
+  printf ("Test suppressed for windows DLL\n");
+  exit (0);
+}
+
+
+#else /* ! DLL_EXPORT */
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+void dump_abort (const char *, mpq_t, mpq_t);
+
+typedef void (*dss_func) (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+dss_func dss_funcs[] =
+{
+  mpq_div, mpq_add, mpq_mul, mpq_sub,
+};
+
+const char *dss_func_names[] =
+{
+  "mpq_div", "mpq_add", "mpq_mul", "mpq_sub",
+};
+
+typedef void (*ds_func) (mpq_ptr, mpq_srcptr);
+
+ds_func ds_funcs[] =
+{
+  mpq_abs, mpq_neg,
+};
+
+const char *ds_func_names[] =
+{
+  "mpq_abs", "mpq_neg",
+};
+
+typedef void (*dsi_func) (mpq_ptr, mpq_srcptr, unsigned long int);
+
+dsi_func dsi_funcs[] =
+{
+  mpq_mul_2exp, mpq_div_2exp
+};
+
+const char *dsi_func_names[] =
+{
+  "mpq_mul_2exp", "mpq_div_2exp"
+};
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int pass, reps = 100;
+  mpq_t in1, in2, out1;
+  unsigned long int randbits, in2i;
+  mpq_t res1, res2;
+  gmp_randstate_ptr  rands;
+
+  tests_start ();
+
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpq_init (in1);
+  mpq_init (in2);
+  mpq_init (out1);
+  mpq_init (res1);
+  mpq_init (res2);
+
+  for (pass = 1; pass <= reps; pass++)
+    {
+      randbits = urandom ();
+
+      if (randbits & 1)
+	{
+	  mpq_clear (in1);
+	  mpq_init (in1);
+	}
+      randbits >>= 1;
+      mpz_errandomb (mpq_numref(in1), rands, 512L);
+      mpz_errandomb_nonzero (mpq_denref(in1), rands, 512L);
+      if (randbits & 1)
+	mpz_neg (mpq_numref(in1),mpq_numref(in1));
+      randbits >>= 1;
+      mpq_canonicalize (in1);
+
+      if (randbits & 1)
+	{
+	  mpq_clear (in2);
+	  mpq_init (in2);
+	}
+      randbits >>= 1;
+      mpz_errandomb (mpq_numref(in2), rands, 512L);
+      mpz_errandomb_nonzero (mpq_denref(in2), rands, 512L);
+      if (randbits & 1)
+	mpz_neg (mpq_numref(in2),mpq_numref(in2));
+      randbits >>= 1;
+      mpq_canonicalize (in2);
+
+      for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)
+	{
+	  /* Don't divide by 0.  */
+	  if (i == 0 && mpq_cmp_ui (in2, 0, 1) == 0)
+	    continue;
+
+	  if (randbits & 1)
+	    {
+	      mpq_clear (res1);
+	      mpq_init (res1);
+	    }
+	  randbits >>= 1;
+
+	  (dss_funcs[i]) (res1, in1, in2);
+	  MPQ_CHECK_FORMAT(res1);
+
+	  mpq_set (out1, in1);
+	  (dss_funcs[i]) (out1, out1, in2);
+	  MPQ_CHECK_FORMAT(out1);
+
+	  if (mpq_cmp (res1, out1) != 0)
+	    dump_abort (dss_func_names[i], res1, out1);
+
+	  mpq_set (out1, in2);
+	  (dss_funcs[i]) (out1, in1, out1);
+	  MPQ_CHECK_FORMAT(out1);
+
+	  if (mpq_cmp (res1, out1) != 0)
+	    dump_abort (dss_func_names[i], res1, out1);
+
+	  mpq_set (out1, in2);
+	  (dss_funcs[i]) (res1, out1, in2);
+	  MPQ_CHECK_FORMAT(res1);
+
+	  (dss_funcs[i]) (res2, in2, in2);
+	  MPQ_CHECK_FORMAT(res2);
+
+	  (dss_funcs[i]) (out1, out1, out1);
+	  MPQ_CHECK_FORMAT(out1);
+
+	  if (mpq_cmp (res1, res2) != 0)
+	    dump_abort (dss_func_names[i], res1, res2);
+	  if (mpq_cmp (res1, out1) != 0)
+	    dump_abort (dss_func_names[i], res1, out1);
+	}
+
+      for (i = 0; i < sizeof (ds_funcs) / sizeof (ds_func); i++)
+	{
+	  if (randbits & 1)
+	    {
+	      mpq_clear (res1);
+	      mpq_init (res1);
+	    }
+	  randbits >>= 1;
+	  (ds_funcs[i]) (res1, in1);
+	  MPQ_CHECK_FORMAT(res1);
+
+	  mpq_set (out1, in1);
+	  (ds_funcs[i]) (out1, out1);
+	  MPQ_CHECK_FORMAT(out1);
+
+	  if (mpq_cmp (res1, out1) != 0)
+	    dump_abort (ds_func_names[i], res1, out1);
+	}
+
+      in2i = urandom () % 65536;
+      for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)
+	{
+	  if (randbits & 1)
+	    {
+	      mpq_clear (res1);
+	      mpq_init (res1);
+	    }
+	  randbits >>= 1;
+
+	  (dsi_funcs[i]) (res1, in1, in2i);
+	  MPQ_CHECK_FORMAT(res1);
+
+	  mpq_set (out1, in1);
+	  (dsi_funcs[i]) (out1, out1, in2i);
+	  MPQ_CHECK_FORMAT(out1);
+
+	  if (mpq_cmp (res1, out1) != 0)
+	    dump_abort (dsi_func_names[i], res1, out1);
+	}
+
+    }
+
+  mpq_clear (in1);
+  mpq_clear (in2);
+  mpq_clear (out1);
+  mpq_clear (res1);
+  mpq_clear (res2);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (const char *name, mpq_t res1, mpq_t res2)
+{
+  printf ("failure in %s:\n", name);
+  mpq_trace ("  res1  ", res1);
+  mpq_trace ("  res2  ", res2);
+  abort ();
+}
+
+#endif /* ! DLL_EXPORT */
diff --git a/tests/mpq/t-aors.c b/tests/mpq/t-aors.c
new file mode 100644
index 0000000..8ee9f93
--- /dev/null
+++ b/tests/mpq/t-aors.c
@@ -0,0 +1,182 @@
+/* Test mpq_add and mpq_sub.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_all (mpq_ptr x, mpq_ptr y, mpq_ptr want_add, mpq_ptr want_sub)
+{
+  mpq_t  got;
+  int    neg_x, neg_y, swap;
+
+  mpq_init (got);
+
+  MPQ_CHECK_FORMAT (want_add);
+  MPQ_CHECK_FORMAT (want_sub);
+  MPQ_CHECK_FORMAT (x);
+  MPQ_CHECK_FORMAT (y);
+
+  for (swap = 0; swap <= 1; swap++)
+    {
+      for (neg_x = 0; neg_x <= 1; neg_x++)
+        {
+          for (neg_y = 0; neg_y <= 1; neg_y++)
+            {
+              mpq_add (got, x, y);
+              MPQ_CHECK_FORMAT (got);
+              if (! mpq_equal (got, want_add))
+                {
+                  printf ("mpq_add wrong\n");
+                  mpq_trace ("  x   ", x);
+                  mpq_trace ("  y   ", y);
+                  mpq_trace ("  got ", got);
+                  mpq_trace ("  want", want_add);
+                  abort ();
+                }
+
+              mpq_sub (got, x, y);
+              MPQ_CHECK_FORMAT (got);
+              if (! mpq_equal (got, want_sub))
+                {
+                  printf ("mpq_sub wrong\n");
+                  mpq_trace ("  x   ", x);
+                  mpq_trace ("  y   ", y);
+                  mpq_trace ("  got ", got);
+                  mpq_trace ("  want", want_sub);
+                  abort ();
+                }
+
+
+              mpq_neg (y, y);
+              mpq_swap (want_add, want_sub);
+            }
+
+          mpq_neg (x, x);
+          mpq_swap (want_add, want_sub);
+          mpq_neg (want_add, want_add);
+          mpq_neg (want_sub, want_sub);
+        }
+
+      mpq_swap (x, y);
+      mpq_neg (want_sub, want_sub);
+    }
+
+  mpq_clear (got);
+}
+
+
+void
+check_data (void)
+{
+  static const struct {
+    const char  *x;
+    const char  *y;
+    const char  *want_add;
+    const char  *want_sub;
+
+  } data[] = {
+
+    { "0", "0", "0", "0" },
+    { "1", "0", "1", "1" },
+    { "1", "1", "2", "0" },
+
+    { "1/2", "1/2", "1", "0" },
+    { "5/6", "14/15", "53/30", "-1/10" },
+  };
+
+  mpq_t  x, y, want_add, want_sub;
+  int i;
+
+  mpq_init (x);
+  mpq_init (y);
+  mpq_init (want_add);
+  mpq_init (want_sub);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpq_set_str_or_abort (x, data[i].x, 0);
+      mpq_set_str_or_abort (y, data[i].y, 0);
+      mpq_set_str_or_abort (want_add, data[i].want_add, 0);
+      mpq_set_str_or_abort (want_sub, data[i].want_sub, 0);
+
+      check_all (x, y, want_add, want_sub);
+    }
+
+  mpq_clear (x);
+  mpq_clear (y);
+  mpq_clear (want_add);
+  mpq_clear (want_sub);
+}
+
+
+void
+check_rand (void)
+{
+  mpq_t  x, y, want_add, want_sub;
+  int i;
+  gmp_randstate_ptr  rands = RANDS;
+
+  mpq_init (x);
+  mpq_init (y);
+  mpq_init (want_add);
+  mpq_init (want_sub);
+
+  for (i = 0; i < 500; i++)
+    {
+      mpz_errandomb (mpq_numref(x), rands, 512L);
+      mpz_errandomb_nonzero (mpq_denref(x), rands, 512L);
+      mpq_canonicalize (x);
+
+      mpz_errandomb (mpq_numref(y), rands, 512L);
+      mpz_errandomb_nonzero (mpq_denref(y), rands, 512L);
+      mpq_canonicalize (y);
+
+      refmpq_add (want_add, x, y);
+      refmpq_sub (want_sub, x, y);
+
+      check_all (x, y, want_add, want_sub);
+    }
+
+  mpq_clear (x);
+  mpq_clear (y);
+  mpq_clear (want_add);
+  mpq_clear (want_sub);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+  check_rand ();
+
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpq/t-cmp.c b/tests/mpq/t-cmp.c
new file mode 100644
index 0000000..187db7f
--- /dev/null
+++ b/tests/mpq/t-cmp.c
@@ -0,0 +1,101 @@
+/* Test mpq_cmp.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
/* Reduce a comparison result to its sign: -1, 0 or 1.  */
#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
+
+int
+ref_mpq_cmp (mpq_t a, mpq_t b)
+{
+  mpz_t ai, bi;
+  int cc;
+
+  mpz_init (ai);
+  mpz_init (bi);
+
+  mpz_mul (ai, NUM (a), DEN (b));
+  mpz_mul (bi, NUM (b), DEN (a));
+  cc = mpz_cmp (ai, bi);
+  mpz_clear (ai);
+  mpz_clear (bi);
+  return cc;
+}
+
/* Limb-count bound for the random operands below.  Increasing this lowers
   the probability of finding an error, since small operands hit edge cases
   more often.  */
#ifndef SIZE
#define SIZE 8
#endif
+
/* Compare mpq_cmp against ref_mpq_cmp on random canonical rationals.
   Optional argv[1] overrides the repetition count.  */
int
main (int argc, char **argv)
{
  mpq_t a, b;
  mp_size_t size;
  int reps = 10000;
  int i;
  int cc, ccref;

  tests_start ();

  if (argc == 2)
     reps = atoi (argv[1]);

  mpq_init (a);
  mpq_init (b);

  for (i = 0; i < reps; i++)
    {
      /* Size drawn from roughly [-SIZE/2, SIZE/2); a negative size asks
	 mpz_random2 for a negative value.  */
      size = urandom () % SIZE - SIZE/2;
      mpz_random2 (NUM (a), size);
      /* Denominator likewise, retried until nonzero.  */
      do
	{
	  size = urandom () % SIZE - SIZE/2;
	  mpz_random2 (DEN (a), size);
	}
      while (mpz_cmp_ui (DEN (a), 0) == 0);

      size = urandom () % SIZE - SIZE/2;
      mpz_random2 (NUM (b), size);
      do
	{
	  size = urandom () % SIZE - SIZE/2;
	  mpz_random2 (DEN (b), size);
	}
      while (mpz_cmp_ui (DEN (b), 0) == 0);

      /* mpq_cmp requires canonical operands.  */
      mpq_canonicalize (a);
      mpq_canonicalize (b);

      ccref = ref_mpq_cmp (a, b);
      cc = mpq_cmp (a, b);

      /* Only the signs need agree; magnitudes are unspecified.  */
      if (SGN (ccref) != SGN (cc))
	abort ();
    }

  mpq_clear (a);
  mpq_clear (b);

  tests_end ();
  exit (0);
}
diff --git a/tests/mpq/t-cmp_si.c b/tests/mpq/t-cmp_si.c
new file mode 100644
index 0000000..5ffa247
--- /dev/null
+++ b/tests/mpq/t-cmp_si.c
@@ -0,0 +1,117 @@
+/* Test mpq_cmp_si.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
/* Reduce a comparison result to its sign: -1, 0 or 1 (the second arm of
   the conditional yields 1 for positive, 0 for zero).  */
#define SGN(x)   ((x)<0 ? -1 : (x) != 0)
+
/* Check mpq_cmp_si against a table of fixed cases: compare the rational q
   (parsed from a string) with the fraction n/d and verify the sign of the
   result matches the expected -1/0/1.  */
void
check_data (void)
{
  static const struct {
    const char     *q;    /* rational operand, mpq_set_str format */
    long           n;     /* numerator of the si/ui fraction */
    unsigned long  d;     /* denominator of the si/ui fraction */
    int            want;  /* expected sign of mpq_cmp_si (q, n, d) */
  } data[] = {
    { "0", 0, 1, 0 },
    { "0", 0, 123, 0 },
    { "0", 0, ULONG_MAX, 0 },
    { "1", 0, 1, 1 },
    { "1", 0, 123, 1 },
    { "1", 0, ULONG_MAX, 1 },
    { "-1", 0, 1, -1 },
    { "-1", 0, 123, -1 },
    { "-1", 0, ULONG_MAX, -1 },

    { "123", 123, 1, 0 },
    { "124", 123, 1, 1 },
    { "122", 123, 1, -1 },

    { "-123", 123, 1, -1 },
    { "-124", 123, 1, -1 },
    { "-122", 123, 1, -1 },

    { "123", -123, 1, 1 },
    { "124", -123, 1, 1 },
    { "122", -123, 1, 1 },

    { "-123", -123, 1, 0 },
    { "-124", -123, 1, -1 },
    { "-122", -123, 1, 1 },

    { "5/7", 3,4, -1 },
    { "5/7", -3,4, 1 },
    { "-5/7", 3,4, -1 },
    { "-5/7", -3,4, 1 },
  };

  mpq_t  q;
  int    i, got;

  mpq_init (q);

  for (i = 0; i < numberof (data); i++)
    {
      mpq_set_str_or_abort (q, data[i].q, 0);
      MPQ_CHECK_FORMAT (q);

      got = mpq_cmp_si (q, data[i].n, data[i].d);
      if (SGN(got) != data[i].want)
        {
          printf ("mpq_cmp_si wrong\n");
          /* Shared diagnostic tail, also reached by goto from the
             n==0 re-check below.  */
        error:
          mpq_trace ("  q", q);
          printf ("  n=%ld\n", data[i].n);
          printf ("  d=%lu\n", data[i].d);
          printf ("  got=%d\n", got);
          printf ("  want=%d\n", data[i].want);
          abort ();
        }

      /* For n==0 entries, also exercise the explicit 0L literal path.  */
      if (data[i].n == 0)
        {
          got = mpq_cmp_si (q, 0L, data[i].d);
          if (SGN(got) != data[i].want)
            {
              printf ("mpq_cmp_si wrong\n");
              goto error;
            }
        }
    }

  mpq_clear (q);
}
+
/* Entry point: argc/argv are unused but kept for signature consistency
   with the other tests in this directory.  */
int
main (int argc, char **argv)
{
  tests_start ();

  check_data ();

  tests_end ();
  return 0;
}
diff --git a/tests/mpq/t-cmp_ui.c b/tests/mpq/t-cmp_ui.c
new file mode 100644
index 0000000..51bb2cb
--- /dev/null
+++ b/tests/mpq/t-cmp_ui.c
@@ -0,0 +1,116 @@
+/* Test mpq_cmp_ui.
+
+Copyright 1996, 1997, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
/* Reduce a comparison result to its sign: -1, 0 or 1.  */
#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
+
+int
+ref_mpq_cmp_ui (mpq_t a, unsigned long int bn, unsigned long int bd)
+{
+  mpz_t ai, bi;
+  int cc;
+
+  mpz_init (ai);
+  mpz_init (bi);
+
+  mpz_mul_ui (ai, NUM (a), bd);
+  mpz_mul_ui (bi, DEN (a), bn);
+  cc = mpz_cmp (ai, bi);
+  mpz_clear (ai);
+  mpz_clear (bi);
+  return cc;
+}
+
/* Limb-count bound for the random operands below.  Increasing this lowers
   the probability of finding an error.  */
#ifndef SIZE
#define SIZE 8
#endif
+
/* Compare mpq_cmp_ui against ref_mpq_cmp_ui on random rationals, using
   the fixed fractions 1/1 and 0/1 plus a random single-limb fraction.
   Optional argv[1] overrides the repetition count.  */
int
main (int argc, char **argv)
{
  mpq_t a, b;
  mp_size_t size;
  int reps = 10000;
  int i;
  int cc, ccref;
  unsigned long int bn, bd;

  tests_start ();

  if (argc == 2)
     reps = atoi (argv[1]);

  mpq_init (a);
  mpq_init (b);

  for (i = 0; i < reps; i++)
    {
      /* Random rational a with nonzero denominator; a negative size asks
	 mpz_random2 for a negative value.  */
      size = urandom () % SIZE - SIZE/2;
      mpz_random2 (NUM (a), size);
      do
	{
	  size = urandom () % SIZE - SIZE/2;
	  mpz_random2 (DEN (a), size);
	}
      while (mpz_cmp_ui (DEN (a), 0) == 0);

      /* Single-limb num and den for b, reduced mod ULONG_MAX then
	 incremented, giving values in [1, ULONG_MAX].  */
      mpz_random2 (NUM (b), (mp_size_t) 1);
      mpz_mod_ui (NUM (b), NUM (b), ~(unsigned long int) 0);
      mpz_add_ui (NUM (b), NUM (b), 1);

      mpz_random2 (DEN (b), (mp_size_t) 1);
      mpz_mod_ui (DEN (b), DEN (b), ~(unsigned long int) 0);
      mpz_add_ui (DEN (b), DEN (b), 1);

      mpq_canonicalize (a);
      mpq_canonicalize (b);

      /* Fixed comparisons against 1/1 and 0/1.  */
      ccref = ref_mpq_cmp_ui (a, 1, 1);
      cc = mpq_cmp_ui (a, 1, 1);

      if (SGN (ccref) != SGN (cc))
	abort ();

      ccref = ref_mpq_cmp_ui (a, 0, 1);
      cc = mpq_cmp_ui (a, 0, 1);

      if (SGN (ccref) != SGN (cc))
	abort ();

      /* Random comparison; b fits a ulong by construction above.  */
      bn = mpz_get_ui (NUM (b));
      bd = mpz_get_ui (DEN (b));

      ccref = ref_mpq_cmp_ui (a, bn, bd);
      cc = mpq_cmp_ui (a, bn, bd);

      if (SGN (ccref) != SGN (cc))
	abort ();
    }

  mpq_clear (a);
  mpq_clear (b);

  tests_end ();
  exit (0);
}
diff --git a/tests/mpq/t-cmp_z.c b/tests/mpq/t-cmp_z.c
new file mode 100644
index 0000000..3e66a3c
--- /dev/null
+++ b/tests/mpq/t-cmp_z.c
@@ -0,0 +1,146 @@
+/* Test mpq_cmp_z.
+
+Copyright 1996, 2001, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
/* Reduce a comparison result to its sign: -1, 0 or 1.  */
#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
+
+int
+ref_mpq_cmp_z (mpq_t a, mpz_t b)
+{
+  mpz_t bi;
+  int cc;
+
+  mpz_init (bi);
+
+  mpz_mul (bi, b, DEN (a));
+  cc = mpz_cmp (NUM (a), bi);
+  mpz_clear (bi);
+  return cc;
+}
+
#ifndef SIZE
#define SIZE 8	/* increasing this lowers the probability of finding an error */
#endif

#ifndef MAXN
#define MAXN 5	/* increasing this impacts the total running time */
#endif
+
/* Structured mpq_cmp_z test around powers of 2.  For each i, the
   denominator accumulates to \sum_{t=0}^{i} 2^{t*m}, the numerator is
   2^{j*m} and b is 2^{k*m}, so the expected sign can be derived from the
   exponents alone.  Each case is run with both signs (s loop), which
   leaves a and b with their original signs restored on exit.  */
void
sizes_test (int m)
{
  mpq_t a;
  mpz_t b;
  int i, j, k, s;
  int cc, ccref;

  mpq_init (a);
  mpz_init (b);

  for (i = 0; i <= MAXN ; ++i)
    {
      mpz_setbit (DEN (a), i*m); /* \sum_0^i 2^(i*m) */
      for (j = 0; j <= MAXN; ++j)
	{
	  mpz_set_ui (NUM (a), 0);
	  mpz_setbit (NUM (a), j*m); /* 2^(j*m) */
	  for (k = 0; k <= MAXN; ++k)
	    {
	      mpz_set_ui (b, 0);
	      mpz_setbit (b, k*m); /* 2^(k*m) */
	      if (i == 0) /* Denominator is 1, compare the two exponents */
		ccref = (j>k)-(j<k);
	      else
		/* For i > 0 the denominator lies strictly between 2^{i*m}
		   and 2^{i*m+1}, so a is never exactly a power of 2 and
		   a > b exactly when j-i > k.  */
		ccref = j-i > k ? 1 : -1;
	      for (s = 1; s >= -1; s -= 2)
		{
		  cc = mpq_cmp_z (a, b);

		  if (ccref != SGN (cc))
		    {
		      fprintf (stderr, "i=%i, j=%i, k=%i, m=%i, s=%i\n; ccref= %i, cc= %i\n", i, j, k, m, s, ccref, cc);
		      abort ();
		    }

		  /* Flip both signs; comparison result negates too.  */
		  mpq_neg (a, a);
		  mpz_neg (b, b);
		  ccref = - ccref;
		}
	    }
	}
    }

  mpq_clear (a);
  mpz_clear (b);
}
+
/* Compare mpq_cmp_z against ref_mpq_cmp_z on random operands, with a
   periodic structured pass via sizes_test.  Optional argv[1] overrides
   the repetition count.  */
int
main (int argc, char **argv)
{
  mpq_t a;
  mpz_t b;
  mp_size_t size;
  int reps = 10000;
  int i;
  int cc, ccref;

  tests_start ();

  if (argc == 2)
     reps = atoi (argv[1]);

  mpq_init (a);
  mpz_init (b);

  for (i = 0; i < reps; i++)
    {
      /* Every 8192 iterations run the power-of-2 grid with a random
	 bit-spacing m in [1, i+1].  */
      if (i % 8192 == 0)
	sizes_test (urandom () % (i + 1) + 1);
      size = urandom () % SIZE - SIZE/2;
      mpz_random2 (NUM (a), size);
      do
	{
	  size = urandom () % (SIZE/2);
	  mpz_random2 (DEN (a), size);
	}
      while (mpz_cmp_ui (DEN (a), 0) == 0);

      size = urandom () % SIZE - SIZE/2;
      mpz_random2 (b, size);

      mpq_canonicalize (a);

      ccref = ref_mpq_cmp_z (a, b);
      cc = mpq_cmp_z (a, b);

      if (SGN (ccref) != SGN (cc))
	abort ();
    }

  mpq_clear (a);
  mpz_clear (b);

  tests_end ();
  exit (0);
}
diff --git a/tests/mpq/t-equal.c b/tests/mpq/t-equal.c
new file mode 100644
index 0000000..671c530
--- /dev/null
+++ b/tests/mpq/t-equal.c
@@ -0,0 +1,146 @@
+/* Test mpq_equal.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpq_srcptr x, mpq_srcptr y, int want)
+{
+  int  got;
+
+  MPQ_CHECK_FORMAT (x);
+  MPQ_CHECK_FORMAT (y);
+
+  got = mpq_equal (x, y);
+  if ((got != 0) != (want != 0))
+    {
+      printf ("mpq_equal got %d want %d\n", got, want);
+      mpq_trace ("x", x);
+      mpq_trace ("y", y);
+      abort ();
+    }
+}
+
+
/* Check both argument orders, then both orders again with signs flipped;
   equality is symmetric and invariant under negating both operands.
   Note x and y are left negated on return.  */
void
check_all (mpq_ptr x, mpq_ptr y, int want)
{
  check_one (x, y, want);
  check_one (y, x, want);

  mpq_neg (x, x);
  mpq_neg (y, y);

  check_one (x, y, want);
  check_one (y, x, want);
}
+
+
/* Plant four literal limbs and a size directly into an mpz; limbs above
   abs(size) are garbage the code under test must ignore.  */
#define SET4Z(z, size,l3,l2,l1,l0) \
  SIZ(z) = size; PTR(z)[3] = l3; PTR(z)[2] = l2; PTR(z)[1] = l1; PTR(z)[0] = l0

/* Same for a full mpq: numerator then denominator.  */
#define SET4(q, nsize,n3,n2,n1,n0, dsize,d3,d2,d1,d0)   \
  SET4Z (mpq_numref(q), nsize,n3,n2,n1,n0);             \
  SET4Z (mpq_denref(q), dsize,d3,d2,d1,d0)
+
+
+/* Exercise various combinations of same and slightly different values. */
+
+void
+check_various (void)
+{
+  mpq_t  x, y;
+
+  mpq_init (x);
+  mpq_init (y);
+
+  mpz_realloc (mpq_numref(x), (mp_size_t) 20);
+  mpz_realloc (mpq_denref(x), (mp_size_t) 20);
+  mpz_realloc (mpq_numref(y), (mp_size_t) 20);
+  mpz_realloc (mpq_denref(y), (mp_size_t) 20);
+
+  /* 0 == 0 */
+  SET4 (x, 0,13,12,11,10, 1,23,22,21,1);
+  SET4 (y, 0,33,32,31,30, 1,43,42,41,1);
+  check_all (x, y, 1);
+
+  /* 83/99 == 83/99 */
+  SET4 (x, 1,13,12,11,83, 1,23,22,21,99);
+  SET4 (y, 1,33,32,31,83, 1,43,42,41,99);
+  check_all (x, y, 1);
+
+  /* 1:2:3:4/5:6:7 == 1:2:3:4/5:6:7 */
+  SET4 (x, 4,1,2,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 1);
+
+  /* various individual changes making != */
+  SET4 (x, 4,1,2,3,667, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  SET4 (x, 4,1,2,666,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  SET4 (x, 4,1,666,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+#if GMP_NUMB_BITS != 62
+  SET4 (x, 4,667,2,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+#endif
+  SET4 (x, 4,1,2,3,4, 3,88,5,6,667);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  SET4 (x, 4,1,2,3,4, 3,88,5,667,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  SET4 (x, 4,1,2,3,4, 3,88,666,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  SET4 (x, -4,1,2,3,4, 3,88,5,6,7);
+  SET4 (y,  4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  SET4 (x, 1,1,2,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  SET4 (x, 4,1,2,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 2,99,5,6,7);
+  check_all (x, y, 0);
+
+  mpq_clear (x);
+  mpq_clear (y);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_various ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/t-get_d.c b/tests/mpq/t-get_d.c
new file mode 100644
index 0000000..98d6a5f
--- /dev/null
+++ b/tests/mpq/t-get_d.c
@@ -0,0 +1,294 @@
+/* Test mpq_get_d and mpq_set_d
+
+Copyright 1991, 1993, 1994, 1996, 2000-2003, 2012, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
#ifndef SIZE
#define SIZE 8
#endif

/* VAX D floats only have an 8 bit signed exponent, so anything 2^128 or
   bigger will overflow, that being 4 limbs.  Parentheses are needed:
   && binds tighter than ||, so without them a __vax-only compiler would
   bypass the SIZE > 4 guard entirely.  */
#if (defined (__vax) || defined (__vax__)) && SIZE > 4
#undef SIZE
#define SIZE 4
#define EPSIZE 3
#else
#define EPSIZE SIZE
#endif
+
/* Forward declaration; dump is defined later in this file.  */
void dump (mpq_t);
+
/* Check monotonicity of mpq_get_d and mpq_set_d: repeatedly add a random
   positive epsilon to a rational and require that neither the double
   conversion nor the rational recovered from it ever decreases.  */
void
check_monotonic (int argc, char **argv)
{
  mpq_t a;
  mp_size_t size;
  int reps = 100;
  int i, j;
  double last_d, new_d;
  mpq_t qlast_d, qnew_d;
  mpq_t eps;

  if (argc == 2)
     reps = atoi (argv[1]);

  /* The idea here is to test the monotonicity of mpq_get_d by adding
     numbers to the numerator and denominator.  */

  mpq_init (a);
  mpq_init (eps);
  mpq_init (qlast_d);
  mpq_init (qnew_d);

  for (i = 0; i < reps; i++)
    {
      /* Random starting rational; a negative size asks mpz_random2 for a
	 negative value.  */
      size = urandom () % SIZE - SIZE/2;
      mpz_random2 (mpq_numref (a), size);
      do
	{
	  size = urandom () % SIZE - SIZE/2;
	  mpz_random2 (mpq_denref (a), size);
	}
      while (mpz_cmp_ui (mpq_denref (a), 0) == 0);

      mpq_canonicalize (a);

      last_d = mpq_get_d (a);
      mpq_set_d (qlast_d, last_d);
      for (j = 0; j < 10; j++)
	{
	  /* eps has positive size (>= 1) for both parts, so it is > 0
	     and adding it must not decrease a.  */
	  size = urandom () % EPSIZE + 1;
	  mpz_random2 (mpq_numref (eps), size);
	  size = urandom () % EPSIZE + 1;
	  mpz_random2 (mpq_denref (eps), size);
	  mpq_canonicalize (eps);

	  mpq_add (a, a, eps);
	  mpq_canonicalize (a);
	  new_d = mpq_get_d (a);
	  if (last_d > new_d)
	    {
	      printf ("\nERROR (test %d/%d): bad mpq_get_d results\n", i, j);
	      printf ("last: %.16g\n", last_d);
	      printf (" new: %.16g\n", new_d); dump (a);
	      abort ();
	    }
	  /* The round-tripped rationals must be monotonic too.  */
	  mpq_set_d (qnew_d, new_d);
	  MPQ_CHECK_FORMAT (qnew_d);
	  if (mpq_cmp (qlast_d, qnew_d) > 0)
	    {
	      printf ("ERROR (test %d/%d): bad mpq_set_d results\n", i, j);
	      printf ("last: %.16g\n", last_d); dump (qlast_d);
	      printf (" new: %.16g\n", new_d); dump (qnew_d);
	      abort ();
	    }
	  last_d = new_d;
	  mpq_set (qlast_d, qnew_d);
	}
    }

  mpq_clear (a);
  mpq_clear (eps);
  mpq_clear (qlast_d);
  mpq_clear (qnew_d);
}
+
/* Portable ldexp substitute: return d scaled by 2^e.  The scaling is
   applied in chunks of 2^16 so the loop stays short, falling back to
   single doublings/halvings for the remainder.  */
double
my_ldexp (double d, int e)
{
  while (e != 0)
    {
      if (e >= 16)
	{
	  d *= 65536.0;
	  e -= 16;
	}
      else if (e > 0)
	{
	  d *= 2.0;
	  e -= 1;
	}
      else if (e <= -16)
	{
	  d /= 65536.0;
	  e += 16;
	}
      else
	{
	  d /= 2.0;
	  e += 1;
	}
    }
  return d;
}
+
/* Largest binary exponent used by check_random; kept well inside double
   range on normal machines.  */
#define MAXEXP 500

/* VAX D floats have only an 8 bit signed exponent, so stay far below.  */
#if defined (__vax) || defined (__vax__)
#undef MAXEXP
#define MAXEXP 30
#endif
+
/* Check mpq_set_d on random doubles: build d = a * 2^exp with a 53-bit
   random mantissa (so d is exactly representable), convert with
   mpq_set_d, and verify the resulting num/den reproduces a * 2^exp
   exactly.  Optional argv[1] scales the repetition count.  */
void
check_random (int argc, char **argv)
{
  gmp_randstate_ptr rands = RANDS;

  double d;
  mpq_t q;
  mpz_t a, t;
  int exp;

  int test, reps = 100000;

  if (argc == 2)
     reps = 100 * atoi (argv[1]);

  mpq_init (q);
  mpz_init (a);
  mpz_init (t);

  for (test = 0; test < reps; test++)
    {
      mpz_rrandomb (a, rands, 53);
      mpz_urandomb (t, rands, 32);
      /* Intended range [-MAXEXP, MAXEXP).  NOTE(review): the modulo and
	 subtraction happen in unsigned long before conversion to int —
	 works on common twos-complement targets; confirm portability.  */
      exp = mpz_get_ui (t) % (2*MAXEXP) - MAXEXP;

      d = my_ldexp (mpz_get_d (a), exp);
      mpq_set_d (q, d);
      /* Check that n/d = a * 2^exp, or
	 d*a 2^{exp} = n */
      mpz_mul (t, a, mpq_denref (q));
      if (exp > 0)
	mpz_mul_2exp (t, t, exp);
      else
	{
	  /* For negative exp, the product must be exactly divisible.  */
	  if (!mpz_divisible_2exp_p (t, -exp))
	    goto fail;
	  mpz_div_2exp (t, t, -exp);
	}
      if (mpz_cmp (t, mpq_numref (q)) != 0)
	{
	fail:
	  printf ("ERROR (check_random test %d): bad mpq_set_d results\n", test);
	  printf ("%.16g\n", d);
	  gmp_printf ("%Qd\n", q);
	  abort ();
	}
    }
  mpq_clear (q);
  mpz_clear (t);
  mpz_clear (a);
}
+
/* Print X to stdout as "num/den" in decimal, with a trailing newline.  */
void
dump (mpq_t x)
{
  mpz_out_str (stdout, 10, mpq_numref (x));
  printf ("/");
  mpz_out_str (stdout, 10, mpq_denref (x));
  printf ("\n");
}
+
/* Check various values 2^n and 1/2^n.  The exponents straddle limb
   boundaries where conversion code is most likely to slip, and each
   value is checked with both signs.  */
void
check_onebit (void)
{
  static const long data[] = {
    -3*GMP_NUMB_BITS-1, -3*GMP_NUMB_BITS, -3*GMP_NUMB_BITS+1,
    -2*GMP_NUMB_BITS-1, -2*GMP_NUMB_BITS, -2*GMP_NUMB_BITS+1,
    -GMP_NUMB_BITS-1, -GMP_NUMB_BITS, -GMP_NUMB_BITS+1,
    -5, -2, -1, 0, 1, 2, 5,
    GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,
    2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,
    3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1,
  };

  int     i, neg;
  long    exp, l;
  mpq_t   q;
  double  got, want;

  mpq_init (q);

  for (i = 0; i < numberof (data); i++)
    {
      exp = data[i];

      /* q = 2^exp, via shift of 1 in the appropriate direction.  */
      mpq_set_ui (q, 1L, 1L);
      if (exp >= 0)
	mpq_mul_2exp (q, q, exp);
      else
	mpq_div_2exp (q, q, -exp);

      /* Expected double built by repeated exact doubling/halving; only
	 one of the two loops runs, depending on the sign of exp.  */
      want = 1.0;
      for (l = 0; l < exp; l++)
	want *= 2.0;
      for (l = 0; l > exp; l--)
	want /= 2.0;

      for (neg = 0; neg <= 1; neg++)
	{
	  if (neg)
	    {
	      mpq_neg (q, q);
	      want = -want;
	    }

	  got = mpq_get_d (q);

	  /* Powers of two are exactly representable, so demand exact
	     equality.  */
	  if (got != want)
	    {
	      printf    ("mpq_get_d wrong on %s2**%ld\n", neg ? "-" : "", exp);
	      mpq_trace ("   q    ", q);
	      d_trace   ("   want ", want);
	      d_trace   ("   got  ", got);
	      abort();
	    }
	}
    }
  mpq_clear (q);
}
+
/* Deterministic cases first, then the randomized monotonicity and
   round-trip checks (both honour an optional repetition-count arg).  */
int
main (int argc, char **argv)
{
  tests_start ();

  check_onebit ();
  check_monotonic (argc, argv);
  check_random (argc, argv);

  tests_end ();
  return 0;
}
diff --git a/tests/mpq/t-get_str.c b/tests/mpq/t-get_str.c
new file mode 100644
index 0000000..e586521
--- /dev/null
+++ b/tests/mpq/t-get_str.c
@@ -0,0 +1,142 @@
+/* Test mpq_get_str.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
/* Check mpq_get_str on Q in BASE against the expected string WANT, both
   with a NULL buffer (library allocates) and with a caller-supplied
   buffer sized by the mpz_sizeinbase documented bound.  */
void
check_one (mpq_srcptr q, int base, const char *want)
{
  char    *str, *ret;
  size_t  str_alloc;

  MPQ_CHECK_FORMAT (q);
  mp_trace_base = base;

  /* Documented worst case: digits of num + digits of den, plus '/',
     possible '-', and the terminating NUL.  */
  str_alloc =
    mpz_sizeinbase (mpq_numref(q), ABS(base)) +
    mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3;

  str = mpq_get_str (NULL, base, q);
  if (strlen(str)+1 > str_alloc)
    {
      printf ("mpq_get_str size bigger than should be (passing NULL)\n");
      printf ("  base %d\n", base);
      printf ("  got  size %lu \"%s\"\n", (unsigned long)  strlen(str)+1, str);
      printf ("  want size %lu\n", (unsigned long) str_alloc);
      abort ();
    }
  if (strcmp (str, want) != 0)
    {
      printf ("mpq_get_str wrong (passing NULL)\n");
      printf ("  base %d\n", base);
      printf ("  got  \"%s\"\n", str);
      printf ("  want \"%s\"\n", want);
      mpq_trace ("  q", q);
      abort ();
    }
  (*__gmp_free_func) (str, strlen (str) + 1);

  /* Now with a caller-supplied buffer; the same pointer must come back.  */
  str = (char *) (*__gmp_allocate_func) (str_alloc);

  ret = mpq_get_str (str, base, q);
  if (str != ret)
    {
      printf ("mpq_get_str wrong return value (passing non-NULL)\n");
      printf ("  base %d\n", base);
      printf ("  got  %p\n", (void *) ret);
      printf ("  want %p\n", (void *) str);
      abort ();
    }
  if (strcmp (str, want) != 0)
    {
      printf ("mpq_get_str wrong (passing non-NULL)\n");
      printf ("  base %d\n", base);
      printf ("  got  \"%s\"\n", str);
      printf ("  want \"%s\"\n", want);
      abort ();
    }
  (*__gmp_free_func) (str, str_alloc);
}
+
+
+void
+check_all (mpq_srcptr q, int base, const char *want)
+{
+  char  *s;
+
+  check_one (q, base, want);
+
+  s = __gmp_allocate_strdup (want);
+  strtoupper (s);
+  check_one (q, -base, s);
+  (*__gmp_free_func) (s, strlen(s)+1);
+}
+
/* Table-driven mpq_get_str cases.  Numerator and denominator are set
   directly (not canonicalized), so the table must supply values already
   in lowest terms with positive denominator.  */
void
check_data (void)
{
  static const struct {
    int         base;   /* conversion base for both input and output */
    const char  *num;   /* numerator, in that base */
    const char  *den;   /* denominator, in that base */
    const char  *want;  /* expected "num" or "num/den" output */
  } data[] = {
    { 10, "0", "1", "0" },
    { 10, "1", "1", "1" },

    { 16, "ffffffff", "1", "ffffffff" },
    { 16, "ffffffffffffffff", "1", "ffffffffffffffff" },

    { 16, "1", "ffffffff", "1/ffffffff" },
    { 16, "1", "ffffffffffffffff", "1/ffffffffffffffff" },
    { 16, "1", "10000000000000003", "1/10000000000000003" },

    { 10, "12345678901234567890", "9876543210987654323",
      "12345678901234567890/9876543210987654323" },
  };

  mpq_t  q;
  int    i;

  mpq_init (q);
  for (i = 0; i < numberof (data); i++)
    {
      mpz_set_str_or_abort (mpq_numref(q), data[i].num, data[i].base);
      mpz_set_str_or_abort (mpq_denref(q), data[i].den, data[i].base);
      check_all (q, data[i].base, data[i].want);
    }
  mpq_clear (q);
}
+
+
/* Entry point: run the table-driven cases.  */
int
main (void)
{
  tests_start ();

  check_data ();

  tests_end ();
  return 0;
}
diff --git a/tests/mpq/t-inp_str.c b/tests/mpq/t-inp_str.c
new file mode 100644
index 0000000..5964054
--- /dev/null
+++ b/tests/mpq/t-inp_str.c
@@ -0,0 +1,171 @@
+/* Test mpq_inp_str.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>   /* for unlink */
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
/* Fixed scratch file, removed in main.  NOTE(review): a predictable /tmp
   name is acceptable in a test harness but would be a symlink-race risk
   in production code.  */
#define FILENAME  "/tmp/t-inp_str.tmp"
+
+
/* Table-driven mpq_inp_str cases.  Each input is written to a scratch
   file (optionally followed by `post' trailing spaces), read back with
   mpq_inp_str, and checked for the parsed value, the character count
   returned, and the file position left behind.  want_nread == 0 marks
   inputs that must be rejected.  */
void
check_data (void)
{
  static const struct {
    const char  *inp;        /* text presented to mpq_inp_str */
    int         base;        /* base argument (0 =自-detect is NOT meant:
				0 means leading-0x/0 detection) */
    const char  *want;       /* expected parsed value, base-0 string */
    int         want_nread;  /* expected return; 0 = parse failure */

  } data[] = {

    { "0",   10, "0", 1 },
    { "0/1", 10, "0", 3 },

    { "0/",   10, "0", 0 },
    { "/123", 10, "0", 0 },
    { "blah", 10, "0", 0 },
    { "123/blah", 10, "0", 0 },
    { "5 /8", 10, "5", 1 },
    { "5/ 8", 10, "0", 0 },

    {  "ff", 16,  "255", 2 },
    { "-ff", 16, "-255", 3 },
    {  "FF", 16,  "255", 2 },
    { "-FF", 16, "-255", 3 },

    { "z", 36, "35", 1 },
    { "Z", 36, "35", 1 },

    {  "0x0",    0,   "0", 3 },
    {  "0x10",   0,  "16", 4 },
    { "-0x0",    0,   "0", 4 },
    { "-0x10",   0, "-16", 5 },
    { "-0x10/5", 0, "-16/5", 7 },

    {  "00",   0,  "0", 2 },
    {  "010",  0,  "8", 3 },
    { "-00",   0,  "0", 3 },
    { "-010",  0, "-8", 4 },
  };

  mpq_t  got, want;
  long   ftell_nread;
  int    i, post, j, got_nread;
  FILE   *fp;

  mpq_init (got);
  mpq_init (want);

  for (i = 0; i < numberof (data); i++)
    {
      /* post = number of trailing spaces appended after the input, to
	 check the reader stops at the right place.  */
      for (post = 0; post <= 2; post++)
	{
	  mpq_set_str_or_abort (want, data[i].want, 0);
	  MPQ_CHECK_FORMAT (want);

	  fp = fopen (FILENAME, "w+");
	  ASSERT_ALWAYS (fp != NULL);
	  fputs (data[i].inp, fp);
	  for (j = 0; j < post; j++)
	    putc (' ', fp);
	  fflush (fp);
	  ASSERT_ALWAYS (! ferror(fp));

	  rewind (fp);
	  got_nread = mpq_inp_str (got, fp, data[i].base);

	  /* On success, the stream position must match the count
	     returned.  */
	  if (got_nread != 0)
	    {
	      ftell_nread = ftell (fp);
	      if (got_nread != ftell_nread)
		{
		  printf ("mpq_inp_str nread wrong\n");
		  printf ("  inp          \"%s\"\n", data[i].inp);
		  printf ("  base         %d\n", data[i].base);
		  printf ("  got_nread    %d\n", got_nread);
		  printf ("  ftell_nread  %ld\n", ftell_nread);
		  abort ();
		}
	    }

	  /* If the whole input is consumed and there is no padding, the
	     stream must be at EOF afterwards.  */
	  if (post == 0 && data[i].want_nread == strlen(data[i].inp))
	    {
	      int  c = getc(fp);
	      if (c != EOF)
		{
		  printf ("mpq_inp_str didn't read to EOF\n");
		  printf ("  inp         \"%s\"\n", data[i].inp);
		  printf ("  base        %d\n", data[i].base);
		  printf ("  c '%c' %#x\n", c, c);
		  abort ();
		}
	    }

	  if (got_nread != data[i].want_nread)
	    {
	      printf ("mpq_inp_str nread wrong\n");
	      printf ("  inp         \"%s\"\n", data[i].inp);
	      printf ("  base        %d\n", data[i].base);
	      printf ("  got_nread   %d\n", got_nread);
	      printf ("  want_nread  %d\n", data[i].want_nread);
	      abort ();
	    }

	  MPQ_CHECK_FORMAT (got);

	  if (! mpq_equal (got, want))
	    {
	      printf ("mpq_inp_str wrong result\n");
	      printf ("  inp   \"%s\"\n", data[i].inp);
	      printf ("  base  %d\n", data[i].base);
	      mpq_trace ("  got ",  got);
	      mpq_trace ("  want", want);
	      abort ();
	    }

	  ASSERT_ALWAYS (fclose (fp) == 0);
	}
    }

  mpq_clear (got);
  mpq_clear (want);
}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  unlink (FILENAME);
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpq/t-inv.c b/tests/mpq/t-inv.c
new file mode 100644
index 0000000..87ba8d4
--- /dev/null
+++ b/tests/mpq/t-inv.c
@@ -0,0 +1,60 @@
+/* Test mpq_inv (and set/get_num/den).
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "tests.h"
+
/* Exercise mpq_inv together with mpq_set_num/set_den/get_num/get_den.
   Let s = -42*10^21.  Build a = s/13, then check that inversion moves the
   sign to the numerator, that inversion is an involution (including
   in-place), and that get_num/get_den recover the expected parts.  */
int
main (int argc, char **argv)
{
  mpq_t a, b;
  mpz_t m, n;
  const char* s = "-420000000000000000000000";

  tests_start ();

  mpq_inits (a, b, (mpq_ptr)0);
  mpz_inits (m, n, (mpz_ptr)0);

  /* a = s/13 (set_den first, then set_num).  */
  mpz_set_ui (m, 13);
  mpq_set_den (a, m);
  mpz_set_str (m, s, 0);
  mpq_set_num (a, m);
  MPQ_CHECK_FORMAT (a);
  /* b = 1/a = 13/s; canonical form carries the sign on the numerator,
     so num(b) must be -13.  */
  mpq_inv (b, a);
  MPQ_CHECK_FORMAT (b);
  mpq_get_num (n, b);
  ASSERT_ALWAYS (mpz_cmp_si (n, -13) == 0);
  /* b = 13/|s|; a = |s|/13; then invert b in place twice' worth: one
     in-place inversion gives b = |s|/13 with denominator 13.  */
  mpq_neg (b, b);
  mpq_inv (a, b);
  MPQ_CHECK_FORMAT (a);
  mpq_inv (b, b);
  MPQ_CHECK_FORMAT (b);
  mpq_get_den (n, b);
  ASSERT_ALWAYS (mpz_cmp_ui (n, 13) == 0);
  /* num(a) = |s| and m still holds s = -|s|, so their sum is zero.  */
  mpq_get_num (n, a);
  mpz_add (n, n, m);
  ASSERT_ALWAYS (mpz_sgn (n) == 0);

  mpq_clears (a, b, (mpq_ptr)0);
  mpz_clears (m, n, (mpz_ptr)0);

  tests_end ();
  return 0;
}
diff --git a/tests/mpq/t-md_2exp.c b/tests/mpq/t-md_2exp.c
new file mode 100644
index 0000000..2bffce9
--- /dev/null
+++ b/tests/mpq/t-md_2exp.c
@@ -0,0 +1,244 @@
+/* Test mpq_mul_2exp and mpq_div_2exp.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+struct pair_t {
+  const char     *num;
+  const char     *den;
+};
+
+void
+check_random ()
+{
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long arg_size, size_range;
+  mpq_t q, r;
+  int i;
+  mp_bitcnt_t shift;
+  int reps = 10000;
+
+  rands = RANDS;
+
+  mpz_init (bs);
+  mpq_init (q);
+  mpq_init (r);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 11 + 2; /* size_range 2..12: operands up to 4095 bits */
+
+      mpz_urandomb (bs, rands, size_range);
+      arg_size = mpz_get_ui (bs);
+      mpz_rrandomb (mpq_numref (q), rands, arg_size);
+      do
+	{
+	  mpz_urandomb (bs, rands, size_range);
+	  arg_size = mpz_get_ui (bs);
+	  mpz_rrandomb (mpq_denref (q), rands, arg_size);
+	}
+      while (mpz_sgn (mpq_denref (q)) == 0);
+
+      /* We now have a random rational in q, albeit an unnormalised one.  The
+	 lack of normalisation should not matter here, so let's save the time a
+	 gcd would require.  */
+
+      mpz_urandomb (bs, rands, 32);
+      shift = mpz_get_ui (bs) % 4096;
+
+      mpq_mul_2exp (r, q, shift);
+
+      if (mpq_cmp (r, q) < 0)
+	{
+	  printf ("mpq_mul_2exp wrong on random\n");
+	  abort ();
+	}
+
+      mpq_div_2exp (r, r, shift);
+
+      if (mpq_cmp (r, q) != 0)
+	{
+	  printf ("mpq_mul_2exp or mpq_div_2exp wrong on random\n");
+	  abort ();
+	}
+    }
+  mpq_clear (q);
+  mpq_clear (r);
+  mpz_clear (bs);
+}
+
+int
+main (int argc, char **argv)
+{
+  static const struct {
+    struct pair_t  left;
+    unsigned long  n;
+    struct pair_t  right;
+
+  } data[] = {
+    { {"0","1"}, 0, {"0","1"} },
+    { {"0","1"}, 1, {"0","1"} },
+    { {"0","1"}, 2, {"0","1"} },
+
+    { {"1","1"}, 0, {"1","1"} },
+    { {"1","1"}, 1, {"2","1"} },
+    { {"1","1"}, 2, {"4","1"} },
+    { {"1","1"}, 3, {"8","1"} },
+
+    { {"1","1"}, 31, {"0x80000000","1"} },
+    { {"1","1"}, 32, {"0x100000000","1"} },
+    { {"1","1"}, 33, {"0x200000000","1"} },
+    { {"1","1"}, 63, {"0x8000000000000000","1"} },
+    { {"1","1"}, 64, {"0x10000000000000000","1"} },
+    { {"1","1"}, 65, {"0x20000000000000000","1"} },
+    { {"1","1"}, 95, {"0x800000000000000000000000","1"} },
+    { {"1","1"}, 96, {"0x1000000000000000000000000","1"} },
+    { {"1","1"}, 97, {"0x2000000000000000000000000","1"} },
+    { {"1","1"}, 127, {"0x80000000000000000000000000000000","1"} },
+    { {"1","1"}, 128, {"0x100000000000000000000000000000000","1"} },
+    { {"1","1"}, 129, {"0x200000000000000000000000000000000","1"} },
+
+    { {"1","2"}, 31, {"0x40000000","1"} },
+    { {"1","2"}, 32, {"0x80000000","1"} },
+    { {"1","2"}, 33, {"0x100000000","1"} },
+    { {"1","2"}, 63, {"0x4000000000000000","1"} },
+    { {"1","2"}, 64, {"0x8000000000000000","1"} },
+    { {"1","2"}, 65, {"0x10000000000000000","1"} },
+    { {"1","2"}, 95, {"0x400000000000000000000000","1"} },
+    { {"1","2"}, 96, {"0x800000000000000000000000","1"} },
+    { {"1","2"}, 97, {"0x1000000000000000000000000","1"} },
+    { {"1","2"}, 127, {"0x40000000000000000000000000000000","1"} },
+    { {"1","2"}, 128, {"0x80000000000000000000000000000000","1"} },
+    { {"1","2"}, 129, {"0x100000000000000000000000000000000","1"} },
+
+    { {"1","0x80000000"}, 30, {"1","2"} },
+    { {"1","0x80000000"}, 31, {"1","1"} },
+    { {"1","0x80000000"}, 32, {"2","1"} },
+    { {"1","0x80000000"}, 33, {"4","1"} },
+    { {"1","0x80000000"}, 62, {"0x80000000","1"} },
+    { {"1","0x80000000"}, 63, {"0x100000000","1"} },
+    { {"1","0x80000000"}, 64, {"0x200000000","1"} },
+    { {"1","0x80000000"}, 94, {"0x8000000000000000","1"} },
+    { {"1","0x80000000"}, 95, {"0x10000000000000000","1"} },
+    { {"1","0x80000000"}, 96, {"0x20000000000000000","1"} },
+    { {"1","0x80000000"}, 126, {"0x800000000000000000000000","1"} },
+    { {"1","0x80000000"}, 127, {"0x1000000000000000000000000","1"} },
+    { {"1","0x80000000"}, 128, {"0x2000000000000000000000000","1"} },
+
+    { {"1","0x100000000"}, 1, {"1","0x80000000"} },
+    { {"1","0x100000000"}, 2, {"1","0x40000000"} },
+    { {"1","0x100000000"}, 3, {"1","0x20000000"} },
+
+    { {"1","0x10000000000000000"}, 1, {"1","0x8000000000000000"} },
+    { {"1","0x10000000000000000"}, 2, {"1","0x4000000000000000"} },
+    { {"1","0x10000000000000000"}, 3, {"1","0x2000000000000000"} },
+  };
+
+  void (*fun) (mpq_ptr, mpq_srcptr, unsigned long);
+  const struct pair_t  *p_start, *p_want;
+  const char  *name;
+  mpq_t    sep, got, want;
+  mpq_ptr  q;
+  int      i, muldiv, sign, overlap;
+
+  tests_start ();
+
+  mpq_init (sep);
+  mpq_init (got);
+  mpq_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (muldiv = 0; muldiv < 2; muldiv++)
+        {
+          if (muldiv == 0)
+            {
+              fun = mpq_mul_2exp;
+              name = "mpq_mul_2exp";
+              p_start = &data[i].left;
+              p_want = &data[i].right;
+            }
+          else
+            {
+              fun = mpq_div_2exp;
+              name = "mpq_div_2exp";
+              p_start = &data[i].right;
+              p_want = &data[i].left;
+            }
+
+          for (sign = 0; sign <= 1; sign++)
+            {
+              mpz_set_str_or_abort (mpq_numref(want), p_want->num, 0);
+              mpz_set_str_or_abort (mpq_denref(want), p_want->den, 0);
+              if (sign)
+                mpq_neg (want, want);
+
+              for (overlap = 0; overlap <= 1; overlap++)
+                {
+                  q = overlap ? got : sep;
+
+                  /* initial garbage in "got" */
+                  mpq_set_ui (got, 123L, 456L);
+
+                  mpz_set_str_or_abort (mpq_numref(q), p_start->num, 0);
+                  mpz_set_str_or_abort (mpq_denref(q), p_start->den, 0);
+                  if (sign)
+                    mpq_neg (q, q);
+
+                  (*fun) (got, q, data[i].n);
+                  MPQ_CHECK_FORMAT (got);
+
+                  if (! mpq_equal (got, want))
+                    {
+                      printf ("%s wrong at data[%d], sign %d, overlap %d\n",
+                              name, i, sign, overlap);
+                      printf ("   num \"%s\"\n", p_start->num);
+                      printf ("   den \"%s\"\n", p_start->den);
+                      printf ("   n   %lu\n", data[i].n);
+
+                      printf ("   got  ");
+                      mpq_out_str (stdout, 16, got);
+                      printf (" (hex)\n");
+
+                      printf ("   want ");
+                      mpq_out_str (stdout, 16, want);
+                      printf (" (hex)\n");
+
+                      abort ();
+                    }
+                }
+            }
+        }
+    }
+
+  check_random ();
+
+  mpq_clear (sep);
+  mpq_clear (got);
+  mpq_clear (want);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/t-set_f.c b/tests/mpq/t-set_f.c
new file mode 100644
index 0000000..e92e3aa
--- /dev/null
+++ b/tests/mpq/t-set_f.c
@@ -0,0 +1,169 @@
+/* Test mpq_set_f.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+int
+main (int argc, char **argv)
+{
+#if GMP_NAIL_BITS == 0
+  static const struct {
+    int         f_base;
+    const char  *f;
+    int         z_base;
+    const char  *want_num;
+    const char  *want_den;
+
+  } data[] = {
+
+    { -2, "0",    16, "0", "1" },
+    { -2, "1",    16, "1", "1" },
+    { -2, "1@1",  16, "2", "1" },
+    { -2, "1@2",  16, "4", "1" },
+    { -2, "1@3",  16, "8", "1" },
+
+    { -2, "1@30", 16,  "40000000", "1" },
+    { -2, "1@31", 16,  "80000000", "1" },
+    { -2, "1@32", 16, "100000000", "1" },
+    { -2, "1@33", 16, "200000000", "1" },
+    { -2, "1@34", 16, "400000000", "1" },
+
+    { -2, "1@62", 16,  "4000000000000000", "1" },
+    { -2, "1@63", 16,  "8000000000000000", "1" },
+    { -2, "1@64", 16, "10000000000000000", "1" },
+    { -2, "1@65", 16, "20000000000000000", "1" },
+    { -2, "1@66", 16, "40000000000000000", "1" },
+
+    { -2, "1@126", 16,  "40000000000000000000000000000000", "1" },
+    { -2, "1@127", 16,  "80000000000000000000000000000000", "1" },
+    { -2, "1@128", 16, "100000000000000000000000000000000", "1" },
+    { -2, "1@129", 16, "200000000000000000000000000000000", "1" },
+    { -2, "1@130", 16, "400000000000000000000000000000000", "1" },
+
+    { -2, "1@-1",  16, "1", "2" },
+    { -2, "1@-2",  16, "1", "4" },
+    { -2, "1@-3",  16, "1", "8" },
+
+    { -2, "1@-30", 16, "1",  "40000000" },
+    { -2, "1@-31", 16, "1",  "80000000" },
+    { -2, "1@-32", 16, "1", "100000000" },
+    { -2, "1@-33", 16, "1", "200000000" },
+    { -2, "1@-34", 16, "1", "400000000" },
+
+    { -2, "1@-62", 16, "1",  "4000000000000000" },
+    { -2, "1@-63", 16, "1",  "8000000000000000" },
+    { -2, "1@-64", 16, "1", "10000000000000000" },
+    { -2, "1@-65", 16, "1", "20000000000000000" },
+    { -2, "1@-66", 16, "1", "40000000000000000" },
+
+    { -2, "1@-126", 16, "1",  "40000000000000000000000000000000" },
+    { -2, "1@-127", 16, "1",  "80000000000000000000000000000000" },
+    { -2, "1@-128", 16, "1", "100000000000000000000000000000000" },
+    { -2, "1@-129", 16, "1", "200000000000000000000000000000000" },
+    { -2, "1@-130", 16, "1", "400000000000000000000000000000000" },
+
+    { -2, "1@-30", 16, "1",  "40000000" },
+    { -2, "1@-31", 16, "1",  "80000000" },
+    { -2, "1@-32", 16, "1", "100000000" },
+    { -2, "1@-33", 16, "1", "200000000" },
+    { -2, "1@-34", 16, "1", "400000000" },
+
+    { -2, "11@-62", 16, "3",  "4000000000000000" },
+    { -2, "11@-63", 16, "3",  "8000000000000000" },
+    { -2, "11@-64", 16, "3", "10000000000000000" },
+    { -2, "11@-65", 16, "3", "20000000000000000" },
+    { -2, "11@-66", 16, "3", "40000000000000000" },
+
+    { 16, "80000000.00000001", 16, "8000000000000001", "100000000" },
+    { 16, "80000000.00000008", 16, "1000000000000001",  "20000000" },
+    { 16, "80000000.8",        16, "100000001", "2" },
+
+  };
+
+  mpf_t  f;
+  mpq_t  got;
+  mpz_t  want_num, want_den;
+  int    i, neg;
+
+  tests_start ();
+
+  mpf_init2 (f, 1024L);
+  mpq_init (got);
+  mpz_init (want_num);
+  mpz_init (want_den);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (neg = 0; neg <= 1; neg++)
+        {
+          mpf_set_str_or_abort (f, data[i].f, data[i].f_base);
+          mpz_set_str_or_abort (want_num, data[i].want_num, data[i].z_base);
+          mpz_set_str_or_abort (want_den, data[i].want_den, data[i].z_base);
+
+          if (neg)
+            {
+              mpf_neg (f, f);
+              mpz_neg (want_num, want_num);
+            }
+
+          mpq_set_f (got, f);
+          MPQ_CHECK_FORMAT (got);
+
+          if (mpz_cmp (mpq_numref(got), want_num) != 0
+              || mpz_cmp (mpq_denref(got), want_den) != 0)
+            {
+              printf ("wrong at data[%d]\n", i);
+              printf ("   f_base %d, z_base %d\n",
+                      data[i].f_base, data[i].z_base);
+
+              printf ("   f \"%s\" hex ", data[i].f);
+              mpf_out_str (stdout, 16, 0, f);
+              printf ("\n");
+
+              printf ("   want num 0x");
+              mpz_out_str (stdout, 16, want_num);
+              printf ("\n");
+              printf ("   want den 0x");
+              mpz_out_str (stdout, 16, want_den);
+              printf ("\n");
+
+              printf ("   got num 0x");
+              mpz_out_str (stdout, 16, mpq_numref(got));
+              printf ("\n");
+              printf ("   got den 0x");
+              mpz_out_str (stdout, 16, mpq_denref(got));
+              printf ("\n");
+
+              abort ();
+            }
+        }
+    }
+
+  mpf_clear (f);
+  mpq_clear (got);
+  mpz_clear (want_num);
+  mpz_clear (want_den);
+
+  tests_end ();
+#endif
+  exit (0);
+}
diff --git a/tests/mpq/t-set_str.c b/tests/mpq/t-set_str.c
new file mode 100644
index 0000000..1f5ca8b
--- /dev/null
+++ b/tests/mpq/t-set_str.c
@@ -0,0 +1,102 @@
+/* Test mpq_set_str.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpq_srcptr want, int base, const char *str)
+{
+  mpq_t   got;
+
+  MPQ_CHECK_FORMAT (want);
+  mp_trace_base = base;
+
+  mpq_init (got);
+
+  if (mpq_set_str (got, str, base) != 0)
+    {
+      printf ("mpq_set_str unexpectedly failed\n");
+      printf ("  base %d\n", base);
+      printf ("  str  \"%s\"\n", str);
+      abort ();
+    }
+  MPQ_CHECK_FORMAT (got);
+
+  if (! mpq_equal (got, want))
+    {
+      printf ("mpq_set_str wrong\n");
+      printf ("  base %d\n", base);
+      printf ("  str  \"%s\"\n", str);
+      mpq_trace ("got ", got);
+      mpq_trace ("want", want);
+      abort ();
+    }
+
+  mpq_clear (got);
+}
+
+void
+check_samples (void)
+{
+  mpq_t  q;
+
+  mpq_init (q);
+
+  mpq_set_ui (q, 0L, 1L);
+  check_one (q, 10, "0");
+  check_one (q, 10, "0/1");
+  check_one (q, 10, "0  / 1");
+  check_one (q, 0, "0x0/ 1");
+  check_one (q, 0, "0x0/ 0x1");
+  check_one (q, 0, "0 / 0x1");
+
+  check_one (q, 10, "-0");
+  check_one (q, 10, "-0/1");
+  check_one (q, 10, "-0  / 1");
+  check_one (q, 0, "-0x0/ 1");
+  check_one (q, 0, "-0x0/ 0x1");
+  check_one (q, 0, "-0 / 0x1");
+
+  mpq_set_ui (q, 255L, 256L);
+  check_one (q, 10, "255/256");
+  check_one (q, 0,  "0xFF/0x100");
+  check_one (q, 16, "FF/100");
+
+  mpq_neg (q, q);
+  check_one (q, 10, "-255/256");
+  check_one (q, 0,  "-0xFF/0x100");
+  check_one (q, 16, "-FF/100");
+
+  mpq_clear (q);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_samples ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/convert.c b/tests/mpz/convert.c
new file mode 100644
index 0000000..c449c66
--- /dev/null
+++ b/tests/mpz/convert.c
@@ -0,0 +1,186 @@
+/* Test conversion using mpz_get_str and mpz_set_str.
+
+Copyright 1993, 1994, 1996, 1999-2002, 2006, 2007, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for strlen */
+#include <ctype.h> /* for tolower */
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp (mpz_t, int);
+
+static int str_casecmp (const char *, const char *);
+
+void
+string_urandomb (char *bp, size_t len, int base, gmp_randstate_ptr rands)
+{
+  mpz_t bs;
+  unsigned long bsi;
+  int d, l;
+  const char *collseq = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+  mpz_init (bs);
+
+  mpz_urandomb (bs, rands, 32);
+  bsi = mpz_get_ui (bs);
+  d = bsi % base;
+  while (len != 0)
+    {
+      l = (bsi >> 16) % 20;
+      l = MIN (l, len);
+
+      memset (bp, collseq[d], l);
+
+      len -= l;
+      bp += l;
+
+      mpz_urandomb (bs, rands, 32);
+      bsi = mpz_get_ui (bs);
+      d = bsi & 0xfff;
+      if (d >= base)
+	d = 0;
+    }
+
+  bp[0] = '\0';
+  mpz_clear (bs);
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2;
+  mp_size_t size;
+  int i;
+  int reps = 2000;
+  char *str, *buf, *bp;
+  int base;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  size_t len;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (op1);
+  mpz_init (op2);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* 1. Generate random mpz_t and convert to a string and back to mpz_t
+	 again.  */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2;	/* size_range 2..18 */
+      mpz_urandomb (bs, rands, size_range);	/* 0..2^18-1, i.e. up to 262143 bits */
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (op1, rands, size);
+
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (op1, op1);
+
+      mpz_urandomb (bs, rands, 32);
+      bsi = mpz_get_ui (bs);
+      base = bsi % 62 + 1;
+      if (base == 1)
+	base = 0;
+
+      str = mpz_get_str ((char *) 0, base, op1);
+      mpz_set_str_or_abort (op2, str, base);
+
+      if (mpz_cmp (op1, op2))
+	{
+	  fprintf (stderr, "ERROR, op1 and op2 different in test %d\n", i);
+	  fprintf (stderr, "str  = %s\n", str);
+	  fprintf (stderr, "base = %d\n", base);
+	  fprintf (stderr, "op1  = "); debug_mp (op1, -16);
+	  fprintf (stderr, "op2  = "); debug_mp (op2, -16);
+	  abort ();
+	}
+
+      (*__gmp_free_func) (str, strlen (str) + 1);
+
+      /* 2. Generate random string and convert to mpz_t and back to a string
+	 again.  */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 1;	/* 1..16 */
+      mpz_urandomb (bs, rands, size_range);	/* 1..65536 digits */
+      len = mpz_get_ui (bs) + 1;
+      buf = (char *) (*__gmp_allocate_func) (len + 1);
+      if (base == 0)
+	base = 10;
+      string_urandomb (buf, len, base, rands);
+
+      mpz_set_str_or_abort (op1, buf, base);
+      str = mpz_get_str ((char *) 0, base, op1);
+
+      /* Skip over leading zeros, but don't leave the string at zero length. */
+      for (bp = buf; bp[0] == '0' && bp[1] != '\0'; bp++)
+	;
+
+      if (str_casecmp (str, bp) != 0)
+	{
+	  fprintf (stderr, "ERROR, str and buf different in test %d\n", i);
+	  fprintf (stderr, "str  = %s\n", str);
+	  fprintf (stderr, "buf  = %s\n", buf);
+	  fprintf (stderr, "base = %d\n", base);
+	  fprintf (stderr, "op1  = "); debug_mp (op1, -16);
+	  abort ();
+	}
+
+      (*__gmp_free_func) (buf, len + 1);
+      (*__gmp_free_func) (str, strlen (str) + 1);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* This is similar to POSIX strcasecmp except that we don't do the comparison
+   with unsigned char.  We avoid strcasecmp for C standard conformance.  */
+static int
+str_casecmp (const char *s1, const char *s2)
+{
+  size_t i;
+  for (i = 0;; i++)
+    {
+      int c1 = s1[i];
+      int c2 = s2[i];
+      if (c1 == 0 || tolower (c1) != tolower (c2))
+	return c1 - c2;
+    }
+}
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/dive.c b/tests/mpz/dive.c
new file mode 100644
index 0000000..99e8caf
--- /dev/null
+++ b/tests/mpz/dive.c
@@ -0,0 +1,100 @@
+/* Test mpz_mul, mpz_divexact.
+
+Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2;
+  mpz_t prod, quot;
+  mp_size_t size;
+  int i;
+  int reps = 5000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mp_trace_base = -16;
+
+  mpz_init (bs);
+
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (prod);
+  mpz_init (quot);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2; /* size_range 2..18: operands up to 2^18-1 bits */
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (op1, rands, size);
+
+      do
+	{
+	  mpz_urandomb (bs, rands, size_range);
+	  size = mpz_get_ui (bs);
+	  mpz_rrandomb (op2, rands, size);
+	}
+      while (mpz_sgn (op2) == 0);
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (op1, op1);
+      if ((bsi & 2) != 0)
+	mpz_neg (op2, op2);
+
+      mpz_mul (prod, op1, op2);
+
+      mpz_divexact (quot, prod, op2);
+      MPZ_CHECK_FORMAT (quot);
+
+      if (mpz_cmp (quot, op1) != 0)
+        {
+          printf ("Wrong results:\n");
+          mpz_trace ("  got     ", quot);
+          mpz_trace ("  want    ", op1);
+          mpz_trace ("  dividend", prod);
+          mpz_trace ("  divisor ", op2);
+          abort ();
+        }
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (prod);
+  mpz_clear (quot);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/dive_ui.c b/tests/mpz/dive_ui.c
new file mode 100644
index 0000000..8f74bce
--- /dev/null
+++ b/tests/mpz/dive_ui.c
@@ -0,0 +1,86 @@
+/* Test mpz_divexact_ui.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_random (int argc, char *argv[])
+{
+  gmp_randstate_ptr rands = RANDS;
+  int    reps = 500000;
+  mpz_t  a, q, got;
+  int    i, qneg;
+  unsigned long  d;
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (a);
+  mpz_init (q);
+  mpz_init (got);
+
+  for (i = 0; i < reps; i++)
+    {
+      do
+	d = (unsigned long) urandom();
+      while (d == 0);
+      mpz_erandomb (q, rands, 512);
+      mpz_mul_ui (a, q, d);
+
+      for (qneg = 0; qneg <= 1; qneg++)
+        {
+          mpz_divexact_ui (got, a, d);
+          MPZ_CHECK_FORMAT (got);
+          if (mpz_cmp (got, q) != 0)
+            {
+              printf    ("mpz_divexact_ui wrong\n");
+              mpz_trace ("    a", a);
+              printf    ("    d=%lu\n", d);
+              mpz_trace ("    q", q);
+              mpz_trace ("  got", got);
+              abort ();
+            }
+
+          mpz_neg (q, q);
+          mpz_neg (a, a);
+        }
+
+    }
+
+  mpz_clear (a);
+  mpz_clear (q);
+  mpz_clear (got);
+}
+
+
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+
+  check_random (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/io.c b/tests/mpz/io.c
new file mode 100644
index 0000000..6862994
--- /dev/null
+++ b/tests/mpz/io.c
@@ -0,0 +1,151 @@
+/* Test conversion and I/O using mpz_out_str and mpz_inp_str.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2012, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>		/* for unlink */
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define FILENAME  "/tmp/io.tmp"
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stdout, base, x); fputc ('\n', stdout);
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t  op1, op2;
+  mp_size_t size;
+  int i;
+  int reps = 10000;
+  FILE *fp;
+  int base, base_out;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  size_t nread;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (op1);
+  mpz_init (op2);
+
+  fp = fopen (FILENAME, "w+");
+
+  if (mpz_out_str (fp, 63, op1) != 0)
+    {
+      printf ("mpz_out_str did not return 0 (error) with base > 62\n");
+      abort ();
+    }
+
+  if (mpz_out_str (fp, -37, op1) != 0)
+    {
+      printf ("mpz_out_str did not return 0 (error) with base < -37\n");
+      abort ();
+    }
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (op1, rands, size);
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (op1, op1);
+
+      mpz_urandomb (bs, rands, 16);
+      bsi = mpz_get_ui (bs);
+      base = bsi % 62 + 1;
+      if (base == 1)
+	base = 0;
+
+      if (i % 2 == 0 && base <= 36)
+	base_out = -base;
+      else
+	base_out = base;
+
+      rewind (fp);
+      if (mpz_out_str (fp, base_out, op1) == 0
+	  || putc (' ', fp) == EOF
+	  || fflush (fp) != 0)
+	{
+	  printf ("mpz_out_str write error\n");
+	  abort ();
+	}
+
+      rewind (fp);
+      nread = mpz_inp_str (op2, fp, base);
+      if (nread == 0)
+	{
+	  if (ferror (fp))
+	    printf ("mpz_inp_str stream read error\n");
+	  else
+	    printf ("mpz_inp_str data conversion error\n");
+	  abort ();
+	}
+
+      if (nread != ftell(fp))
+	{
+	  printf ("mpz_inp_str nread doesn't match ftell\n");
+	  printf ("  nread  %lu\n", (unsigned long) nread);
+	  printf ("  ftell  %ld\n", ftell(fp));
+	  abort ();
+	}
+
+      if (mpz_cmp (op1, op2))
+	{
+	  printf ("ERROR\n");
+	  printf ("op1  = "); debug_mp (op1, -16);
+	  printf ("op2  = "); debug_mp (op2, -16);
+	  printf ("base = %d\n", base);
+	  abort ();
+	}
+    }
+
+  fclose (fp);
+
+  unlink (FILENAME);
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/logic.c b/tests/mpz/logic.c
new file mode 100644
index 0000000..f3b38b1
--- /dev/null
+++ b/tests/mpz/logic.c
@@ -0,0 +1,194 @@
+/* Test mpz_com, mpz_and, mpz_ior, and mpz_xor.
+
+Copyright 1993, 1994, 1996, 1997, 2001, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort (void);
+void debug_mp (mpz_t, int);
+
+/* Test driver: exercise mpz_and, mpz_ior, mpz_xor and mpz_com, first on
+   a sliding power-of-two pattern and then on random signed operands,
+   checking the standard boolean identities (idempotence, involution of
+   complement, De Morgan's laws, and xor expressed via and/ior/com).
+   An optional argv[1] overrides the default repetition count.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t x, y, r1, r2;
+  mpz_t t1, t2, t3;
+  mp_size_t xsize, ysize;
+  int i;
+  int reps = 100000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (x);
+  mpz_init (y);
+  mpz_init (r1);
+  mpz_init (r2);
+  mpz_init (t1);
+  mpz_init (t2);
+  mpz_init (t3);
+
+  /* Fixed patterns: x runs through -2, -4, -8, ..., -(2^300); y stays 0.  */
+  mpz_set_si (x, -1);
+  mpz_set_ui (y, 0);
+  for (i = 0; i < 300; i++)
+    {
+      mpz_mul_2exp (x, x, 1);
+
+      /* x & x == x (idempotence).  */
+      mpz_and (r1, x, x);
+      MPZ_CHECK_FORMAT (r1);
+      if (mpz_cmp (r1, x) != 0)
+	dump_abort ();
+
+      /* x | x == x.  */
+      mpz_ior (r2, x, x);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r2, x) != 0)
+	dump_abort ();
+
+      /* x ^ x == 0.  */
+      mpz_xor (t1, x, x);
+      MPZ_CHECK_FORMAT (t1);
+      if (mpz_cmp_si (t1, 0) != 0)
+	dump_abort ();
+
+      /* x | 0 == x.  */
+      mpz_ior (t1, x, y);
+      MPZ_CHECK_FORMAT (t1);
+      if (mpz_cmp (t1, x) != 0)
+	dump_abort ();
+
+      /* x ^ 0 == x.  */
+      mpz_xor (t2, x, y);
+      MPZ_CHECK_FORMAT (t2);
+      if (mpz_cmp (t2, x) != 0)
+	dump_abort ();
+
+      /* x ^ ~x == -1 (all ones, two's complement).  */
+      mpz_com (t2, x);
+      MPZ_CHECK_FORMAT (t2);
+      mpz_xor (t3, t2, x);
+      MPZ_CHECK_FORMAT (t3);
+      if (mpz_cmp_si (t3, -1) != 0)
+	dump_abort ();
+    }
+
+  /* Random operands of up to size_range bits, with random signs.  */
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 8 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      xsize = mpz_get_ui (bs);
+      mpz_rrandomb (x, rands, xsize);
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (x, x);
+
+      mpz_urandomb (bs, rands, size_range);
+      ysize = mpz_get_ui (bs);
+      mpz_rrandomb (y, rands, ysize);
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (y, y);
+
+      /* ~~x == x, also checking in-place complement.  */
+      mpz_com (r1, x);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_com (r1, r1);
+      MPZ_CHECK_FORMAT (r1);
+      if (mpz_cmp (r1, x) != 0)
+	dump_abort ();
+
+      /* ~~y == y, via a separate destination.  */
+      mpz_com (r1, y);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_com (r2, r1);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r2, y) != 0)
+	dump_abort ();
+
+      /* De Morgan: ~(~x & ~y) == x | y.  */
+      mpz_com (t1, x);
+      MPZ_CHECK_FORMAT (t1);
+      mpz_com (t2, y);
+      MPZ_CHECK_FORMAT (t2);
+      mpz_and (t3, t1, t2);
+      MPZ_CHECK_FORMAT (t3);
+      mpz_com (r1, t3);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_ior (r2, x, y);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r1, r2) != 0)
+	dump_abort ();
+
+      /* De Morgan: ~(~x | ~y) == x & y.  */
+      mpz_com (t1, x);
+      MPZ_CHECK_FORMAT (t1);
+      mpz_com (t2, y);
+      MPZ_CHECK_FORMAT (t2);
+      mpz_ior (t3, t1, t2);
+      MPZ_CHECK_FORMAT (t3);
+      mpz_com (r1, t3);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_and (r2, x, y);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r1, r2) != 0)
+	dump_abort ();
+
+      /* (x | y) & ~(x & y) == x ^ y.  */
+      mpz_ior (t1, x, y);
+      MPZ_CHECK_FORMAT (t1);
+      mpz_and (t2, x, y);
+      MPZ_CHECK_FORMAT (t2);
+      mpz_com (t3, t2);
+      MPZ_CHECK_FORMAT (t3);
+      mpz_and (r1, t1, t3);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_xor (r2, x, y);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r1, r2) != 0)
+	dump_abort ();
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x);
+  mpz_clear (y);
+  mpz_clear (r1);
+  mpz_clear (r2);
+  mpz_clear (t1);
+  mpz_clear (t2);
+  mpz_clear (t3);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Report a failed identity check by aborting.  Defined with an explicit
+   (void) parameter list to match the forward declaration near the top
+   of the file; the old-style empty "()" declares an unspecified
+   parameter list in C89 and means something different before C23.  */
+void
+dump_abort (void)
+{
+  abort();
+}
+
+/* Print X to stderr in the given BASE, followed by a newline.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
diff --git a/tests/mpz/reuse.c b/tests/mpz/reuse.c
new file mode 100644
index 0000000..a8f2201
--- /dev/null
+++ b/tests/mpz/reuse.c
@@ -0,0 +1,786 @@
+/* Test that routines allow reusing a source variable as destination.
+
+   Test all relevant functions except:
+	mpz_bin_ui
+	mpz_nextprime
+	mpz_mul_si
+	mpz_addmul_ui (should this really allow a+=a*c?)
+
+Copyright 1996, 1999-2002, 2009, 2012, 2013, 2016, 2020 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMP_LIBGMP_DLL
+
+/* FIXME: When linking to a DLL libgmp, mpz_add etc can't be used as
+   initializers for global variables because they're effectively global
+   variables (function pointers) themselves.  Perhaps calling a test
+   function successively with mpz_add etc would be better.  */
+
+int
+main (void)
+{
+  printf ("Test suppressed for windows DLL\n");
+  exit (0);
+}
+
+
+#else /* ! DLL_EXPORT */
+
+void dump (const char *, mpz_t, mpz_t, mpz_t);
+
+typedef void (*dss_func) (mpz_ptr, mpz_srcptr, mpz_srcptr);
+typedef void (*dsi_func) (mpz_ptr, mpz_srcptr, unsigned long int);
+typedef unsigned long int (*dsi_div_func) (mpz_ptr, mpz_srcptr, unsigned long int);
+typedef unsigned long int (*ddsi_div_func) (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
+typedef void (*ddss_div_func) (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
+typedef void (*ds_func) (mpz_ptr, mpz_srcptr);
+
+
+/* Like mpz_invert, but when no inverse of A modulo B exists, set R to 0
+   (mpz_invert itself leaves R undefined in that case), so the result is
+   always well-defined for the table-driven tests below.  */
+void
+mpz_xinvert (mpz_ptr r, mpz_srcptr a, mpz_srcptr b)
+{
+  if (! mpz_invert (r, a, b))
+    mpz_set_ui (r, 0);
+}
+
+/* Function tables driving the generic aliasing tests in main().  Each
+   entry pairs an mpz entry point with its printable name; the extra
+   flags let main() skip undefined cases (division by zero) and skip
+   expensive functions on large operands.  */
+
+/* Binary functions r = f(s1, s2).  */
+struct {
+  dss_func fptr;
+  const char *fname;
+  int isdivision;
+  int isslow;
+} static dss[] =
+  { { mpz_add,     "mpz_add",	  0, 0 },
+    { mpz_sub,     "mpz_sub",	  0, 0 },
+    { mpz_mul,     "mpz_mul",	  0, 0 },
+    { mpz_cdiv_q,  "mpz_cdiv_q",  1, 0 },
+    { mpz_cdiv_r,  "mpz_cdiv_r",  1, 0 },
+    { mpz_fdiv_q,  "mpz_fdiv_q",  1, 0 },
+    { mpz_fdiv_r,  "mpz_fdiv_r",  1, 0 },
+    { mpz_tdiv_q,  "mpz_tdiv_q",  1, 0 },
+    { mpz_tdiv_r,  "mpz_tdiv_r",  1, 0 },
+    { mpz_mod,     "mpz_mod",	  1, 0 },
+    { mpz_xinvert, "mpz_xinvert", 1, 1 },
+    { mpz_gcd,     "mpz_gcd",	  0, 1 },
+    { mpz_lcm,     "mpz_lcm",	  0, 1 },
+    { mpz_and,     "mpz_and",	  0, 0 },
+    { mpz_ior,     "mpz_ior",	  0, 0 },
+    { mpz_xor,     "mpz_xor",     0, 0 }
+  };
+
+
+/* Functions r = f(s, ui).  A non-zero "mod" bounds the ui operand
+   (main() reduces it modulo this value), keeping shift counts and
+   exponents reasonable.  */
+struct {
+  dsi_func fptr;
+  const char *fname;
+  int mod;
+} static dsi[] =
+{
+  /* Don't change order here without changing the code in main(). */
+  { mpz_add_ui,         "mpz_add_ui",	     0 },
+  { mpz_mul_ui,		"mpz_mul_ui",	     0 },
+  { mpz_sub_ui,		"mpz_sub_ui",	     0 },
+  { mpz_fdiv_q_2exp,    "mpz_fdiv_q_2exp",   0x1000 },
+  { mpz_fdiv_r_2exp,    "mpz_fdiv_r_2exp",   0x1000 },
+  { mpz_cdiv_q_2exp,    "mpz_cdiv_q_2exp",   0x1000 },
+  { mpz_cdiv_r_2exp,    "mpz_cdiv_r_2exp",   0x1000 },
+  { mpz_tdiv_q_2exp,    "mpz_tdiv_q_2exp",   0x1000 },
+  { mpz_tdiv_r_2exp,    "mpz_tdiv_r_2exp",   0x1000 },
+  { mpz_mul_2exp,	"mpz_mul_2exp",      0x100 },
+  { mpz_pow_ui,		"mpz_pow_ui",        0x10 }
+};
+
+/* Division functions r = f(s, ui) that also return a limb-sized
+   remainder.  */
+struct {
+  dsi_div_func fptr;
+  const char *fname;
+} static dsi_div[] =
+{
+  { mpz_cdiv_q_ui,       "mpz_cdiv_q_ui" },
+  { mpz_cdiv_r_ui,       "mpz_cdiv_r_ui" },
+  { mpz_fdiv_q_ui,       "mpz_fdiv_q_ui" },
+  { mpz_fdiv_r_ui,       "mpz_fdiv_r_ui" },
+  { mpz_tdiv_q_ui,       "mpz_tdiv_q_ui" },
+  { mpz_tdiv_r_ui,       "mpz_tdiv_r_ui" }
+};
+
+/* Division functions producing quotient and remainder from an unsigned
+   long divisor.  */
+struct {
+  ddsi_div_func fptr;
+  const char *fname;
+  int isslow;
+} static ddsi_div[] =
+{
+  { mpz_cdiv_qr_ui,     "mpz_cdiv_qr_ui",    0 },
+  { mpz_fdiv_qr_ui,     "mpz_fdiv_qr_ui",    0 },
+  { mpz_tdiv_qr_ui,     "mpz_tdiv_qr_ui",    0 },
+};
+
+
+/* Division functions producing quotient and remainder from an mpz
+   divisor.  */
+struct {
+  ddss_div_func fptr;
+  const char *fname;
+  int isslow;
+} static ddss_div[] =
+{
+  { mpz_cdiv_qr,  "mpz_cdiv_qr",    0 },
+  { mpz_fdiv_qr,  "mpz_fdiv_qr",    0 },
+  { mpz_tdiv_qr,  "mpz_tdiv_qr",    0 },
+};
+
+/* Unary functions r = f(s); "nonneg" marks those defined only for
+   non-negative operands (sqrt).  */
+struct {
+  ds_func fptr;
+  const char *fname;
+  int nonneg;
+} static ds[] =
+{
+  { mpz_abs,    "mpz_abs",    0 },
+  { mpz_com,    "mpz_com",    0 },
+  { mpz_neg,    "mpz_neg",    0 },
+  { mpz_sqrt,   "mpz_sqrt",   1 },
+};
+
+/* Report a failure from a table-driven test (FAIL, using the table
+   entry's name) or from an explicitly named function (FAIL2), dump the
+   operands, and exit with failure status.  */
+#define FAIL(class,indx,op1,op2,op3)					\
+  do {									\
+    dump (class[indx].fname, op1, op2, op3);				\
+    exit (1);								\
+  } while (0)
+
+#define FAIL2(fname,op1,op2,op3)					\
+  do {									\
+    dump (#fname, op1, op2, op3);					\
+    exit (1);								\
+  } while (0)
+
+
+/* If R's allocation is larger than its current size in limbs, shrink
+   the allocation to exactly that size.  Used between operations so that
+   results cannot silently depend on leftover space in a reused
+   destination variable.  */
+void
+realloc_if_reducing (mpz_ptr r)
+{
+  mp_size_t sz = ABSIZ (r);
+
+  if (sz < ALLOC (r))
+    _mpz_realloc (r, sz);
+}
+
+/* Invoke a table entry with destination operands r1 (and r2), first
+   shrinking the destination allocations on selected passes so that both
+   "tight" and "roomy" result allocations get exercised over the run.
+   The suffix encodes the shape: R = result operand, S = source-only.  */
+#define INVOKE_RRS(desc,r1,r2,i1)					\
+  do {									\
+    if (pass & 1) realloc_if_reducing (r1);				\
+    if (pass & 2) realloc_if_reducing (r2);				\
+    (desc).fptr (r1, r2, i1);						\
+  } while (0)
+#define INVOKE_RS(desc,r1,i1)						\
+  do {									\
+    if (pass & 1) realloc_if_reducing (r1);				\
+    (desc).fptr (r1, i1);						\
+  } while (0)
+#define INVOKE_RRSS(desc,r1,r2,i1,i2)					\
+  do {									\
+    if (pass & 1) realloc_if_reducing (r1);				\
+    if (pass & 2) realloc_if_reducing (r2);				\
+    (desc).fptr (r1, r2, i1, i2);					\
+  } while (0)
+#define INVOKE_RSS(desc,r1,i1,i2)					\
+  do {									\
+    if (pass & 1) realloc_if_reducing (r1);				\
+    (desc).fptr (r1, i1, i2);						\
+  } while (0)
+
+/* Test driver: for random operands in1/in2/in3, compute each function's
+   reference result into fresh variables, then recompute with the
+   destination aliased to each source operand in turn (and with result
+   allocations randomly shrunk via the INVOKE_* macros) and require
+   identical results.  Covers the function tables above plus sqrtrem,
+   root, rootrem, gcdext, powm, powm_ui, gcd_ui, remove, divexact and
+   divexact_gcd.  */
+int
+main (int argc, char **argv)
+{
+  int i;
+  unsigned int pass, reps = 400;
+  mpz_t in1, in2, in3;
+  unsigned long int in2i;
+  mpz_t res1, res2, res3;
+  mpz_t ref1, ref2, ref3;
+  mpz_t t;
+  unsigned long int r1, r2;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (in1);
+  mpz_init (in2);
+  mpz_init (in3);
+  mpz_init (ref1);
+  mpz_init (ref2);
+  mpz_init (ref3);
+  mpz_init (res1);
+  mpz_init (res2);
+  mpz_init (res3);
+  mpz_init (t);
+
+  mpz_set_ui (res1, 1);		/* force allocation */
+  mpz_set_ui (res2, 1);		/* force allocation */
+  mpz_set_ui (res3, 1);		/* force allocation */
+
+  for (pass = 1; pass <= reps; pass++)
+    {
+#ifndef VERBOSE
+      /* Progress indicator, only when writing to a terminal.  */
+      if (isatty (STDOUT_FILENO))
+	{
+	  printf ("\r%d/%d passes", pass, reps);
+	  fflush (stdout);
+	}
+#endif
+
+      mpz_urandomb (bs, rands, 32);
+      /* Make size_range gradually bigger with each pass. */
+      size_range = mpz_get_ui (bs) % (pass * 15 / reps + 1) + 8;
+
+      /* Draw a random operand of up to 2^size_range bits; on selected
+	 passes re-draw the bit count to skew toward smaller sizes.  */
+#define MAKE_RANDOM_OP(in, size_range, s)				\
+  do {									\
+    mpz_urandomb (bs, rands, size_range);				\
+    if (((pass >> s) & 3) == 3) /* conditional exponential dist */	\
+      mpz_urandomb (bs, rands, mpz_get_ui (bs) % (size_range - 7) + 7);	\
+    mpz_rrandomb (in, rands, mpz_get_ui (bs));				\
+  } while (0)
+
+      MAKE_RANDOM_OP (in1, size_range, 0);
+      MAKE_RANDOM_OP (in2, size_range, 2);
+      MAKE_RANDOM_OP (in3, size_range, 4);
+#undef MAKE_RANDOM_OP
+
+#ifdef VERBOSE
+      printf("%9d%9d%8d\n",
+	     mpz_sizeinbase(in1,2),
+	     mpz_sizeinbase(in2,2),
+	     mpz_sizeinbase(in3,2));
+#endif
+
+      /* Random signs for the three operands.  */
+      mpz_urandomb (bs, rands, 3);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (in1, in1);
+      if ((bsi & 2) != 0)
+	mpz_neg (in2, in2);
+      if ((bsi & 4) != 0)
+	mpz_neg (in3, in3);
+
+      /* Binary functions r = f(s1,s2): alias the result with s1, then
+	 with s2.  */
+      for (i = 0; i < numberof (dss); i++)
+	{
+	  if (dss[i].isdivision && mpz_sgn (in2) == 0)
+	    continue;
+	  if (dss[i].isslow && size_range > 19)
+	    continue;
+
+	  (dss[i].fptr) (ref1, in1, in2);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, in1);
+	  INVOKE_RSS (dss[i], res1, res1, in2);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL (dss, i, in1, in2, NULL);
+
+	  mpz_set (res1, in2);
+	  INVOKE_RSS (dss[i], res1, in1, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL (dss, i, in1, in2, NULL);
+	}
+
+      /* Quotient-and-remainder functions: alias each of the two output
+	 operands with each of the two inputs in turn.  */
+      for (i = 0; i < numberof (ddss_div); i++)
+	{
+	  if (mpz_sgn (in2) == 0)
+	    continue;
+
+	  (ddss_div[i].fptr) (ref1, ref2, in1, in2);
+	  MPZ_CHECK_FORMAT (ref1);
+	  MPZ_CHECK_FORMAT (ref2);
+
+	  mpz_set (res1, in1);
+	  mpz_clobber (res2);
+	  INVOKE_RRSS (ddss_div[i], res1, res2, res1, in2);
+	  MPZ_CHECK_FORMAT (res1);
+	  MPZ_CHECK_FORMAT (res2);
+	  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+	    FAIL (ddss_div, i, in1, in2, NULL);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in1);
+	  INVOKE_RRSS (ddss_div[i], res1, res2, res2, in2);
+	  MPZ_CHECK_FORMAT (res1);
+	  MPZ_CHECK_FORMAT (res2);
+	  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+	    FAIL (ddss_div, i, in1, in2, NULL);
+
+	  mpz_set (res1, in2);
+	  mpz_clobber (res2);
+	  INVOKE_RRSS (ddss_div[i], res1, res2, in1, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  MPZ_CHECK_FORMAT (res2);
+	  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+	    FAIL (ddss_div, i, in1, in2, NULL);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in2);
+	  INVOKE_RRSS (ddss_div[i], res1, res2, in1, res2);
+	  MPZ_CHECK_FORMAT (res1);
+	  MPZ_CHECK_FORMAT (res2);
+	  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+	    FAIL (ddss_div, i, in1, in2, NULL);
+	}
+
+      /* Unary functions, result aliased with the operand.  */
+      for (i = 0; i < numberof (ds); i++)
+	{
+	  if (ds[i].nonneg && mpz_sgn (in1) < 0)
+	    continue;
+
+	  (ds[i].fptr) (ref1, in1);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, in1);
+	  INVOKE_RS (ds[i], res1, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL (ds, i, in1, in2, NULL);
+	}
+
+      in2i = mpz_get_ui (in2);
+
+      /* Functions taking an unsigned long operand, reduced per the
+	 table's "mod" field to keep shifts/exponents bounded.  */
+      for (i = 0; i < numberof (dsi); i++)
+	{
+	  if (dsi[i].mod != 0)
+	    in2i = mpz_get_ui (in2) % dsi[i].mod;
+
+	  (dsi[i].fptr) (ref1, in1, in2i);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, in1);
+	  INVOKE_RRS (dsi[i], res1, res1, in2i);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL (dsi, i, in1, in2, NULL);
+	}
+
+      if (in2i != 0)	  /* Don't divide by 0.  */
+	{
+	  /* ..._ui division: also compare the returned remainder.  */
+	  for (i = 0; i < numberof (dsi_div); i++)
+	    {
+	      r1 = (dsi_div[i].fptr) (ref1, in1, in2i);
+	      MPZ_CHECK_FORMAT (ref1);
+
+	      mpz_set (res1, in1);
+	      r2 = (dsi_div[i].fptr) (res1, res1, in2i);
+	      MPZ_CHECK_FORMAT (res1);
+	      if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
+		FAIL (dsi_div, i, in1, in2, NULL);
+	    }
+
+	  for (i = 0; i < numberof (ddsi_div); i++)
+	    {
+	      r1 = (ddsi_div[i].fptr) (ref1, ref2, in1, in2i);
+	      MPZ_CHECK_FORMAT (ref1);
+
+	      mpz_set (res1, in1);
+	      mpz_clobber (res2);
+	      r2 = (ddsi_div[i].fptr) (res1, res2, res1, in2i);
+	      MPZ_CHECK_FORMAT (res1);
+	      if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
+		FAIL (ddsi_div, i, in1, in2, NULL);
+
+	      mpz_clobber (res1);
+	      mpz_set (res2, in1);
+	      /* NOTE(review): the return value is not captured here, so
+		 the "r1 != r2" below re-tests the r2 from the previous
+		 call; presumably this should read
+		 "r2 = (ddsi_div[i].fptr) (...)" -- confirm against the
+		 upstream GMP test.  */
+	      (ddsi_div[i].fptr) (res1, res2, res2, in2i);
+	      MPZ_CHECK_FORMAT (res1);
+	      if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
+		FAIL (ddsi_div, i, in1, in2, NULL);
+	    }
+	}
+
+      /* mpz_sqrtrem with each aliasing combination.  */
+      if (mpz_sgn (in1) >= 0)
+	{
+	  mpz_sqrtrem (ref1, ref2, in1);
+	  MPZ_CHECK_FORMAT (ref1);
+	  MPZ_CHECK_FORMAT (ref2);
+
+	  mpz_set (res1, in1);
+	  mpz_sqrtrem (res1, res2, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  MPZ_CHECK_FORMAT (res2);
+	  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+	    FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+
+	  mpz_set (res2, in1);
+	  mpz_sqrtrem (res1, res2, res2);
+	  MPZ_CHECK_FORMAT (res1);
+	  MPZ_CHECK_FORMAT (res2);
+	  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+	    FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+
+	  /* All three operands aliased: the remainder is what ends up
+	     in res1, hence the comparison against ref2.  */
+	  mpz_set (res1, in1);
+	  mpz_sqrtrem (res1, res1, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref2, res1) != 0)
+	    FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+	}
+
+      /* mpz_root with result aliased to the operand.  */
+      if (mpz_sgn (in1) >= 0)
+	{
+	  mpz_root (ref1, in1, in2i % 0x100 + 1);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, in1);
+	  mpz_root (res1, res1, in2i % 0x100 + 1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_root, in1, in2, NULL);
+	}
+
+      /* mpz_rootrem with each output aliased to the operand.  */
+      if (mpz_sgn (in1) >= 0)
+	{
+	  mpz_rootrem (ref1, ref2, in1, in2i % 0x100 + 1);
+	  MPZ_CHECK_FORMAT (ref1);
+	  MPZ_CHECK_FORMAT (ref2);
+
+	  mpz_set (res1, in1);
+	  mpz_rootrem (res1, res2, res1, in2i % 0x100 + 1);
+	  MPZ_CHECK_FORMAT (res1);
+	  MPZ_CHECK_FORMAT (res2);
+	  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+	    FAIL2 (mpz_rootrem, in1, in2, NULL);
+
+	  mpz_set (res2, in1);
+	  mpz_rootrem (res1, res2, res2, in2i % 0x100 + 1);
+	  MPZ_CHECK_FORMAT (res1);
+	  MPZ_CHECK_FORMAT (res2);
+	  if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+	    FAIL2 (mpz_rootrem, in1, in2, NULL);
+	}
+
+      if (size_range < 18)	/* run fewer tests since gcdext is slow */
+	{
+	  mpz_gcdext (ref1, ref2, ref3, in1, in2);
+	  MPZ_CHECK_FORMAT (ref1);
+	  MPZ_CHECK_FORMAT (ref2);
+	  MPZ_CHECK_FORMAT (ref3);
+
+	  /* Recompute gcdext with the given operands (some aliased to
+	     res1/res2/res3) and require the reference results.  The
+	     2-output variant passes NULL for the third cofactor.  */
+#define GCDEXT_CHECK3(i1, i2) do {					\
+	    mpz_gcdext (res1, res2, res3, i1, i2);			\
+	    MPZ_CHECK_FORMAT (res1);					\
+	    MPZ_CHECK_FORMAT (res2);					\
+	    MPZ_CHECK_FORMAT (res3);					\
+	    if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0	\
+		|| mpz_cmp (ref3, res3) != 0)				\
+	      FAIL2 (mpz_gcdext, i1, i2, NULL);				\
+	  } while (0)
+#define GCDEXT_CHECK2(i1, i2) do {					\
+	    mpz_gcdext (res1, res2, NULL, i1, i2);			\
+	    MPZ_CHECK_FORMAT (res1);					\
+	    MPZ_CHECK_FORMAT (res2);					\
+	    if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)	\
+	      FAIL2 (mpz_gcdext, i1, i2, NULL);				\
+	  } while (0)
+
+	  mpz_set (res1, in1);
+	  mpz_clobber (res2);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK3 (res1, in2);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in1);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK3 (res2, in2);
+
+	  mpz_clobber (res1);
+	  mpz_clobber (res2);
+	  mpz_set (res3, in1);
+	  GCDEXT_CHECK3 (res3, in2);
+
+	  mpz_set (res1, in2);
+	  mpz_clobber (res2);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK3 (in1, res1);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in2);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK3 (in1, res2);
+
+	  mpz_clobber (res1);
+	  mpz_clobber (res2);
+	  mpz_set (res3, in2);
+	  GCDEXT_CHECK3 (in1, res3);
+
+	  mpz_set (res1, in1);
+	  mpz_set (res2, in2);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK3 (res1, res2);
+
+	  mpz_set (res1, in1);
+	  mpz_clobber (res2);
+	  mpz_set (res3, in2);
+	  GCDEXT_CHECK3 (res1, res3);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in1);
+	  mpz_set (res3, in2);
+	  GCDEXT_CHECK3 (res2, res3);
+
+	  mpz_set (res1, in2);
+	  mpz_set (res2, in1);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK3 (res2, res1);
+
+	  mpz_set (res1, in2);
+	  mpz_clobber (res2);
+	  mpz_set (res3, in1);
+	  GCDEXT_CHECK3 (res3, res1);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in2);
+	  mpz_set (res3, in1);
+	  GCDEXT_CHECK3(res3, res2);
+
+	  mpz_set (res1, in1);
+	  mpz_clobber (res2);
+	  GCDEXT_CHECK2 (res1, in2);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in1);
+	  GCDEXT_CHECK2 (res2, in2);
+
+	  mpz_set (res1, in2);
+	  mpz_clobber (res2);
+	  GCDEXT_CHECK2 (in1, res1);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in2);
+	  GCDEXT_CHECK2 (in1, res2);
+#undef GCDEXT_CHECK
+	  /* NOTE(review): the #undef above names GCDEXT_CHECK, but the
+	     macros defined earlier are GCDEXT_CHECK3 and GCDEXT_CHECK2,
+	     so both remain defined past this point (harmless here, as
+	     the later macros use different names) -- confirm against
+	     the upstream test.  */
+	  /* Identical inputs, gcd(in1, in1). Then the result should be
+	     gcd = abs(in1), s = 0, t = sgn(in1). */
+	  mpz_abs (ref1, in1);
+	  mpz_set_ui (ref2, 0);
+	  mpz_set_si (ref3, mpz_sgn (in1));
+
+#define GCDEXT_CHECK_SAME3(in) do {					\
+	    mpz_gcdext (res1, res2, res3, in, in);			\
+	    MPZ_CHECK_FORMAT (res1);					\
+	    MPZ_CHECK_FORMAT (res2);					\
+	    MPZ_CHECK_FORMAT (res3);					\
+	    if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0	\
+		|| mpz_cmp (ref3, res3) != 0)				\
+	      FAIL2 (mpz_gcdext, in, in, NULL);				\
+	  } while (0)
+#define GCDEXT_CHECK_SAME2(in) do {					\
+	    mpz_gcdext (res1, res2, NULL, in, in);			\
+	    MPZ_CHECK_FORMAT (res1);					\
+	    MPZ_CHECK_FORMAT (res2);					\
+	    if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)	\
+	      FAIL2 (mpz_gcdext, in, in, NULL);				\
+	  } while (0)
+
+	  mpz_set (res1, in1);
+	  mpz_clobber (res2);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK_SAME3 (res1);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in1);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK_SAME3 (res2);
+
+	  mpz_clobber (res1);
+	  mpz_clobber (res2);
+	  mpz_set (res3, in1);
+	  GCDEXT_CHECK_SAME3 (res3);
+
+	  mpz_set (res1, in1);
+	  mpz_clobber (res2);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK_SAME2 (res1);
+
+	  mpz_clobber (res1);
+	  mpz_set (res2, in1);
+	  mpz_clobber (res3);
+	  GCDEXT_CHECK_SAME2 (res2);
+#undef GCDEXT_CHECK_SAME
+	  /* NOTE(review): as with GCDEXT_CHECK above, this #undef names
+	     GCDEXT_CHECK_SAME but the defined macros are
+	     GCDEXT_CHECK_SAME3 and GCDEXT_CHECK_SAME2 -- confirm.  */
+	}
+
+      /* Don't run mpz_powm for huge exponents or when undefined.  */
+      if (size_range < 17 && mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0
+	  && (mpz_sgn (in2) >= 0 || mpz_invert (t, in1, in3)))
+	{
+	  mpz_powm (ref1, in1, in2, in3);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, in1);
+	  mpz_powm (res1, res1, in2, in3);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_powm, in1, in2, in3);
+
+	  mpz_set (res1, in2);
+	  mpz_powm (res1, in1, res1, in3);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_powm, in1, in2, in3);
+
+	  mpz_set (res1, in3);
+	  mpz_powm (res1, in1, in2, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_powm, in1, in2, in3);
+	}
+
+      /* Don't run mpz_powm_ui when undefined.  */
+      if (size_range < 17 && mpz_sgn (in3) != 0)
+	{
+	  mpz_powm_ui (ref1, in1, in2i, in3);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, in1);
+	  mpz_powm_ui (res1, res1, in2i, in3);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_powm_ui, in1, in2, in3);
+
+	  mpz_set (res1, in3);
+	  mpz_powm_ui (res1, in1, in2i, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_powm_ui, in1, in2, in3);
+	}
+
+      /* mpz_gcd_ui with result aliased to the operand.  */
+      {
+	r1 = mpz_gcd_ui (ref1, in1, in2i);
+	MPZ_CHECK_FORMAT (ref1);
+
+	mpz_set (res1, in1);
+	r2 = mpz_gcd_ui (res1, res1, in2i);
+	MPZ_CHECK_FORMAT (res1);
+	if (mpz_cmp (ref1, res1) != 0)
+	  FAIL2 (mpz_gcd_ui, in1, in2, NULL);
+      }
+
+      if (mpz_sgn (in2) != 0)
+	{
+	  /* Test mpz_remove */
+	  mp_bitcnt_t refretval, retval;
+	  refretval = mpz_remove (ref1, in1, in2);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, in1);
+	  retval = mpz_remove (res1, res1, in2);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0 || refretval != retval)
+	    FAIL2 (mpz_remove, in1, in2, NULL);
+
+	  mpz_set (res1, in2);
+	  retval = mpz_remove (res1, in1, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0 || refretval != retval)
+	    FAIL2 (mpz_remove, in1, in2, NULL);
+	}
+
+      if (mpz_sgn (in2) != 0)
+	{
+	  /* Test mpz_divexact */
+	  mpz_mul (t, in1, in2);
+	  mpz_divexact (ref1, t, in2);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, t);
+	  mpz_divexact (res1, res1, in2);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_divexact, t, in2, NULL);
+
+	  mpz_set (res1, in2);
+	  mpz_divexact (res1, t, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_divexact, t, in2, NULL);
+	}
+
+      if (mpz_sgn (in2) > 0)
+	{
+	  /* Test mpz_divexact_gcd, same as mpz_divexact */
+	  mpz_mul (t, in1, in2);
+	  mpz_divexact_gcd (ref1, t, in2);
+	  MPZ_CHECK_FORMAT (ref1);
+
+	  mpz_set (res1, t);
+	  mpz_divexact_gcd (res1, res1, in2);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_divexact_gcd, t, in2, NULL);
+
+	  mpz_set (res1, in2);
+	  mpz_divexact_gcd (res1, t, res1);
+	  MPZ_CHECK_FORMAT (res1);
+	  if (mpz_cmp (ref1, res1) != 0)
+	    FAIL2 (mpz_divexact_gcd, t, in2, NULL);
+	}
+    }
+
+  if (isatty (STDOUT_FILENO))
+    printf ("\r%20s", "");
+
+  mpz_clear (bs);
+  mpz_clear (in1);
+  mpz_clear (in2);
+  mpz_clear (in3);
+  mpz_clear (ref1);
+  mpz_clear (ref2);
+  mpz_clear (ref3);
+  mpz_clear (res1);
+  mpz_clear (res2);
+  mpz_clear (res3);
+  mpz_clear (t);
+
+  if (isatty (STDOUT_FILENO))
+    printf ("\r");
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print a test-failure line: the failing function NAME followed by its
+   operands in base -16 (upper-case hex).  IN2 and IN3 may be NULL when
+   the function takes fewer than three mpz operands.  */
+void
+dump (const char *name, mpz_t in1, mpz_t in2, mpz_t in3)
+{
+  printf ("failure in %s (", name);
+  mpz_out_str (stdout, -16, in1);
+  if (in2 != NULL)
+    {
+      putchar (' ');
+      mpz_out_str (stdout, -16, in2);
+    }
+  if (in3 != NULL)
+    {
+      putchar (' ');
+      mpz_out_str (stdout, -16, in3);
+    }
+  puts (")");
+}
+
+#endif /* ! DLL_EXPORT */
diff --git a/tests/mpz/t-addsub.c b/tests/mpz/t-addsub.c
new file mode 100644
index 0000000..aaa3188
--- /dev/null
+++ b/tests/mpz/t-addsub.c
@@ -0,0 +1,121 @@
+/* Test mpz_add, mpz_sub, mpz_add_ui, mpz_sub_ui, and mpz_ui_sub.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+void debug_mp (mpz_t, int);
+void dump_abort (int, const char *, mpz_t, mpz_t);
+
+/* Test driver: for random signed operands op1, op2, check that the add
+   and sub entry points are mutually consistent:
+     (op1 + op2) - op2 == op1            (mpz_add / mpz_sub)
+     (op1 + u)  - u   == op1             (mpz_add_ui / mpz_sub_ui)
+     u - op1 == -(op1 - u)               (mpz_ui_sub / mpz_sub_ui)
+   where u = op2 whenever op2 fits an unsigned long.  An optional
+   argv[1] overrides the default repetition count.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2, r1, r2;
+  mp_size_t op1n, op2n;
+  unsigned long int op2long;
+  int i;
+  int reps = 100000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (r1);
+  mpz_init (r2);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* Random sizes up to 2^(size_range) bits, random signs.  */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      op1n = mpz_get_ui (bs);
+      mpz_rrandomb (op1, rands, op1n);
+
+      mpz_urandomb (bs, rands, size_range);
+      op2n = mpz_get_ui (bs);
+      mpz_rrandomb (op2, rands, op2n);
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (op1, op1);
+      if ((bsi & 2) != 0)
+	mpz_neg (op2, op2);
+
+      /* printf ("%ld %ld\n", SIZ (multiplier), SIZ (multiplicand)); */
+
+      /* (op1 + op2) - op2 must give back op1.  */
+      mpz_add (r1, op1, op2);
+      mpz_sub (r2, r1, op2);
+      if (mpz_cmp (r2, op1) != 0)
+	dump_abort (i, "mpz_add or mpz_sub incorrect", op1, op2);
+
+      if (mpz_fits_ulong_p (op2))
+	{
+	  op2long = mpz_get_ui (op2);
+	  /* (op1 + u) - u must give back op1.  */
+	  mpz_add_ui (r1, op1, op2long);
+	  mpz_sub_ui (r2, r1, op2long);
+	  if (mpz_cmp (r2, op1) != 0)
+	    dump_abort (i, "mpz_add_ui or mpz_sub_ui incorrect", op1, op2);
+
+	  /* u - op1 must equal -(op1 - u).  (Diagnostic fixed: the
+	     functions exercised here are mpz_ui_sub and mpz_sub_ui,
+	     not mpz_add_ui.)  */
+	  mpz_ui_sub (r1, op2long, op1);
+	  mpz_sub_ui (r2, op1, op2long);
+	  mpz_neg (r2, r2);
+	  if (mpz_cmp (r1, r2) != 0)
+	    dump_abort (i, "mpz_sub_ui or mpz_ui_sub incorrect", op1, op2);
+	}
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (r1);
+  mpz_clear (r2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Report a failed consistency check: print the message S, the test
+   index I, and both operands in upper-case hex, then abort.  */
+void
+dump_abort (int i, const char *s, mpz_t op1, mpz_t op2)
+{
+  fprintf (stderr, "ERROR: %s in test %d\n", s, i);
+  fputs ("op1 = ", stderr);
+  debug_mp (op1, -16);
+  fputs ("op2 = ", stderr);
+  debug_mp (op2, -16);
+  abort();
+}
+
+/* Print X to stderr in the given BASE, followed by a newline.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-aorsmul.c b/tests/mpz/t-aorsmul.c
new file mode 100644
index 0000000..4bd435d
--- /dev/null
+++ b/tests/mpz/t-aorsmul.c
@@ -0,0 +1,464 @@
+/* Test mpz_addmul, mpz_addmul_ui, mpz_submul, mpz_submul_ui.
+
+Copyright 2001, 2002, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define M GMP_NUMB_MAX
+
+
+/* Exercise mpz_addmul/mpz_submul where the destination is also the first
+   multiplication operand: got = w; got +-= got*y.  The expected value is
+   computed independently as w +- w*y with plain mpz_mul/mpz_add/mpz_sub.  */
+void
+check_one_inplace (mpz_srcptr w, mpz_srcptr y)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul (want, w, y);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul (got, got, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul inplace fail\n");
+    fail:
+      mpz_trace ("w", w);
+      mpz_trace ("y", y);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+  mpz_mul (want, w, y);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul (got, got, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul inplace fail\n");
+      goto fail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Same in-place check as check_one_inplace, but for the unsigned-long
+   variants mpz_addmul_ui/mpz_submul_ui: got = w; got +-= got*y.  */
+void
+check_one_ui_inplace (mpz_ptr w, unsigned long y)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul_ui (want, w, (unsigned long) y);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul_ui (got, got, (unsigned long) y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul_ui fail\n");
+    fail:
+      mpz_trace ("w", w);
+      printf    ("y=0x%lX   %lu\n", y, y);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+  mpz_mul_ui (want, w, y);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul_ui (got, got, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul_ui fail\n");
+      goto fail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Run the in-place checks on all four sign combinations of W and Y.
+   Each operand is negated twice over its loop, so both come back with
+   their original signs when the function returns.  */
+void
+check_all_inplace (mpz_ptr w, mpz_ptr y)
+{
+  int  wneg, yneg;
+
+  MPZ_CHECK_FORMAT (w);
+  MPZ_CHECK_FORMAT (y);
+
+  for (wneg = 0; wneg < 2; wneg++)
+    {
+      for (yneg = 0; yneg < 2; yneg++)
+        {
+          check_one_inplace (w, y);
+
+          /* the _ui variant only applies when y fits an unsigned long */
+          if (mpz_fits_ulong_p (y))
+            check_one_ui_inplace (w, mpz_get_ui (y));
+
+          mpz_neg (y, y);
+        }
+      mpz_neg (w, w);
+    }
+}
+
+/* Check mpz_addmul and mpz_submul against reference results computed with
+   plain mpz_mul/mpz_add/mpz_sub: got = w +- x*y.  */
+void
+check_one (mpz_srcptr w, mpz_srcptr x, mpz_srcptr y)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul (want, x, y);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul (got, x, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul fail\n");
+    fail:
+      mpz_trace ("w", w);
+      mpz_trace ("x", x);
+      mpz_trace ("y", y);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+
+  /* want currently holds w + x*y; recover x*y and form w - x*y without
+     recomputing the product */
+  mpz_sub (want, want, w);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul (got, x, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul fail\n");
+      goto fail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Check mpz_addmul/mpz_submul with identical multiplication operands,
+   got = w +- x*x, which may take a squaring path internally.  */
+void
+check_sqr (mpz_srcptr w, mpz_srcptr x)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul (want, x, x);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul (got, x, x);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul xx fail\n");
+    sqrfail:
+      mpz_trace ("w", w);
+      mpz_trace ("x", x);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+  /* want holds w + x*x; recover x*x and form w - x*x */
+  mpz_sub (want, want, w);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul (got, x, x);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul xx fail\n");
+      goto sqrfail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Check mpz_addmul_ui and mpz_submul_ui against reference results,
+   got = w +- x*y for an unsigned long y.  */
+void
+check_one_ui (mpz_ptr w, mpz_ptr x, unsigned long y)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul_ui (want, x, (unsigned long) y);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul_ui (got, x, (unsigned long) y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul_ui fail\n");
+    fail:
+      mpz_trace ("w", w);
+      mpz_trace ("x", x);
+      printf    ("y=0x%lX   %lu\n", y, y);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+  mpz_mul_ui (want, x, y);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul_ui (got, x, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul_ui fail\n");
+      goto fail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+
+/* Drive check_one/check_one_ui/check_sqr over all sign combinations of
+   W, X and Y, and with X and Y exchanged.  Each negation loop runs twice,
+   so operand signs are restored on exit; the final mpz_swap runs twice as
+   well, leaving X and Y back in their original slots.  */
+void
+check_all (mpz_ptr w, mpz_ptr x, mpz_ptr y)
+{
+  int    swap, wneg, xneg, yneg;
+
+  MPZ_CHECK_FORMAT (w);
+  MPZ_CHECK_FORMAT (x);
+  MPZ_CHECK_FORMAT (y);
+
+  for (swap = 0; swap < 2; swap++)
+    {
+      for (wneg = 0; wneg < 2; wneg++)
+        {
+          for (xneg = 0; xneg < 2; xneg++)
+            {
+              for (yneg = 0; yneg < 2; yneg++)
+                {
+                  check_one (w, x, y);
+
+                  /* _ui variant only when y fits an unsigned long */
+                  if (mpz_fits_ulong_p (y))
+                    check_one_ui (w, x, mpz_get_ui (y));
+
+                  mpz_neg (y, y);
+                }
+
+	      check_sqr (w, x);
+
+              mpz_neg (x, x);
+            }
+          mpz_neg (w, w);
+        }
+      mpz_swap (x, y);
+    }
+}
+
+/* Fixed test data for the in-place _ui checks: limb arrays chosen to hit
+   carry/borrow propagation (GMP_NUMB_MAX limbs, M) and extreme multipliers
+   (ULONG_MAX) on 1-, 2- and 3-limb values.  */
+void
+check_data_inplace_ui (void)
+{
+  static const struct {
+    mp_limb_t      w[6];
+    unsigned long  y;
+
+  } data[] = {
+
+    { { 0 }, 0 },
+    { { 0 }, 1 },
+    { { 1 }, 1 },
+    { { 2 }, 1 },
+
+    { { 123 }, 1 },
+    { { 123 }, ULONG_MAX },
+    { { M }, 1 },
+    { { M }, ULONG_MAX },
+
+    { { 123, 456 }, 1 },
+    { { M, M }, 1 },
+    { { 123, 456 }, ULONG_MAX },
+    { { M, M }, ULONG_MAX },
+
+    { { 123, 456, 789 }, 1 },
+    { { M, M, M }, 1 },
+    { { 123, 456, 789 }, ULONG_MAX },
+    { { M, M, M }, ULONG_MAX },
+  };
+
+  mpz_t  w, y;
+  int    i;
+
+  mpz_init (w);
+  mpz_init (y);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* whole 6-limb array is passed; high zero limbs are not significant */
+      mpz_set_n (w, data[i].w, (mp_size_t) numberof(data[i].w));
+      mpz_set_ui (y, data[i].y);
+      check_all_inplace (w, y);
+    }
+
+  mpz_clear (w);
+  mpz_clear (y);
+}
+
+/* Fixed (w, x, y) triples aimed at specific mpn-level edge cases in
+   mpz_addmul/mpz_submul; each group is labelled with the case it targets
+   (result collapsing to 0/1/-1, carry out, various borrow shapes).  */
+void
+check_data (void)
+{
+  static const struct {
+    mp_limb_t  w[6];
+    mp_limb_t  x[6];
+    mp_limb_t  y[6];
+
+  } data[] = {
+
+    /* reducing to zero */
+    { { 1 }, { 1 }, { 1 } },
+    { { 2 }, { 1 }, { 2 } },
+    { { 0,1 }, { 0,1 }, { 1 } },
+
+    /* reducing to 1 */
+    { { 0,1 },       { M },       { 1 } },
+    { { 0,0,1 },     { M,M },     { 1 } },
+    { { 0,0,0,1 },   { M,M,M },   { 1 } },
+    { { 0,0,0,0,1 }, { M,M,M,M }, { 1 } },
+
+    /* reducing to -1 */
+    { { M },       { 0,1 },       { 1 } },
+    { { M,M },     { 0,0,1 },     { 1 } },
+    { { M,M,M },   { 0,0,0,1 },   { 1 } },
+    { { M,M,M,M }, { 0,0,0,0,1 }, { 1 } },
+
+    /* carry out of addmul */
+    { { M },     { 1 }, { 1 } },
+    { { M,M },   { 1 }, { 1 } },
+    { { M,M,M }, { 1 }, { 1 } },
+
+    /* borrow from submul */
+    { { 0,1 },     { 1 }, { 1 } },
+    { { 0,0,1 },   { 1 }, { 1 } },
+    { { 0,0,0,1 }, { 1 }, { 1 } },
+
+    /* borrow from submul */
+    { { 0,0,1 },     { 0,1 }, { 1 } },
+    { { 0,0,0,1 },   { 0,1 }, { 1 } },
+    { { 0,0,0,0,1 }, { 0,1 }, { 1 } },
+
+    /* more borrow from submul */
+    { { M }, { 0,1 },       { 1 } },
+    { { M }, { 0,0,1 },     { 1 } },
+    { { M }, { 0,0,0,1 },   { 1 } },
+    { { M }, { 0,0,0,0,1 }, { 1 } },
+
+    /* big borrow from submul */
+    { { 0,0,1 },     { M,M }, { M } },
+    { { 0,0,0,1 },   { M,M }, { M } },
+    { { 0,0,0,0,1 }, { M,M }, { M } },
+
+    /* small w */
+    { { 0,1 }, { M,M },       { M } },
+    { { 0,1 }, { M,M,M },     { M } },
+    { { 0,1 }, { M,M,M,M },   { M } },
+    { { 0,1 }, { M,M,M,M,M }, { M } },
+  };
+
+  mpz_t  w, x, y;
+  int    i;
+
+  mpz_init (w);
+  mpz_init (x);
+  mpz_init (y);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_n (w, data[i].w, (mp_size_t) numberof(data[i].w));
+      mpz_set_n (x, data[i].x, (mp_size_t) numberof(data[i].x));
+      mpz_set_n (y, data[i].y, (mp_size_t) numberof(data[i].y));
+      check_all (w, x, y);
+    }
+
+  mpz_clear (w);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+/* Random testing: operands up to 5 limbs (mpz_errandomb gives an
+   exponential size distribution, favouring small sizes), plus a pass
+   where y is restricted to fit an unsigned long so the _ui paths are
+   exercised too.  An optional argv[1] overrides the repetition count.  */
+void
+check_random (int argc, char *argv[])
+{
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t  w, x, y;
+  int    i, reps = 2000;
+
+  mpz_init (w);
+  mpz_init (x);
+  mpz_init (y);
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_errandomb (w, rands, 5*GMP_LIMB_BITS);
+      mpz_errandomb (x, rands, 5*GMP_LIMB_BITS);
+      mpz_errandomb (y, rands, 5*GMP_LIMB_BITS);
+      check_all (w, x, y);
+      check_all_inplace (w, y);
+
+      mpz_errandomb (w, rands, 5*GMP_LIMB_BITS);
+      mpz_errandomb (x, rands, 5*GMP_LIMB_BITS);
+      mpz_errandomb (y, rands, BITS_PER_ULONG);
+      check_all (w, x, y);
+      check_all_inplace (w, y);
+    }
+
+  mpz_clear (w);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+/* Run fixed-data then random checks.  mp_trace_base = -16 makes the
+   mpz_trace failure output print in upper-case hex.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_data ();
+  check_data_inplace_ui ();
+  check_random (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-bin.c b/tests/mpz/t-bin.c
new file mode 100644
index 0000000..b647be4
--- /dev/null
+++ b/tests/mpz/t-bin.c
@@ -0,0 +1,328 @@
+/* Exercise mpz_bin_ui and mpz_bin_uiui.
+
+Copyright 2000, 2001, 2010, 2012, 2018, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Default number of generated tests. */
+#define COUNT 700
+
+/* Check that mpz_bin_ui (n, k) produces WANT; dump operands and abort
+   on mismatch.  */
+void
+try_mpz_bin_ui (mpz_srcptr want, mpz_srcptr n, unsigned long k)
+{
+  mpz_t  got;
+
+  mpz_init (got);
+  mpz_bin_ui (got, n, k);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (got, want) != 0)
+    {
+      printf ("mpz_bin_ui wrong\n");
+      printf ("  n="); mpz_out_str (stdout, 10, n); printf ("\n");
+      printf ("  k=%lu\n", k);
+      printf ("  got="); mpz_out_str (stdout, 10, got); printf ("\n");
+      printf ("  want="); mpz_out_str (stdout, 10, want); printf ("\n");
+      abort();
+    }
+  mpz_clear (got);
+}
+
+
+/* Check that mpz_bin_uiui (n, k) produces WANT; dump operands and abort
+   on mismatch.  */
+void
+try_mpz_bin_uiui (mpz_srcptr want, unsigned long n, unsigned long k)
+{
+  mpz_t  got;
+
+  mpz_init (got);
+  mpz_bin_uiui (got, n, k);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (got, want) != 0)
+    {
+      printf ("mpz_bin_uiui wrong\n");
+      printf ("  n=%lu\n", n);
+      printf ("  k=%lu\n", k);
+      printf ("  got="); mpz_out_str (stdout, 10, got); printf ("\n");
+      printf ("  want="); mpz_out_str (stdout, 10, want); printf ("\n");
+      abort();
+    }
+  mpz_clear (got);
+}
+
+
+/* Spot checks from a fixed table: k > n cases giving 0, negative n via
+   the identity bin(-n,k) = (-1)^k * bin(n+k-1,k), and a few precomputed
+   values.  Strings are parsed with base 0 (auto-detected).  */
+void
+samples (void)
+{
+  static const struct {
+    const char     *n;
+    unsigned long  k;
+    const char     *want;
+  } data[] = {
+
+    {   "0", 123456, "0" },
+    {   "1", 543210, "0" },
+    {   "2", 123321, "0" },
+    {   "3", 234567, "0" },
+    {   "10", 23456, "0" },
+
+    /* negatives, using bin(-n,k)=bin(n+k-1,k) */
+    {   "-1",  0,  "1"  },
+    {   "-1",  1, "-1"  },
+    {   "-1",  2,  "1"  },
+    {   "-1",  3, "-1"  },
+    {   "-1",  4,  "1"  },
+
+    {   "-2",  0,  "1"  },
+    {   "-2",  1, "-2"  },
+    {   "-2",  2,  "3"  },
+    {   "-2",  3, "-4"  },
+    {   "-2",  4,  "5"  },
+    {   "-2",  5, "-6"  },
+    {   "-2",  6,  "7"  },
+
+    {   "-3",  0,   "1"  },
+    {   "-3",  1,  "-3"  },
+    {   "-3",  2,   "6"  },
+    {   "-3",  3, "-10"  },
+    {   "-3",  4,  "15"  },
+    {   "-3",  5, "-21"  },
+    {   "-3",  6,  "28"  },
+
+    /* A few random values */
+    {   "41", 20,  "269128937220" },
+    {   "62", 37,  "147405545359541742" },
+    {   "50", 18,  "18053528883775" },
+    {  "149", 21,  "19332950844468483467894649" },
+  };
+
+  mpz_t  n, want;
+  int    i;
+
+  mpz_init (n);
+  mpz_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (n, data[i].n, 0);
+      mpz_set_str_or_abort (want, data[i].want, 0);
+
+      try_mpz_bin_ui (want, n, data[i].k);
+
+      /* the uiui form requires a nonnegative n fitting an unsigned long */
+      if (mpz_fits_ulong_p (n))
+	try_mpz_bin_uiui (want, mpz_get_ui (n), data[i].k);
+    }
+
+  mpz_clear (n);
+  mpz_clear (want);
+}
+
+
+/* Test some bin(2k,k) cases.  This produces some biggish numbers to
+   exercise the limb accumulating code.  */
+/* Tests central binomials bin(2k,k) for k = 1..count-1, maintaining WANT
+   incrementally via bin(2(k+1),k+1) = bin(2k,k) * 2*(2k+1) / (k+1).
+   Starts from want = bin(2,1) = 2.  */
+void
+twos (int count)
+{
+  mpz_t          n, want;
+  unsigned long  k;
+
+  mpz_init (n);
+
+  mpz_init_set_ui (want, (unsigned long) 2);
+  for (k = 1; k < count; k++)
+    {
+      mpz_set_ui (n, 2*k);
+      try_mpz_bin_ui (want, n, k);
+
+      try_mpz_bin_uiui (want, 2*k, k);
+
+      /* step to bin(2(k+1), k+1); division is exact but fdiv is fine */
+      mpz_mul_ui (want, want, 2*(2*k+1));
+      mpz_fdiv_q_ui (want, want, k+1);
+    }
+
+  mpz_clear (n);
+  mpz_clear (want);
+}
+
+/* Test some random bin(n,k) cases.  This produces some biggish
+   numbers to exercise the limb accumulating code.  */
+/* Random walk upward through Pascal's triangle, keeping the invariant
+   want = bin(n,k).  Each step either increments both n and k (using
+   bin(n+1,k+1) = bin(n,k)*(n+1)/(k+1)) or just n (using
+   bin(n+1,k) = bin(n,k)*(n+1)/(n+1-k)).  A second walk repeats this with
+   a multi-precision n (~200 bits), exercising only mpz_bin_ui.  */
+void
+randomwalk (int count)
+{
+  mpz_t          n_z, want, tmp;
+  unsigned long  n, k, i, r;
+  int            tests;
+  gmp_randstate_ptr rands;
+
+  rands = RANDS;
+  mpz_init (n_z);
+
+  k = 3;
+  n = 12;
+  mpz_init_set_ui (want, (unsigned long) 220); /* binomial(12,3) = 220 */
+
+  for (tests = 1; tests < count; tests++)
+    {
+      /* r in 1..62: low 3 bits pick the diagonal steps, high bits the
+	 horizontal steps */
+      r = gmp_urandomm_ui (rands, 62) + 1;
+      for (i = r & 7; i > 0; i--)
+	{
+	  n++; k++;
+	  mpz_mul_ui (want, want, n);
+	  mpz_divexact_ui (want, want, k);
+	}
+      for (i = r >> 3; i > 0; i--)
+	{
+	  n++;
+	  mpz_mul_ui (want, want, n);
+	  mpz_divexact_ui (want, want, n - k);
+	}
+
+      mpz_set_ui (n_z, n);
+      try_mpz_bin_ui (want, n_z, k);
+
+      try_mpz_bin_uiui (want, n, k);
+    }
+
+  k = 2;
+  mpz_urandomb (n_z, rands, 200);
+  mpz_mul (want, n_z, n_z); /* want = n_z ^ 2 */
+  mpz_sub (want, want, n_z); /* want = n_z ^ 2 - n_z = n_z (n_z- 1) */
+  mpz_tdiv_q_2exp (want, want, 1); /* want = n_z (n_z- 1) / 2 = binomial (n_z, 2) */
+  mpz_init (tmp);
+  for (tests = 1; tests < count; tests++)
+    {
+      r = gmp_urandomm_ui (rands, 62) + 1;
+      for (i = r & 7; i > 0; i--)
+	{
+	  k++;
+	  mpz_add_ui (n_z, n_z, 1);
+	  mpz_mul (want, want, n_z);
+	  mpz_divexact_ui (want, want, k);
+	}
+      for (i = r >> 3; i > 0; i--)
+	{
+	  mpz_add_ui (n_z, n_z, 1);
+	  mpz_mul (want, want, n_z);
+	  mpz_sub_ui (tmp, n_z, k);
+	  mpz_divexact (want, want, tmp);
+	}
+
+      try_mpz_bin_ui (want, n_z, k);
+    }
+
+  mpz_clear (tmp);
+  mpz_clear (n_z);
+  mpz_clear (want);
+}
+
+/* Test some random bin(n,k) cases.  This produces some biggish
+   numbers to exercise the limb accumulating code.  */
+/* Random walk starting from n = ULONG_MAX and moving down, keeping the
+   invariant want = bin(n,k); the tests use the symmetric form bin(n,n-k).
+   Initial value: bin(ULONG_MAX,2) = n*(n-1)/2 = n*(n>>1) since n is odd.
+   Steps: k++ via bin(n,k+1) = bin(n,k)*(n-k)/(k+1), or n-- via
+   bin(n-1,k) = bin(n,k)*(n-k)/n.  */
+void
+randomwalk_down (int count)
+{
+  mpz_t          n_z, want, tmp;
+  unsigned long  n, k, i, r;
+  int            tests;
+  gmp_randstate_ptr rands;
+
+  rands = RANDS;
+  mpz_init (n_z);
+  mpz_init (tmp);
+
+  k = 2;
+  n = ULONG_MAX;
+  mpz_init_set_ui (want, n);
+  mpz_mul_ui (want, want, n >> 1);
+
+  for (tests = 1; tests < count; tests++)
+    {
+      r = gmp_urandomm_ui (rands, 62) + 1;
+      for (i = r & 7; i > 0; i--)
+	{
+	  mpz_mul_ui (want, want, n - k);
+	  ++k;
+	  mpz_divexact_ui (want, want, k);
+	}
+      for (i = r >> 3; i > 0; i--)
+	{
+	  mpz_mul_ui (want, want, n - k);
+	  mpz_divexact_ui (want, want, n);
+	  --n;
+	}
+
+      mpz_set_ui (n_z, n);
+      try_mpz_bin_ui (want, n_z, n - k);
+
+      try_mpz_bin_uiui (want, n, n - k);
+    }
+
+  mpz_clear (tmp);
+  mpz_clear (n_z);
+  mpz_clear (want);
+}
+
+
+/* Test all bin(n,k) cases, with 0 <= k <= n + 1 <= count.  */
+/* Exhaustively test bin(n,k) for all 0 <= k <= n+1 < count, stepping
+   want = bin(n,k) -> bin(n,k+1) via want*(n-k)/(k+1).  At k = n the
+   multiply by (n-k) = 0 makes want 0, so the trailing calls after the
+   inner loop verify bin(n, n+1) = 0.  */
+void
+smallexaustive (unsigned int count)
+{
+  mpz_t          n_z, want;
+  unsigned long  n, k;
+
+  mpz_init (n_z);
+  mpz_init (want);
+
+  for (n = 0; n < count; n++)
+    {
+      mpz_set_ui (want, (unsigned long) 1);
+      mpz_set_ui (n_z, n);
+      for (k = 0; k <= n; k++)
+	{
+	  try_mpz_bin_ui (want, n_z, k);
+	  try_mpz_bin_uiui (want, n, k);
+	  mpz_mul_ui (want, want, n - k);
+	  mpz_fdiv_q_ui (want, want, k + 1);
+	}
+      try_mpz_bin_ui (want, n_z, k);
+      try_mpz_bin_uiui (want, n, k);
+    }
+
+  mpz_clear (n_z);
+  mpz_clear (want);
+}
+
+/* Split the test budget across the strategies: TESTS_REPS lets
+   GMP_CHECK_REPFACTOR / argv scale COUNT.  */
+int
+main (int argc, char **argv)
+{
+  int count;
+
+  count = COUNT;
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+
+  samples ();
+  smallexaustive (count >> 4);
+  twos (count >> 1);
+  randomwalk (count - (count >> 1));
+  randomwalk_down (count >> 1);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-bit.c b/tests/mpz/t-bit.c
new file mode 100644
index 0000000..cfcdeea
--- /dev/null
+++ b/tests/mpz/t-bit.c
@@ -0,0 +1,405 @@
+/* Test mpz_setbit, mpz_clrbit, mpz_tstbit.
+
+Copyright 1997, 2000-2003, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 4
+#endif
+
+
+/* Print X to stdout in the given BASE (negative base = upper-case
+   digits), followed by a newline.  */
+void
+debug_mp (mpz_srcptr x, int base)
+{
+  mpz_out_str (stdout, base, x); fputc ('\n', stdout);
+}
+
+
+/* exercise the case where mpz_clrbit or mpz_combit ends up extending a
+   value like -2^(k*GMP_NUMB_BITS-1) when clearing bit k*GMP_NUMB_BITS-1.  */
+/* And vice-versa. */
+/* For f==0 use mpz_clrbit/mpz_setbit, for f==1 use mpz_combit, on values
+   of the form -2^(i*GMP_NUMB_BITS - 1), checking both the grow to -2^n
+   and the shrink back.  */
+void
+check_clr_extend (void)
+{
+  mpz_t          got, want;
+  unsigned long  i;
+  int            f;
+
+  mpz_init (got);
+  mpz_init (want);
+
+  for (i = 1; i < 5; i++)
+    {
+      for (f = 0; f <= 1; f++)
+	{
+	  /* lots of 1 bits in _mp_d */
+	  /* (this value is immediately replaced below -- presumably it is
+	     here to leave nonzero data in the allocated limbs so stale
+	     contents get exercised; TODO confirm against upstream)  */
+	  mpz_set_si (got, 1L);
+	  mpz_mul_2exp (got, got, 10*GMP_NUMB_BITS);
+	  mpz_sub_ui (got, got, 1L);
+
+	  /* value -2^(n-1) representing ..11100..00 */
+	  mpz_set_si (got, -1L);
+	  mpz_mul_2exp (got, got, i*GMP_NUMB_BITS-1);
+
+	  /* complement bit n, giving ..11000..00 which is -2^n */
+	  if (f == 0)
+	    mpz_clrbit (got, i*GMP_NUMB_BITS-1);
+	  else
+	    mpz_combit (got, i*GMP_NUMB_BITS-1);
+	  MPZ_CHECK_FORMAT (got);
+
+	  mpz_set_si (want, -1L);
+	  mpz_mul_2exp (want, want, i*GMP_NUMB_BITS);
+
+	  if (mpz_cmp (got, want) != 0)
+	    {
+	      if (f == 0)
+		printf ("mpz_clrbit: ");
+	      else
+		printf ("mpz_combit: ");
+	      printf ("wrong after extension\n");
+	      mpz_trace ("got ", got);
+	      mpz_trace ("want", want);
+	      abort ();
+	    }
+
+	  /* complement bit n, going back to ..11100..00 which is -2^(n-1) */
+	  if (f == 0)
+	    mpz_setbit (got, i*GMP_NUMB_BITS-1);
+	  else
+	    mpz_combit (got, i*GMP_NUMB_BITS-1);
+	  MPZ_CHECK_FORMAT (got);
+
+	  mpz_set_si (want, -1L);
+	  mpz_mul_2exp (want, want, i*GMP_NUMB_BITS - 1);
+
+	  if (mpz_cmp (got, want) != 0)
+	    {
+	      if (f == 0)
+		printf ("mpz_setbit: ");
+	      else
+		printf ("mpz_combit: ");
+	      printf ("wrong after shrinking\n");
+	      mpz_trace ("got ", got);
+	      mpz_trace ("want", want);
+	      abort ();
+	    }
+	}
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+/* mpz_combit on negative inputs: fixed (input, expected) limb pairs,
+   both negated before use, targeting bits at and just above a limb
+   boundary.  */
+void
+check_com_negs (void)
+{
+  static const struct {
+    unsigned long  bit;
+    mp_size_t      inp_size;
+    mp_limb_t      inp_n[5];
+    mp_size_t      want_size;
+    mp_limb_t      want_n[5];
+  } data[] = {
+    { GMP_NUMB_BITS,   2, { 1, 1 },  1, { 1 } },
+    { GMP_NUMB_BITS+1, 2, { 1, 1 },  2, { 1, 3 } },
+
+    { GMP_NUMB_BITS,   2, { 0, 1 },  2, { 0, 2 } },
+    { GMP_NUMB_BITS+1, 2, { 0, 1 },  2, { 0, 3 } },
+  };
+  mpz_t  inp, got, want;
+  int    i;
+
+  mpz_init (got);
+  mpz_init (want);
+  mpz_init (inp);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_n (inp, data[i].inp_n, data[i].inp_size);
+      mpz_neg (inp, inp);
+
+      mpz_set_n (want, data[i].want_n, data[i].want_size);
+      mpz_neg (want, want);
+
+      mpz_set (got, inp);
+      mpz_combit (got, data[i].bit);
+
+      if (mpz_cmp (got, want) != 0)
+	{
+	  printf ("mpz_combit: wrong on neg data[%d]\n", i);
+	  mpz_trace ("inp ", inp);
+	  printf    ("bit %lu\n", data[i].bit);
+	  mpz_trace ("got ", got);
+	  mpz_trace ("want", want);
+	  abort ();
+	}
+    }
+
+  mpz_clear (inp);
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+/* See that mpz_tstbit matches a twos complement calculated explicitly, for
+   various low zeros.  */
+/* See that mpz_tstbit matches a twos complement calculated explicitly, for
+   various low zeros.  */
+void
+check_tstbit (void)
+{
+#define MAX_ZEROS  3
+#define NUM_LIMBS  3
+
+  mp_limb_t      pos[1+NUM_LIMBS+MAX_ZEROS];
+  mp_limb_t      neg[1+NUM_LIMBS+MAX_ZEROS];
+  mpz_t          z;
+  unsigned long  i;
+  int            zeros, low1;
+  int            got, want;
+
+  mpz_init (z);
+  for (zeros = 0; zeros <= MAX_ZEROS; zeros++)
+    {
+      /* random limbs above `zeros' zero low limbs; optionally force the
+	 lowest bit to 1 so both even and odd magnitudes are covered */
+      MPN_ZERO (pos, numberof(pos));
+      mpn_random2 (pos+zeros, (mp_size_t) NUM_LIMBS);
+
+      for (low1 = 0; low1 <= 1; low1++)
+	{
+	  if (low1)
+	    pos[0] |= 1;
+
+	  /* neg = -pos as an explicit twos complement; z = -(value of neg
+	     limbs), so mpz_tstbit(z,i) must equal bit i of pos */
+	  refmpn_neg (neg, pos, (mp_size_t) numberof(neg));
+	  mpz_set_n (z, neg, (mp_size_t) numberof(neg));
+	  mpz_neg (z, z);
+
+	  for (i = 0; i < numberof(pos)*GMP_NUMB_BITS; i++)
+	    {
+	      got = mpz_tstbit (z, i);
+	      want = refmpn_tstbit (pos, i);
+	      if (got != want)
+		{
+		  printf ("wrong at bit %lu, with %d zeros\n", i, zeros);
+		  printf ("z neg "); debug_mp (z, -16);
+		  mpz_set_n (z, pos, (mp_size_t) numberof(pos));
+		  printf ("pos   "); debug_mp (z, -16);
+		  mpz_set_n (z, neg, (mp_size_t) numberof(neg));
+		  printf ("neg   "); debug_mp (z, -16);
+		  exit (1);
+		}
+	    }
+	}
+    }
+  mpz_clear (z);
+}
+
+
+/* clr/set/clr then com/com a single bit, verifying mpz_tstbit after each
+   step.  Bit positions straddle limb boundaries (offset -2..2 around each
+   of the first few limbs) and the start value is 1, 0 or -1.  */
+void
+check_single (void)
+{
+  mpz_t  x;
+  int    limb, offset, initial;
+  unsigned long  bit;
+
+  mpz_init (x);
+
+  for (limb = 0; limb < 4; limb++)
+    {
+      /* no negative offsets at limb 0: bit index is unsigned */
+      for (offset = (limb==0 ? 0 : -2); offset <= 2; offset++)
+	{
+	  for (initial = 1; initial >= -1; initial--)
+	    {
+	      mpz_set_si (x, (long) initial);
+
+	      bit = (unsigned long) limb*GMP_LIMB_BITS + offset;
+
+	      mpz_clrbit (x, bit);
+	      MPZ_CHECK_FORMAT (x);
+	      if (mpz_tstbit (x, bit) != 0)
+		{
+		  printf ("check_single(): expected 0\n");
+		  abort ();
+		}
+
+	      mpz_setbit (x, bit);
+	      MPZ_CHECK_FORMAT (x);
+	      if (mpz_tstbit (x, bit) != 1)
+		{
+		  printf ("check_single(): expected 1\n");
+		  abort ();
+		}
+
+	      mpz_clrbit (x, bit);
+	      MPZ_CHECK_FORMAT (x);
+	      if (mpz_tstbit (x, bit) != 0)
+		{
+		  printf ("check_single(): expected 0\n");
+		  abort ();
+		}
+
+	      mpz_combit (x, bit);
+	      MPZ_CHECK_FORMAT (x);
+	      if (mpz_tstbit (x, bit) != 1)
+		{
+		  printf ("check_single(): expected 1\n");
+		  abort ();
+		}
+
+	      mpz_combit (x, bit);
+	      MPZ_CHECK_FORMAT (x);
+	      if (mpz_tstbit (x, bit) != 0)
+		{
+		  printf ("check_single(): expected 0\n");
+		  abort ();
+		}
+	    }
+	}
+    }
+
+  mpz_clear (x);
+}
+
+
+/* Random sequences of setbit, clrbit, combit on one bit position,
+   snapshotting the value after each operation (s0..s3) and cross-checking
+   the snapshots against each other, against mpz_tstbit, and against
+   mpz_ior/mpz_and with the single-bit mask m = 2^bitindex.  */
+void
+check_random (int argc, char *argv[])
+{
+  mpz_t x, s0, s1, s2, s3, m;
+  mp_size_t xsize;
+  int i;
+  int reps = 100000;
+  int bit0, bit1, bit2, bit3;
+  unsigned long int bitindex;
+  const char  *s = "";
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (x);
+  mpz_init (s0);
+  mpz_init (s1);
+  mpz_init (s2);
+  mpz_init (s3);
+  mpz_init (m);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* xsize ranges over -SIZE..SIZE-1; a negative size gives a negative
+	 random value from mpz_random2 */
+      xsize = urandom () % (2 * SIZE) - SIZE;
+      mpz_random2 (x, xsize);
+      bitindex = urandom () % SIZE;
+
+      mpz_set (s0, x);
+      bit0 = mpz_tstbit (x, bitindex);
+      mpz_setbit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+
+      mpz_set (s1, x);
+      bit1 = mpz_tstbit (x, bitindex);
+      mpz_clrbit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+
+      mpz_set (s2, x);
+      bit2 = mpz_tstbit (x, bitindex);
+      mpz_combit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+
+      mpz_set (s3, x);
+      bit3 = mpz_tstbit (x, bitindex);
+
+#define FAIL(str) do { s = str; goto fail; } while (0)
+
+      /* after set the bit is 1, after clr 0, after com (from 0) 1 */
+      if (bit1 != 1)  FAIL ("bit1 != 1");
+      if (bit2 != 0)  FAIL ("bit2 != 0");
+      if (bit3 != 1)  FAIL ("bit3 != 1");
+
+      if (bit0 == 0)
+	{
+	  if (mpz_cmp (s0, s1) == 0 || mpz_cmp (s0, s2) != 0 || mpz_cmp (s0, s3) == 0)
+	    abort ();
+	}
+      else
+	{
+	  if (mpz_cmp (s0, s1) != 0 || mpz_cmp (s0, s2) == 0 || mpz_cmp (s0, s3) != 0)
+	    abort ();
+	}
+
+      if (mpz_cmp (s1, s2) == 0 || mpz_cmp (s1, s3) != 0)
+	abort ();
+      if (mpz_cmp (s2, s3) == 0)
+	abort ();
+
+      /* combit then clrbit must both land back on s2 (bit currently 1) */
+      mpz_combit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+      if (mpz_cmp (s2, x) != 0)
+	abort ();
+
+      mpz_clrbit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+      if (mpz_cmp (s2, x) != 0)
+	abort ();
+
+      /* s0 | 2^bitindex == s3 (bit set), s0 & ~2^bitindex == s2 (bit clear) */
+      mpz_ui_pow_ui (m, 2L, bitindex);
+      MPZ_CHECK_FORMAT (m);
+      mpz_ior (x, s0, m);
+      MPZ_CHECK_FORMAT (x);
+      if (mpz_cmp (x, s3) != 0)
+	abort ();
+
+      mpz_com (m, m);
+      MPZ_CHECK_FORMAT (m);
+      mpz_and (x, s0, m);
+      MPZ_CHECK_FORMAT (x);
+      if (mpz_cmp (x, s2) != 0)
+	abort ();
+    }
+
+  mpz_clear (x);
+  mpz_clear (s0);
+  mpz_clear (s1);
+  mpz_clear (s2);
+  mpz_clear (s3);
+  mpz_clear (m);
+  return;
+
+
+ fail:
+  printf ("%s\n", s);
+  printf ("bitindex = %lu\n", bitindex);
+  printf ("x = "); mpz_out_str (stdout, -16, x); printf (" hex\n");
+  exit (1);
+}
+
+
+
+/* Run the deterministic checks, then random ones (argv[1] overrides the
+   repetition count inside check_random).  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_clr_extend ();
+  check_com_negs ();
+  check_tstbit ();
+  check_random (argc, argv);
+  check_single ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cdiv_ui.c b/tests/mpz/t-cdiv_ui.c
new file mode 100644
index 0000000..91559ea
--- /dev/null
+++ b/tests/mpz/t-cdiv_ui.c
@@ -0,0 +1,158 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_cdiv_qr_ui, mpz_cdiv_q_ui,
+   mpz_cdiv_r_ui, mpz_cdiv_ui, mpz_mul_ui.
+
+Copyright 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
+
+/* Random testing of the ceiling-division _ui family: checks that the qr,
+   q-only, r-only and remainder-only entry points agree, that signs and
+   magnitudes obey the mpz_cdiv contract, and that q*d + r == n.  All four
+   functions return the remainder's absolute value as an unsigned long.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size;
+  unsigned long divisor;
+  int i;
+  int reps = 10000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  unsigned long r_rq, r_q, r_r, r;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (dividend);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */
+
+      /* nonzero divisor from up to 64 random bits (truncated to
+	 unsigned long where that is narrower) */
+      do
+	{
+	  mpz_rrandomb (bs, rands, 64);
+	  divisor = mpz_get_ui (bs);
+	}
+      while (divisor == 0);
+
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs);
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      /* randomly negate the dividend to cover both signs */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (dividend, dividend);
+
+      /* printf ("%ld\n", SIZ (dividend)); */
+
+      r_rq = mpz_cdiv_qr_ui (quotient, remainder, dividend, divisor);
+      r_q = mpz_cdiv_q_ui (quotient2, dividend, divisor);
+      r_r = mpz_cdiv_r_ui (remainder2, dividend, divisor);
+      r = mpz_cdiv_ui (dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+	 with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+	dump_abort ("quotients from mpz_cdiv_qr_ui and mpz_cdiv_q_ui differ",
+		    dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+	dump_abort ("remainders from mpz_cdiv_qr_ui and mpz_cdiv_r_ui differ",
+		    dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+	if ((mpz_cmp_ui (quotient, 0) < 0)
+	    != (mpz_cmp_ui (dividend, 0) < 0))
+	dump_abort ("quotient sign wrong", dividend, divisor);
+
+      /* Check if the remainder has the opposite sign of the (positive)
+	 divisor: for ceiling division the quotient is rounded towards
+	 plus infinity, so the remainder is <= 0.  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+	if (mpz_cmp_ui (remainder, 0) > 0)
+	  dump_abort ("remainder sign wrong", dividend, divisor);
+
+      mpz_mul_ui (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+	dump_abort ("n mod d != n - [n/d]*d", dividend, divisor);
+
+      /* |r| < divisor, and the unsigned-long returns all equal |r| */
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp_ui (remainder, divisor) >= 0)
+	dump_abort ("remainder greater than divisor", dividend, divisor);
+
+      if (mpz_cmp_ui (remainder, r_rq) != 0)
+	dump_abort ("remainder returned from mpz_cdiv_qr_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_q) != 0)
+	dump_abort ("remainder returned from mpz_cdiv_q_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_r) != 0)
+	dump_abort ("remainder returned from mpz_cdiv_r_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r) != 0)
+	dump_abort ("remainder returned from mpz_cdiv_ui is wrong",
+		    dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print the failing dividend (hex) and divisor, then terminate.  */
+void
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
+{
+  fprintf (stderr, "ERROR: %s\n", str);
+  fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
+  fprintf (stderr, "divisor  = %lX\n", divisor);
+  abort();
+}
+
+/* Print X to stderr in the given BASE (negative base = upper-case
+   digits), followed by a newline.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-cmp.c b/tests/mpz/t-cmp.c
new file mode 100644
index 0000000..1ae2517
--- /dev/null
+++ b/tests/mpz/t-cmp.c
@@ -0,0 +1,181 @@
+/* Test mpz_cmp and mpz_cmpabs.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Nothing sophisticated here, just exercise some combinations of sizes and
+   signs.  */
+
+
+void
+check_one (mpz_ptr x, mpz_ptr y, int want_cmp, int want_cmpabs)
+{  /* verify mpz_cmp and mpz_cmpabs agree with want values in sign (magnitude is unspecified) */
+  int  got;
+
+  got = mpz_cmp (x, y);
+  if ((   got <  0) != (want_cmp <  0)
+      || (got == 0) != (want_cmp == 0)
+      || (got >  0) != (want_cmp >  0))  /* compare sign classes, not exact values */
+    {
+      printf ("mpz_cmp got %d want %d\n", got, want_cmp);
+      mpz_trace ("x", x);
+      mpz_trace ("y", y);
+      abort ();
+    }
+
+  got = mpz_cmpabs (x, y);
+  if ((   got <  0) != (want_cmpabs <  0)
+      || (got == 0) != (want_cmpabs == 0)
+      || (got >  0) != (want_cmpabs >  0))
+    {
+      printf ("mpz_cmpabs got %d want %d\n", got, want_cmpabs);
+      mpz_trace ("x", x);
+      mpz_trace ("y", y);
+      abort ();
+    }
+}
+
+
+void
+check_all (mpz_ptr x, mpz_ptr y, int want_cmp, int want_cmpabs)
+{  /* exercise both argument orders and both sign combinations */
+  check_one (x, y,  want_cmp,  want_cmpabs);
+  check_one (y, x, -want_cmp, -want_cmpabs);
+
+  mpz_neg (x, x);
+  mpz_neg (y, y);
+  want_cmp = -want_cmp;  /* negating both operands flips cmp but leaves cmpabs unchanged */
+
+  check_one (x, y,  want_cmp,  want_cmpabs);
+  check_one (y, x, -want_cmp, -want_cmpabs);
+}
+
+
+#define SET1(z,size, n) \
+  SIZ(z) = size; PTR(z)[0] = n  /* raw 1-limb poke; size may be 0 or negative (sign-and-size) */
+
+#define SET2(z,size, n1,n0) \
+  SIZ(z) = size; PTR(z)[1] = n1; PTR(z)[0] = n0  /* raw 2-limb poke, n1 is the high limb */
+
+#define SET4(z,size, n3,n2,n1,n0) \
+  SIZ(z) = size; PTR(z)[3] = n3; PTR(z)[2] = n2; PTR(z)[1] = n1; PTR(z)[0] = n0  /* raw 4-limb poke */
+
+void
+check_various (void)
+{  /* hand-picked size/sign combinations, limbs written directly via SETn */
+  mpz_t  x, y;
+
+  mpz_init (x);
+  mpz_init (y);
+
+  mpz_realloc (x, (mp_size_t) 20);  /* ensure limb space for the raw SETn pokes below */
+  mpz_realloc (y, (mp_size_t) 20);
+
+  /* 0 cmp 0, junk in low limbs */
+  SET1 (x,0, 123);
+  SET1 (y,0, 456);
+  check_all (x, y, 0, 0);
+
+
+  /* 123 cmp 0 */
+  SET1 (x,1, 123);
+  SET1 (y,0, 456);
+  check_all (x, y, 1, 1);
+
+  /* 123:456 cmp 0 */
+  SET2 (x,2, 456,123);
+  SET1 (y,0, 9999);
+  check_all (x, y, 1, 1);
+
+
+  /* 123 cmp 123 */
+  SET1(x,1, 123);
+  SET1(y,1, 123);
+  check_all (x, y, 0, 0);
+
+  /* -123 cmp 123 */
+  SET1(x,-1, 123);
+  SET1(y,1,  123);
+  check_all (x, y, -1, 0);
+
+
+  /* 123 cmp 456 */
+  SET1(x,1, 123);
+  SET1(y,1, 456);
+  check_all (x, y, -1, -1);
+
+  /* -123 cmp 456 */
+  SET1(x,-1, 123);
+  SET1(y,1,  456);
+  check_all (x, y, -1, -1);
+
+  /* 123 cmp -456 */
+  SET1(x,1,  123);
+  SET1(y,-1, 456);
+  check_all (x, y, 1, -1);
+
+
+  /* 1:0 cmp 1:0 */
+  SET2 (x,2, 1,0);
+  SET2 (y,2, 1,0);
+  check_all (x, y, 0, 0);
+
+  /* -1:0 cmp 1:0 */
+  SET2 (x,-2, 1,0);
+  SET2 (y,2,  1,0);
+  check_all (x, y, -1, 0);
+
+
+  /* 2:0 cmp 1:0 */
+  SET2 (x,2, 2,0);
+  SET2 (y,2, 1,0);
+  check_all (x, y, 1, 1);
+
+
+  /* 4:3:2:1 cmp 2:1 */
+  SET4 (x,4, 4,3,2,1);
+  SET2 (y,2, 2,1);
+  check_all (x, y, 1, 1);
+
+  /* -4:3:2:1 cmp 2:1 */
+  SET4 (x,-4, 4,3,2,1);
+  SET2 (y,2,  2,1);
+  check_all (x, y, -1, 1);
+
+
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;  /* failure traces in upper-case hex */
+
+  check_various ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cmp_d.c b/tests/mpz/t-cmp_d.c
new file mode 100644
index 0000000..d7b9895
--- /dev/null
+++ b/tests/mpz/t-cmp_d.c
@@ -0,0 +1,292 @@
+/* Test mpz_cmp_d and mpz_cmpabs_d.
+
+Copyright 2001-2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* FIXME: Not sure if the tests here are exhaustive.  Ought to try to get
+   each possible exit from mpz_cmp_d (and mpz_cmpabs_d) exercised.  */
+
+
+#define SGN(n)  ((n) > 0 ? 1 : (n) < 0 ? -1 : 0)
+
+
+void
+check_one (const char *name, mpz_srcptr x, double y, int cmp, int cmpabs)
+{  /* check sign of mpz_cmp_d and mpz_cmpabs_d against cmp/cmpabs; name says which caller */
+  int   got;
+
+  got = mpz_cmp_d (x, y);
+  if (SGN(got) != cmp)
+    {
+      int i;
+      printf    ("mpz_cmp_d wrong (from %s)\n", name);
+      printf    ("  got  %d\n", got);
+      printf    ("  want %d\n", cmp);
+    fail:  /* shared dump path, entered by goto from the cmpabs branch below */
+      mpz_trace ("  x", x);
+      printf    ("  y %g\n", y);
+      mp_trace_base=-16;
+      mpz_trace ("  x", x);
+      printf    ("  y %g\n", y);
+      printf    ("  y");
+      for (i = 0; i < sizeof(y); i++)  /* dump the raw bytes of the double too */
+        printf (" %02X", (unsigned) ((unsigned char *) &y)[i]);
+      printf ("\n");
+      abort ();
+    }
+
+  got = mpz_cmpabs_d (x, y);
+  if (SGN(got) != cmpabs)
+    {
+      printf    ("mpz_cmpabs_d wrong\n");
+      printf    ("  got  %d\n", got);
+      printf    ("  want %d\n", cmpabs);
+      goto fail;
+    }
+}
+
+
+void
+check_data (void)
+{  /* fixed operand/result table for mpz_cmp_d and mpz_cmpabs_d */
+  static const struct {
+    const char  *x;
+    double      y;
+    int         cmp, cmpabs;
+
+  } data[] = {
+
+    {  "0",  0.0,  0,  0 },
+
+    {  "1",  0.0,  1,  1 },
+    { "-1",  0.0, -1,  1 },
+
+    {  "1",  0.5,  1,  1 },
+    { "-1", -0.5, -1,  1 },
+
+    {  "0",  1.0, -1, -1 },
+    {  "0", -1.0,  1, -1 },
+
+    {  "0x1000000000000000000000000000000000000000000000000", 1.0,  1, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000", 1.0, -1, 1 },
+
+    {  "0",  1e100, -1, -1 },
+    {  "0", -1e100,  1, -1 },
+
+    {  "2",  1.5,   1,  1 },
+    {  "2", -1.5,   1,  1 },
+    { "-2",  1.5,  -1,  1 },
+    { "-2", -1.5,  -1,  1 },
+  };
+
+  mpz_t  x;
+  int    i;
+
+  mpz_init (x);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (x, data[i].x, 0);  /* base 0: leading 0x selects hex */
+      check_one ("check_data", x, data[i].y, data[i].cmp, data[i].cmpabs);
+    }
+
+  mpz_clear (x);
+}
+
+
+/* Equality of integers with up to 53 bits */
+void
+check_onebits (void)
+{  /* compare against all-ones values while they still convert to double exactly */
+  mpz_t   x, x2;
+  double  y;
+  int     i;
+
+  mpz_init_set_ui (x, 0L);
+  mpz_init (x2);
+
+  for (i = 0; i < 512; i++)
+    {
+      mpz_mul_2exp (x, x, 1);
+      mpz_add_ui (x, x, 1L);  /* x = 2*x+1: all-ones value, one bit longer each pass */
+
+      y = mpz_get_d (x);
+      mpz_set_d (x2, y);  /* round-trip through double */
+
+      /* stop if any truncation is occurring */
+      if (mpz_cmp (x, x2) != 0)
+        break;
+
+      check_one ("check_onebits", x, y, 0, 0);
+      check_one ("check_onebits", x, -y, 1, 0);
+      mpz_neg (x, x);
+      check_one ("check_onebits", x, y, -1, 0);
+      check_one ("check_onebits", x, -y, 0, 0);
+      mpz_neg (x, x);  /* restore sign for the next iteration */
+    }
+
+  mpz_clear (x);
+  mpz_clear (x2);
+}
+
+
+/* With the mpz differing by 1, in a limb position possibly below the double */
+void
+check_low_z_one (void)
+{  /* compare 2^i-1, 2^i, 2^i+1 against the double 2^i, both signs */
+  mpz_t          x;
+  double         y;
+  unsigned long  i;
+
+  mpz_init (x);
+
+  /* FIXME: It'd be better to base this on the float format. */
+#if defined (__vax) || defined (__vax__)
+#define LIM 127			/* vax fp numbers have limited range */
+#else
+#define LIM 512
+#endif
+
+  for (i = 1; i < LIM; i++)
+    {
+      mpz_set_ui (x, 1L);
+      mpz_mul_2exp (x, x, i);  /* x = 2^i, exactly representable as a double */
+      y = mpz_get_d (x);
+
+      check_one ("check_low_z_one", x, y,   0, 0);
+      check_one ("check_low_z_one", x, -y,  1, 0);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y,  -1, 0);
+      check_one ("check_low_z_one", x, -y,  0, 0);
+      mpz_neg (x, x);
+
+      mpz_sub_ui (x, x, 1);  /* x = 2^i - 1: differs in a bit possibly below the double */
+
+      check_one ("check_low_z_one", x, y,  -1, -1);
+      check_one ("check_low_z_one", x, -y,  1, -1);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y,  -1, -1);
+      check_one ("check_low_z_one", x, -y,  1, -1);
+      mpz_neg (x, x);
+
+      mpz_add_ui (x, x, 2);  /* x = 2^i + 1 */
+
+      check_one ("check_low_z_one", x, y,   1, 1);
+      check_one ("check_low_z_one", x, -y,  1, 1);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y,  -1, 1);
+      check_one ("check_low_z_one", x, -y, -1, 1);
+      mpz_neg (x, x);
+    }
+
+  mpz_clear (x);
+}
+
+/* Comparing 1 and 1+2^-n.  "y" is volatile to make gcc store and fetch it,
+   which forces it to a 64-bit double, whereas on x86 it would otherwise
+   remain on the float stack as an 80-bit long double.  */
+void
+check_one_2exp (void)
+{  /* compare +-1 against +-(1+2^-n) for every n a double can resolve */
+  double           e;
+  mpz_t            x;
+  volatile double  y;  /* volatile: force 64-bit double, not x87 80-bit (see file comment) */
+  int              i;
+
+  mpz_init (x);
+
+  e = 1.0;
+  for (i = 0; i < 128; i++)
+    {
+      e /= 2.0;
+      y = 1.0 + e;
+      if (y == 1.0)  /* 2^-i no longer representable next to 1.0; done */
+        break;
+
+      mpz_set_ui (x, 1L);
+      check_one ("check_one_2exp", x,  y, -1, -1);
+      check_one ("check_one_2exp", x, -y,  1, -1);
+
+      mpz_set_si (x, -1L);
+      check_one ("check_one_2exp", x,  y, -1, -1);
+      check_one ("check_one_2exp", x, -y,  1, -1);
+    }
+
+  mpz_clear (x);
+}
+
+void
+check_infinity (void)
+{  /* any finite mpz compares below +inf and above -inf; cmpabs is always -1 */
+  mpz_t   x;
+  double  y = tests_infinity_d ();
+  if (y == 0.0)  /* system cannot produce an infinity; skip */
+    return;
+
+  mpz_init (x);
+
+  /* 0 cmp inf */
+  mpz_set_ui (x, 0L);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  /* 123 cmp inf */
+  mpz_set_ui (x, 123L);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  /* -123 cmp inf */
+  mpz_set_si (x, -123L);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  /* 2^5000 cmp inf */
+  mpz_set_ui (x, 1L);
+  mpz_mul_2exp (x, x, 5000L);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  /* -2^5000 cmp inf */
+  mpz_neg (x, x);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  mpz_clear (x);
+}
+
+int
+main (int argc, char *argv[])
+{  /* argc/argv accepted but unused */
+  tests_start ();
+
+  check_data ();
+  check_onebits ();
+  check_low_z_one ();
+  check_one_2exp ();
+  check_infinity ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cmp_si.c b/tests/mpz/t-cmp_si.c
new file mode 100644
index 0000000..7667d3e
--- /dev/null
+++ b/tests/mpz/t-cmp_si.c
@@ -0,0 +1,101 @@
+/* Test mpz_cmp_si.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define SGN(x)       ((x) < 0 ? -1 : (x) == 0 ? 0 : 1)
+
+void
+check_data (void)
+{  /* table-driven mpz_cmp_si checks; entries whose b overflows long are skipped */
+  static const struct {
+    const char  *a, *b;
+    int         want;
+  } data[] = {
+    { "0",  "1", -1 },
+    { "0",  "0",  0 },
+    { "0", "-1",  1 },
+
+    { "1",  "1", 0 },
+    { "1",  "0", 1 },
+    { "1", "-1", 1 },
+
+    { "-1",  "1", -1 },
+    { "-1",  "0", -1 },
+    { "-1", "-1", 0 },
+
+    {           "0", "-0x80000000",  1 },
+    {  "0x80000000", "-0x80000000",  1 },
+    {  "0x80000001", "-0x80000000",  1 },
+    { "-0x80000000", "-0x80000000",  0 },
+    { "-0x80000001", "-0x80000000", -1 },
+
+    {                   "0", "-0x8000000000000000",  1 },
+    {  "0x8000000000000000", "-0x8000000000000000",  1 },
+    {  "0x8000000000000001", "-0x8000000000000000",  1 },
+    { "-0x8000000000000000", "-0x8000000000000000",  0 },
+    { "-0x8000000000000001", "-0x8000000000000000", -1 },
+  };
+
+  mpz_t  a, bz;
+  long   b;
+  int    got;
+  int    i;
+
+  mpz_init (a);
+  mpz_init (bz);
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);
+      mpz_set_str_or_abort (bz, data[i].b, 0);
+
+      if (mpz_fits_slong_p (bz))  /* LONG_MIN boundary rows only apply on wide-enough longs */
+	{
+	  b = mpz_get_si (bz);
+	  got = mpz_cmp_si (a, b);
+	  if (SGN (got) != data[i].want)
+	    {
+	      printf ("mpz_cmp_si wrong on data[%d]\n", i);
+	      printf ("  a="); mpz_out_str (stdout, 10, a); printf ("\n");
+	      printf ("  b=%ld\n", b);
+	      printf ("  got=%d\n", got);
+	      printf ("  want=%d\n", data[i].want);
+	      abort();
+	    }
+	}
+    }
+
+  mpz_clear (a);
+  mpz_clear (bz);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cong.c b/tests/mpz/t-cong.c
new file mode 100644
index 0000000..59d8526
--- /dev/null
+++ b/tests/mpz/t-cong.c
@@ -0,0 +1,226 @@
+/* test mpz_congruent_p and mpz_congruent_ui_p
+
+Copyright 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d, int want)
+{  /* check mpz_congruent_p (and _ui_p when c,d fit) with a,c swapped both ways */
+  int   got;
+  int   swap;
+
+  for (swap = 0; swap <= 1; swap++)
+    {
+      got = (mpz_congruent_p (a, c, d) != 0);
+      if (want != got)
+	{
+	  printf ("mpz_congruent_p wrong\n");
+	  printf ("   expected %d got %d\n", want, got);
+	  mpz_trace ("	 a", a);
+	  mpz_trace ("	 c", c);
+	  mpz_trace ("	 d", d);
+	  mp_trace_base = -16;
+	  mpz_trace ("	 a", a);
+	  mpz_trace ("	 c", c);
+	  mpz_trace ("	 d", d);
+	  abort ();
+	}
+
+      if (mpz_fits_ulong_p (c) && mpz_fits_ulong_p (d))  /* ui variant only for small non-negative c,d */
+	{
+	  unsigned long	 uc = mpz_get_ui (c);
+	  unsigned long	 ud = mpz_get_ui (d);
+	  got = (mpz_congruent_ui_p (a, uc, ud) != 0);
+	  if (want != got)
+	    {
+	      printf	("mpz_congruent_ui_p wrong\n");
+	      printf	("   expected %d got %d\n", want, got);
+	      mpz_trace ("   a", a);
+	      printf	("   c=%lu\n", uc);
+	      printf	("   d=%lu\n", ud);
+	      mp_trace_base = -16;
+	      mpz_trace ("   a", a);
+	      printf	("   c=0x%lX\n", uc);
+	      printf	("   d=0x%lX\n", ud);
+	      abort ();
+	    }
+	}
+
+      MPZ_SRCPTR_SWAP (a, c);  /* congruence is symmetric in a and c */
+    }
+}
+
+
+void
+check_data (void)
+{  /* hand-picked congruence cases, including divisor-size edge cases */
+  static const struct {
+    const char *a;
+    const char *c;
+    const char *d;
+    int        want;
+
+  } data[] = {
+
+    /* strict equality mod 0 */
+    { "0", "0", "0", 1 },
+    { "11", "11", "0", 1 },
+    { "3", "11", "0", 0 },
+
+    /* anything congruent mod 1 */
+    { "0", "0", "1", 1 },
+    { "1", "0", "1", 1 },
+    { "0", "1", "1", 1 },
+    { "123", "456", "1", 1 },
+    { "0x123456789123456789", "0x987654321987654321", "1", 1 },
+
+    /* csize==1, dsize==2 changing to 1 after stripping 2s */
+    { "0x3333333333333333",  "0x33333333",
+      "0x180000000", 1 },
+    { "0x33333333333333333333333333333333", "0x3333333333333333",
+      "0x18000000000000000", 1 },
+
+    /* another dsize==2 becoming 1, with opposite signs this time */
+    {  "0x444444441",
+      "-0x22222221F",
+       "0x333333330", 1 },
+    {  "0x44444444444444441",
+      "-0x2222222222222221F",
+       "0x33333333333333330", 1 },
+  };
+
+  mpz_t   a, c, d;
+  int     i;
+
+  mpz_init (a);
+  mpz_init (c);
+  mpz_init (d);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);
+      mpz_set_str_or_abort (c, data[i].c, 0);
+      mpz_set_str_or_abort (d, data[i].d, 0);
+      check_one (a, c, d, data[i].want);
+    }
+
+  mpz_clear (a);
+  mpz_clear (c);
+  mpz_clear (d);
+}
+
+
+void
+check_random (int argc, char *argv[])
+{  /* random a,c,d (d nonzero): verify congruence against reference fdiv remainders */
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t   a, c, d, ra, rc;
+  int     i;
+  int     want;
+  int     reps = 10000;
+  mpz_t bs;
+  unsigned long size_range, size;
+
+  if (argc >= 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (bs);
+
+  mpz_init (a);
+  mpz_init (c);
+  mpz_init (d);
+  mpz_init (ra);
+  mpz_init (rc);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65535 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (a, rands, size);
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65535 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (c, rands, size);
+
+      do  /* retry until a nonzero divisor comes up */
+	{
+	  mpz_urandomb (bs, rands, 32);
+	  size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65535 bit operands */
+
+	  mpz_urandomb (bs, rands, size_range);
+	  size = mpz_get_ui (bs);
+	  mpz_rrandomb (d, rands, size);
+	}
+      while (SIZ(d) == 0);
+
+      mpz_negrandom (a, rands);
+      MPZ_CHECK_FORMAT (a);
+      mpz_negrandom (c, rands);
+      MPZ_CHECK_FORMAT (c);
+      mpz_negrandom (d, rands);
+
+      mpz_fdiv_r (ra, a, d);  /* reference: a == c mod d iff the fdiv remainders match */
+      mpz_fdiv_r (rc, c, d);
+
+      want = (mpz_cmp (ra, rc) == 0);
+      check_one (a, c, d, want);
+
+      mpz_sub (ra, ra, rc);
+      mpz_sub (a, a, ra);  /* adjust a so that a == c mod d holds exactly */
+      MPZ_CHECK_FORMAT (a);
+      check_one (a, c, d, 1);
+
+      if (! mpz_pow2abs_p (d))  /* non-power-of-2 d: flipping any single bit breaks congruence */
+        {
+	  refmpz_combit (a, urandom() % (8*GMP_LIMB_BITS));
+	  check_one (a, c, d, 0);
+        }
+    }
+
+  mpz_clear (bs);
+
+  mpz_clear (a);
+  mpz_clear (c);
+  mpz_clear (d);
+  mpz_clear (ra);
+  mpz_clear (rc);
+}
+
+
+int
+main (int argc, char *argv[])
+{  /* optional argv[1] overrides the random rep count */
+  tests_start ();
+
+  check_data ();
+  check_random (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cong_2exp.c b/tests/mpz/t-cong_2exp.c
new file mode 100644
index 0000000..5ffe2d1
--- /dev/null
+++ b/tests/mpz/t-cong_2exp.c
@@ -0,0 +1,207 @@
+/* test mpz_congruent_2exp_p */
+
+/*
+Copyright 2001, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpz_srcptr a, mpz_srcptr c, unsigned long d, int want)
+{  /* check mpz_congruent_2exp_p (a, c, d) with the operands swapped both ways */
+  mpz_t  diff, d2exp;
+  int    got;
+  int    swap;
+
+  for (swap = 0; swap <= 1; swap++)
+    {
+      got = (mpz_congruent_2exp_p (a, c, d) != 0);
+      if (want != got)
+        {
+          mpz_init (diff);  /* only built on failure, for the diagnostic dump */
+          mpz_init (d2exp);
+
+          mpz_sub (diff, a, c);
+          mpz_set_ui (d2exp, 1L);
+          mpz_mul_2exp (d2exp, d2exp, d);
+
+          printf ("mpz_congruent_2exp_p wrong\n");
+          printf ("   expected %d got %d\n", want, got);
+          mpz_trace ("   a", a);
+          mpz_trace ("   c", c);
+          mpz_trace (" a-c", diff);
+          mpz_trace (" 2^d", d2exp);
+          printf    ("   d=%lu\n", d);
+
+          mp_trace_base = -16;
+          mpz_trace ("   a", a);
+          mpz_trace ("   c", c);
+          mpz_trace (" a-c", diff);
+          mpz_trace (" 2^d", d2exp);
+          printf    ("   d=0x%lX\n", d);
+          abort ();
+        }
+
+      MPZ_SRCPTR_SWAP (a, c);  /* congruence is symmetric in a and c */
+    }
+}
+
+
+void
+check_data (void)
+{  /* fixed cases for mpz_congruent_2exp_p, including 128/129-bit boundaries */
+  static const struct {
+    const char     *a;
+    const char     *c;
+    unsigned long  d;
+    int            want;
+
+  } data[] = {
+
+    /* anything is congruent mod 1 */
+    { "0", "0", 0, 1 },
+    { "1", "0", 0, 1 },
+    { "0", "1", 0, 1 },
+    { "123", "-456", 0, 1 },
+    { "0x123456789123456789", "0x987654321987654321", 0, 1 },
+    { "0xfffffffffffffffffffffffffffffff7", "-0x9", 129, 0 },
+    { "0xfffffffffffffffffffffffffffffff6", "-0xa", 128, 1 },
+
+  };
+
+  mpz_t   a, c;
+  int     i;
+
+  mpz_init (a);
+  mpz_init (c);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);
+      mpz_set_str_or_abort (c, data[i].c, 0);
+      check_one (a, c, data[i].d, data[i].want);
+    }
+
+  mpz_clear (a);
+  mpz_clear (c);
+}
+
+
+void
+check_random (int reps)
+{  /* random a,c,d: compare against reference mpz_fdiv_r_2exp remainders */
+  gmp_randstate_ptr rands = RANDS;
+  unsigned long  d;
+  mpz_t  a, c, ra, rc;
+  int    i;
+
+  mpz_init (a);
+  mpz_init (c);
+  mpz_init (ra);
+  mpz_init (rc);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_errandomb (a, rands, 8*GMP_LIMB_BITS);
+      mpz_errandomb (c, rands, 8*GMP_LIMB_BITS);
+      d = urandom() % (8*GMP_LIMB_BITS);
+
+      mpz_mul_2exp (a, a, urandom() % (2*GMP_LIMB_BITS));  /* shift in some low zero bits */
+      mpz_mul_2exp (c, c, urandom() % (2*GMP_LIMB_BITS));
+
+      mpz_negrandom (a, rands);
+      mpz_negrandom (c, rands);
+
+      mpz_fdiv_r_2exp (ra, a, d);  /* reference remainders mod 2^d */
+      mpz_fdiv_r_2exp (rc, c, d);
+
+      mpz_sub (ra, ra, rc);
+      if (mpz_cmp_ui (ra, 0) != 0)
+	{
+	  check_one (a, c, d, 0);
+	  mpz_sub (a, a, ra);  /* adjust a so a == c mod 2^d */
+	}
+      check_one (a, c, d, 1);
+      if (d != 0)
+	{
+	  mpz_combit (a, urandom() % d);  /* flipping a bit below d breaks the congruence */
+	  check_one (a, c, d, 0);
+	}
+    }
+
+  mpz_clear (a);
+  mpz_clear (c);
+  mpz_clear (ra);
+  mpz_clear (rc);
+}
+
+void
+check_random_bits (int reps)
+{  /* construct a,c with a-c == 2^ea exactly, so a == c mod 2^d iff ea >= d */
+  mp_bitcnt_t ea, ec, en, d;
+  mp_bitcnt_t m = 10 * GMP_LIMB_BITS;
+  mpz_t  a, c;
+  int    i;
+
+  mpz_init2 (a, m + 1);
+  mpz_init2 (c, m);
+
+  for (i = 0; i < reps; i++)
+    {
+      d  = urandom() % m;
+      ea = urandom() % m;
+      ec = urandom() % m;
+      en = urandom() % m;
+
+      mpz_set_ui (c, 0);
+      mpz_setbit (c, en);
+
+      mpz_set_ui (a, 0);
+      mpz_setbit (a, ec);
+      mpz_sub (c , a, c);  /* c = 2^ec - 2^en */
+
+      mpz_set_ui (a, 0);
+      mpz_setbit (a, ea);
+      mpz_add (a , a, c);  /* a = 2^ea + c, hence a-c == 2^ea */
+
+      check_one (a, c, d, ea >= d);
+    }
+
+  mpz_clear (a);
+  mpz_clear (c);
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  int    reps = 5000;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);  /* optional command-line rep count override */
+
+  check_data ();
+  check_random (reps);
+  check_random_bits (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-div_2exp.c b/tests/mpz/t-div_2exp.c
new file mode 100644
index 0000000..d012aae
--- /dev/null
+++ b/tests/mpz/t-div_2exp.c
@@ -0,0 +1,223 @@
+/* Test mpz_[cft]div_[qr]_2exp.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* If the remainder is in the correct range and q*d+r is correct, then q
+   must have rounded correctly.  */
+
+void
+check_one (mpz_srcptr a, unsigned long d)
+{  /* for each of fdiv/cdiv/tdiv: check remainder range and q*2^d + r == a */
+  mpz_t  q, r, p, d2exp;
+  int    inplace;
+
+  mpz_init (d2exp);
+  mpz_init (q);
+  mpz_init (r);
+  mpz_init (p);
+
+  mpz_set_ui (d2exp, 1L);
+  mpz_mul_2exp (d2exp, d2exp, d);
+
+#define INPLACE(fun,dst,src,d)  \
+  if (inplace)                  \
+    {                           \
+      mpz_set (dst, src);       \
+      fun (dst, dst, d);        \
+    }                           \
+  else                          \
+    fun (dst, src, d);
+
+  for (inplace = 0; inplace <= 1; inplace++)  /* exercise separate and dst==src forms */
+    {
+      INPLACE (mpz_fdiv_q_2exp, q, a, d);
+      INPLACE (mpz_fdiv_r_2exp, r, a, d);
+
+      mpz_mul_2exp (p, q, d);
+      mpz_add (p, p, r);
+      if (mpz_sgn (r) < 0 || mpz_cmp (r, d2exp) >= 0)  /* fdiv: 0 <= r < 2^d */
+	{
+	  printf ("mpz_fdiv_r_2exp result out of range\n");
+	  goto error;
+	}
+      if (mpz_cmp (p, a) != 0)
+	{
+	  printf ("mpz_fdiv_[qr]_2exp doesn't multiply back\n");
+	  goto error;
+	}
+
+
+      INPLACE (mpz_cdiv_q_2exp, q, a, d);
+      INPLACE (mpz_cdiv_r_2exp, r, a, d);
+
+      mpz_mul_2exp (p, q, d);
+      mpz_add (p, p, r);
+      if (mpz_sgn (r) > 0 || mpz_cmpabs (r, d2exp) >= 0)  /* cdiv: -2^d < r <= 0 */
+	{
+	  printf ("mpz_cdiv_r_2exp result out of range\n");
+	  goto error;
+	}
+      if (mpz_cmp (p, a) != 0)
+	{
+	  printf ("mpz_cdiv_[qr]_2exp doesn't multiply back\n");
+	  goto error;
+	}
+
+
+      INPLACE (mpz_tdiv_q_2exp, q, a, d);
+      INPLACE (mpz_tdiv_r_2exp, r, a, d);
+
+      mpz_mul_2exp (p, q, d);
+      mpz_add (p, p, r);
+      if (mpz_sgn (r) != 0 && mpz_sgn (r) != mpz_sgn (a))  /* tdiv: r has the sign of a */
+	{
+	  printf ("mpz_tdiv_r_2exp result wrong sign\n");
+	  goto error;
+	}
+      if (mpz_cmpabs (r, d2exp) >= 0)
+	{
+	  printf ("mpz_tdiv_r_2exp result out of range\n");
+	  goto error;
+	}
+      if (mpz_cmp (p, a) != 0)
+	{
+	  printf ("mpz_tdiv_[qr]_2exp doesn't multiply back\n");
+	  goto error;
+	}
+    }
+
+  mpz_clear (d2exp);
+  mpz_clear (q);
+  mpz_clear (r);
+  mpz_clear (p);
+  return;
+
+
+ error:  /* shared diagnostic dump, decimal then hex */
+  mpz_trace ("a", a);
+  printf    ("d=%lu\n", d);
+  mpz_trace ("q", q);
+  mpz_trace ("r", r);
+  mpz_trace ("p", p);
+
+  mp_trace_base = -16;
+  mpz_trace ("a", a);
+  printf    ("d=0x%lX\n", d);
+  mpz_trace ("q", q);
+  mpz_trace ("r", r);
+  mpz_trace ("p", p);
+
+  abort ();
+}
+
+
+void
+check_all (mpz_ptr a, unsigned long d)
+{  /* run check_one on a and on -a */
+  check_one (a, d);
+  mpz_neg (a, a);
+  check_one (a, d);
+}
+
+
+void
+check_various (void)
+{  /* a == 0 and a == +-2^n for n,d around limb-boundary bit counts */
+  static const unsigned long  table[] = {
+    0, 1, 2, 3, 4, 5,
+    GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,
+    2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,
+    3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1,
+    4*GMP_NUMB_BITS-1, 4*GMP_NUMB_BITS, 4*GMP_NUMB_BITS+1
+  };
+
+  int            i, j;
+  unsigned long  n, d;
+  mpz_t          a;
+
+  mpz_init (a);
+
+  /* a==0, and various d */
+  mpz_set_ui (a, 0L);
+  for (i = 0; i < numberof (table); i++)
+    check_one (a, table[i]);
+
+  /* a==2^n, and various d */
+  for (i = 0; i < numberof (table); i++)
+    {
+      n = table[i];
+      mpz_set_ui (a, 1L);
+      mpz_mul_2exp (a, a, n);
+
+      for (j = 0; j < numberof (table); j++)
+	{
+	  d = table[j];
+	  check_all (a, d);
+	}
+    }
+
+  mpz_clear (a);
+}
+
+
+void
+check_random (int argc, char *argv[])
+{  /* random operands and shift counts; reps overridable via argv[1] */
+  gmp_randstate_ptr  rands = RANDS;
+  int            reps = 100;
+  mpz_t          a;
+  unsigned long  d;
+  int            i;
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (a);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* exponentially within 2 to 257 bits */
+      mpz_erandomb (a, rands, urandom () % 8 + 2);
+
+      d = urandom () % 256;
+
+      check_all (a, d);
+    }
+
+  mpz_clear (a);
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_various ();
+  check_random (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-divis.c b/tests/mpz/t-divis.c
new file mode 100644
index 0000000..fe8186a
--- /dev/null
+++ b/tests/mpz/t-divis.c
@@ -0,0 +1,166 @@
+/* test mpz_divisible_p and mpz_divisible_ui_p
+
+Copyright 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpz_srcptr a, mpz_srcptr d, int want)
+{  /* check mpz_divisible_p, and mpz_divisible_ui_p when d fits an unsigned long */
+  int   got;
+
+  if (mpz_fits_ulong_p (d))
+    {
+      unsigned long  u = mpz_get_ui (d);
+      got = (mpz_divisible_ui_p (a, u) != 0);
+      if (want != got)
+        {
+          printf ("mpz_divisible_ui_p wrong\n");
+          printf ("   expected %d got %d\n", want, got);
+          mpz_trace ("   a", a);
+          printf ("   d=%lu\n", u);
+          mp_trace_base = -16;
+          mpz_trace ("   a", a);
+          printf ("   d=0x%lX\n", u);
+          abort ();
+        }
+    }
+
+  got = (mpz_divisible_p (a, d) != 0);
+  if (want != got)
+    {
+      printf ("mpz_divisible_p wrong\n");
+      printf ("   expected %d got %d\n", want, got);
+      mpz_trace ("   a", a);
+      mpz_trace ("   d", d);
+      mp_trace_base = -16;
+      mpz_trace ("   a", a);
+      mpz_trace ("   d", d);
+      abort ();
+    }
+}
+
+void
+check_data (void)
+{  /* fixed divisibility cases; only 0 is divisible by 0 */
+  static const struct {
+    const char *a;
+    const char *d;
+    int        want;
+
+  } data[] = {
+
+    { "0",    "0", 1 },
+    { "17",   "0", 0 },
+    { "0",    "1", 1 },
+    { "123",  "1", 1 },
+    { "-123", "1", 1 },
+
+    { "0",  "2", 1 },
+    { "1",  "2", 0 },
+    { "2",  "2", 1 },
+    { "-2", "2", 1 },
+    { "0x100000000000000000000000000000000", "2", 1 },
+    { "0x100000000000000000000000000000001", "2", 0 },
+
+    { "0x3333333333333333", "3", 1 },
+    { "0x3333333333333332", "3", 0 },
+    { "0x33333333333333333333333333333333", "3", 1 },
+    { "0x33333333333333333333333333333332", "3", 0 },
+
+    /* divisor changes from 2 to 1 limb after stripping 2s */
+    {          "0x3333333300000000",         "0x180000000",         1 },
+    {  "0x33333333333333330000000000000000", "0x18000000000000000", 1 },
+    { "0x133333333333333330000000000000000", "0x18000000000000000", 0 },
+  };
+
+  mpz_t   a, d;
+  int     i;
+
+  mpz_init (a);
+  mpz_init (d);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);
+      mpz_set_str_or_abort (d, data[i].d, 0);
+      check_one (a, d, data[i].want);
+    }
+
+  mpz_clear (a);
+  mpz_clear (d);
+}
+
+void
+check_random (int reps)
+{  /* random a,d: reference answer from mpz_fdiv_r, then force divisible / non-divisible */
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t   a, d, r;
+  int     i;
+  int     want;
+
+  mpz_init (a);
+  mpz_init (d);
+  mpz_init (r);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_erandomb (a, rands, 1 << 19);
+      mpz_erandomb_nonzero (d, rands, 1 << 18);
+
+      mpz_fdiv_r (r, a, d);
+
+      want = (mpz_sgn (r) == 0);  /* divisible iff remainder is zero */
+      check_one (a, d, want);
+
+      mpz_sub (a, a, r);  /* strip the remainder: now exactly divisible */
+      check_one (a, d, 1);
+
+      if (mpz_cmpabs_ui (d, 1L) == 0)  /* +-1 divides everything; a+1 test below wouldn't fail */
+        continue;
+
+      mpz_add_ui (a, a, 1L);
+      check_one (a, d, 0);
+    }
+
+  mpz_clear (a);
+  mpz_clear (d);
+  mpz_clear (r);
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  int  reps = 100;
+
+  tests_start ();
+
+  TESTS_REPS (reps, argv, argc);  /* optional command-line rep count override */
+
+  check_data ();
+  check_random (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-divis_2exp.c b/tests/mpz/t-divis_2exp.c
new file mode 100644
index 0000000..636c751
--- /dev/null
+++ b/tests/mpz/t-divis_2exp.c
@@ -0,0 +1,132 @@
+/* test mpz_divisible_2exp_p */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpz_srcptr a, unsigned long d, int want)
+{
+  int   got;
+
+  got = (mpz_divisible_2exp_p (a, d) != 0);
+  if (want != got)
+    {
+      printf ("mpz_divisible_2exp_p wrong\n");
+      printf ("   expected %d got %d\n", want, got);
+      mpz_trace ("   a", a);
+      printf    ("   d=%lu\n", d);
+      mp_trace_base = -16;
+      mpz_trace ("   a", a);
+      printf    ("   d=0x%lX\n", d);
+      abort ();
+    }
+}
+
+void
+check_data (void)
+{
+  static const struct {
+    const char    *a;
+    unsigned long d;
+    int           want;
+
+  } data[] = {
+
+    { "0", 0, 1 },
+    { "0", 1, 1 },
+    { "0", 2, 1 },
+    { "0", 3, 1 },
+
+    { "1", 0, 1 },
+    { "1", 1, 0 },
+    { "1", 2, 0 },
+    { "1", 3, 0 },
+    { "1", 10000, 0 },
+
+    { "4", 0, 1 },
+    { "4", 1, 1 },
+    { "4", 2, 1 },
+    { "4", 3, 0 },
+    { "4", 4, 0 },
+    { "4", 10000, 0 },
+
+    { "0x80000000", 31, 1 },
+    { "0x80000000", 32, 0 },
+    { "0x80000000", 64, 0 },
+
+    { "0x100000000", 32, 1 },
+    { "0x100000000", 33, 0 },
+    { "0x100000000", 64, 0 },
+
+    { "0x8000000000000000", 63, 1 },
+    { "0x8000000000000000", 64, 0 },
+    { "0x8000000000000000", 128, 0 },
+
+    { "0x10000000000000000", 64, 1 },
+    { "0x10000000000000000", 65, 0 },
+    { "0x10000000000000000", 128, 0 },
+    { "0x10000000000000000", 256, 0 },
+
+    { "0x10000000000000000100000000", 32, 1 },
+    { "0x10000000000000000100000000", 33, 0 },
+    { "0x10000000000000000100000000", 64, 0 },
+
+    { "0x1000000000000000010000000000000000", 64, 1 },
+    { "0x1000000000000000010000000000000000", 65, 0 },
+    { "0x1000000000000000010000000000000000", 128, 0 },
+    { "0x1000000000000000010000000000000000", 256, 0 },
+    { "0x1000000000000000010000000000000000", 1024, 0 },
+
+  };
+
+  mpz_t   a, d;
+  int     i;
+
+  mpz_init (a);
+  mpz_init (d);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);
+      check_one (a, data[i].d, data[i].want);
+
+      mpz_neg (a, a);
+      check_one (a, data[i].d, data[i].want);
+    }
+
+  mpz_clear (a);
+  mpz_clear (d);
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-export.c b/tests/mpz/t-export.c
new file mode 100644
index 0000000..fa411e3
--- /dev/null
+++ b/tests/mpz/t-export.c
@@ -0,0 +1,205 @@
+/* Test mpz_export.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_data (void)
+{
+  static const struct {
+    const char  *src;
+    size_t      want_count;
+    int         order;
+    size_t      size;
+    int         endian;
+    int         nail;
+    char        want_data[64];
+
+  } data[] = {
+
+    { "0", 0,1, 1,1, 0 },
+    { "0", 0,1, 2,1, 0 },
+    { "0", 0,1, 3,1, 0 },
+
+    { "0x12345678", 4,1,  1,1, 0, { '\022', '\064', '\126', '\170' } },
+    { "0x12345678", 1,1,  4,1, 0, { '\022', '\064', '\126', '\170' } },
+    { "0x12345678", 1,-1, 4,1, 0, { '\022', '\064', '\126', '\170' } },
+
+    { "0x12345678", 4,-1, 1,-1, 0, { '\170', '\126', '\064', '\022' } },
+    { "0x12345678", 1,1,  4,-1, 0, { '\170', '\126', '\064', '\022' } },
+    { "0x12345678", 1,-1, 4,-1, 0, { '\170', '\126', '\064', '\022' } },
+
+    { "0x15", 5,1,  1,1, 7, { '\001', '\000', '\001', '\000', '\001' } },
+
+    { "0x1FFFFFFFFFFF", 3,1,  2,1,   1, {
+	'\177','\377', '\177','\377', '\177','\377' } },
+    { "0x1FFFFFFFFFFF", 3,1,  2,-1,  1, {
+	'\377','\177', '\377','\177', '\377','\177' } },
+    { "0x7",            3,1,  2,1,  15, {
+	'\000','\001', '\000','\001', '\000','\001' } },
+    { "0x7",            3,1,  2,-1, 15, {
+	'\001','\000', '\001','\000', '\001','\000' } },
+
+    { "0x24", 3,1,  2,1,  14, { '\000','\002', '\000','\001', '\000','\000' }},
+    { "0x24", 3,1,  2,-1, 14, { '\002','\000', '\001','\000', '\000','\000' }},
+    { "0x24", 3,-1, 2,-1, 14, { '\000','\000', '\001','\000', '\002','\000' }},
+    { "0x24", 3,-1, 2,1,  14, { '\000','\000', '\000','\001', '\000','\002' }},
+
+    { "0x123456789ABC", 3,1,  2,1,  0, {
+	'\022','\064', '\126','\170', '\232','\274' } },
+    { "0x123456789ABC", 3,-1, 2,1,  0, {
+	'\232','\274', '\126','\170', '\022','\064' } },
+    { "0x123456789ABC", 3,1,  2,-1, 0, {
+	'\064','\022', '\170','\126', '\274','\232' } },
+    { "0x123456789ABC", 3,-1, 2,-1, 0, {
+	'\274','\232', '\170','\126', '\064','\022' } },
+
+    { "0x112233445566778899AABBCC", 3,1,  4,1,  0,
+      { '\021','\042','\063','\104',
+	'\125','\146','\167','\210',
+	'\231','\252','\273','\314' } },
+    { "0x112233445566778899AABBCC", 3,-1, 4,1,  0,
+      { '\231','\252','\273','\314',
+	'\125','\146','\167','\210',
+	'\021','\042','\063','\104' } },
+    { "0x112233445566778899AABBCC", 3,1,  4,-1, 0,
+      { '\104','\063','\042','\021',
+	'\210','\167','\146','\125',
+	'\314','\273','\252','\231' } },
+    { "0x112233445566778899AABBCC", 3,-1, 4,-1, 0,
+      { '\314','\273','\252','\231',
+	'\210','\167','\146','\125',
+	'\104','\063','\042','\021' } },
+
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,1,  8,1,  0,
+      { '\020','\001','\040','\002','\060','\003','\100','\004',
+	'\120','\005','\140','\006','\160','\007','\200','\010',
+	'\220','\011','\240','\012','\260','\013','\300','\014' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,-1, 8,1,  0,
+      { '\220','\011','\240','\012','\260','\013','\300','\014',
+	'\120','\005','\140','\006','\160','\007','\200','\010',
+	'\020','\001','\040','\002','\060','\003','\100','\004' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,1,  8,-1, 0,
+      { '\004','\100','\003','\060','\002','\040','\001','\020',
+	'\010','\200','\007','\160','\006','\140','\005','\120',
+	'\014','\300','\013','\260','\012','\240','\011','\220' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,-1, 8,-1, 0,
+      { '\014','\300','\013','\260','\012','\240','\011','\220',
+	'\010','\200','\007','\160','\006','\140','\005','\120',
+	'\004','\100','\003','\060','\002','\040','\001','\020' } },
+
+    { "0x155555555555555555555555", 3,1,  4,1,  1,
+      { '\125','\125','\125','\125',
+	'\052','\252','\252','\252',
+	'\125','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,-1,  4,1,  1,
+      { '\125','\125','\125','\125',
+	'\052','\252','\252','\252',
+	'\125','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,1,  4,-1,  1,
+      { '\125','\125','\125','\125',
+	'\252','\252','\252','\052',
+	'\125','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,-1,  4,-1,  1,
+      { '\125','\125','\125','\125',
+	'\252','\252','\252','\052',
+	'\125','\125','\125','\125' } },
+  };
+
+  char    buf[sizeof(data[0].src) + sizeof (mp_limb_t) + 128];
+  char    *got_data;
+  void    *ret;
+  size_t  align, got_count, j;
+  int     i, error = 0;
+  mpz_t   src;
+
+  mpz_init (src);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (align = 0; align < sizeof (mp_limb_t); align++)
+	{
+	  mpz_set_str_or_abort (src, data[i].src, 0);
+	  MPZ_CHECK_FORMAT (src);
+	  got_data = buf + align;
+
+	  ASSERT_ALWAYS (data[i].want_count * data[i].size + align
+			 <= sizeof (buf));
+
+	  memset (got_data, '\0', data[i].want_count * data[i].size);
+	  ret = mpz_export (got_data, &got_count, data[i].order,
+			    data[i].size, data[i].endian, data[i].nail, src);
+
+	  if (ret != got_data)
+	    {
+	      printf ("return doesn't equal given pointer\n");
+	      error = 1;
+	    }
+	  if (got_count != data[i].want_count)
+	    {
+	      printf ("wrong count\n");
+	      error = 1;
+	    }
+	  if (memcmp (got_data, data[i].want_data, got_count * data[i].size) != 0)
+	    {
+	      printf ("wrong result data\n");
+	      error = 1;
+	    }
+	  if (error)
+	    {
+	      printf ("    at data[%d]  align=%d\n", i, (int) align);
+	      printf ("    src \"%s\"\n", data[i].src);
+	      mpz_trace ("    src", src);
+	      printf ("    order=%d  size=%lu endian=%d nail=%u\n",
+		      data[i].order,
+		      (unsigned long) data[i].size, data[i].endian, data[i].nail);
+	      printf ("    want count %lu\n", (unsigned long) data[i].want_count);
+	      printf ("    got count  %lu\n", (unsigned long) got_count);
+	      printf ("    want");
+	      for (j = 0; j < data[i].want_count*data[i].size; j++)
+		printf (" 0x%02X,", (unsigned) (unsigned char) data[i].want_data[j]);
+	      printf ("\n");
+	      printf ("    got ");
+	      for (j = 0; j < got_count*data[i].size; j++)
+		printf (" 0x%02X,", (unsigned) (unsigned char) got_data[j]);
+	      printf ("\n");
+	      abort ();
+	    }
+	}
+    }
+  mpz_clear (src);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+
+  mp_trace_base = -16;
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-fac_ui.c b/tests/mpz/t-fac_ui.c
new file mode 100644
index 0000000..15b6070
--- /dev/null
+++ b/tests/mpz/t-fac_ui.c
@@ -0,0 +1,108 @@
+/* Exercise mpz_fac_ui and mpz_2fac_ui.
+
+Copyright 2000-2002, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-fac_ui [x|num]
+
+   With no arguments testing goes up to the initial value of "limit" below.
+   With a number argument tests are carried that far, or with a literal "x"
+   tests are continued without limit (this being meant only for development
+   purposes).  */
+
+
+int
+main (int argc, char *argv[])
+{
+  unsigned long  n, m;
+  unsigned long  limit = 2222;
+  mpz_t          df[2], f, r;
+
+  tests_start ();
+
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;
+  else
+    TESTS_REPS (limit, argv, argc);
+
+  /* for small limb testing */
+  limit = MIN (limit, MP_LIMB_T_MAX);
+
+  mpz_init_set_ui (df[0], 1);  /* 0!! = 1 */
+  mpz_init_set_ui (df[1], 1);  /* -1!! = 1 */
+  mpz_init_set_ui (f, 1);  /* 0! = 1 */
+  mpz_init (r);
+
+  for (n = 0, m = 0; n < limit; n++)
+    {
+      mpz_fac_ui (r, n);
+      MPZ_CHECK_FORMAT (r);
+
+      if (mpz_cmp (f, r) != 0)
+        {
+          printf ("mpz_fac_ui(%lu) wrong\n", n);
+          printf ("  got  "); mpz_out_str (stdout, 10, r); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, f); printf("\n");
+          abort ();
+        }
+
+      mpz_2fac_ui (r, n);
+      MPZ_CHECK_FORMAT (r);
+
+      if (mpz_cmp (df[m], r) != 0)
+        {
+          printf ("mpz_2fac_ui(%lu) wrong\n", n);
+          printf ("  got  "); mpz_out_str (stdout, 10, r); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, df[m]); printf("\n");
+          abort ();
+        }
+
+      m ^= 1;
+      mpz_mul_ui (df[m], df[m], n+1);  /* (n+1)!! = (n-1)!! * (n+1) */
+      mpz_mul_ui (f, f, n+1);  /* (n+1)! = n! * (n+1) */
+    }
+
+  n = 2097169; /* a prime = 1 mod 4 */
+  if (n / 2 > MP_LIMB_T_MAX)
+    n = 131041; /* a smaller prime :-) */
+  mpz_fac_ui (f, n / 2); /* ((n-1)/2)! */
+  m = mpz_fdiv_ui (f, n); /* ((n-1)/2)! mod n */
+  mpz_set_ui (f, m);
+  mpz_mul_ui (f, f, m); /* (((n-1)/2)!)^2 */
+  m = mpz_fdiv_ui (f, n); /* (((n-1)/2)!)^2 mod n */
+  if ( m != n - 1)
+    {
+      printf ("mpz_fac_ui(%lu) wrong\n", n / 2);
+      printf (" al-Haytham's theorem not verified: got %lu, expected %lu.\n", m, n - 1);
+      abort ();
+    }
+
+  mpz_clear (df[0]);
+  mpz_clear (df[1]);
+  mpz_clear (f);
+  mpz_clear (r);
+
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpz/t-fdiv.c b/tests/mpz/t-fdiv.c
new file mode 100644
index 0000000..71503df
--- /dev/null
+++ b/tests/mpz/t-fdiv.c
@@ -0,0 +1,146 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_fdiv_qr, mpz_fdiv_q,
+   mpz_fdiv_r, mpz_mul.
+
+Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort (mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
+
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend, divisor;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size, divisor_size;
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (dividend);
+  mpz_init (divisor);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 2; /* 0..131071 bit operands */
+
+      do
+	{
+	  mpz_urandomb (bs, rands, size_range);
+	  divisor_size = mpz_get_ui (bs);
+	  mpz_rrandomb (divisor, rands, divisor_size);
+	}
+      while (mpz_sgn (divisor) == 0);
+
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs) + divisor_size;
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (dividend, dividend);
+      if ((bsi & 2) != 0)
+	mpz_neg (divisor, divisor);
+
+      /* printf ("%ld %ld\n", SIZ (dividend), SIZ (divisor)); */
+
+      mpz_fdiv_qr (quotient, remainder, dividend, divisor);
+      mpz_fdiv_q (quotient2, dividend, divisor);
+      mpz_fdiv_r (remainder2, dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+	 with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+	dump_abort (dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+	dump_abort (dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+	if ((mpz_cmp_ui (quotient, 0) < 0)
+	    != ((mpz_cmp_ui (dividend, 0) ^ mpz_cmp_ui (divisor, 0)) < 0))
+	dump_abort (dividend, divisor);
+
+      /* Check if the remainder has the same sign as the divisor
+	 (quotient rounded towards minus infinity).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+	if ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (divisor, 0) < 0))
+	  dump_abort (dividend, divisor);
+
+      mpz_mul (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+	dump_abort (dividend, divisor);
+
+      mpz_abs (temp, divisor);
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp (remainder, temp) >= 0)
+	dump_abort (dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (divisor);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (mpz_t dividend, mpz_t divisor)
+{
+  fprintf (stderr, "ERROR\n");
+  fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
+  fprintf (stderr, "divisor  = "); debug_mp (divisor, -16);
+  abort();
+}
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-fdiv_ui.c b/tests/mpz/t-fdiv_ui.c
new file mode 100644
index 0000000..87620ca
--- /dev/null
+++ b/tests/mpz/t-fdiv_ui.c
@@ -0,0 +1,158 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_fdiv_qr_ui, mpz_fdiv_q_ui,
+   mpz_fdiv_r_ui, mpz_fdiv_ui, mpz_mul_ui.
+
+Copyright 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
+
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size;
+  unsigned long divisor;
+  int i;
+  int reps = 10000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  unsigned long r_rq, r_q, r_r, r;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (dividend);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */
+
+      do
+	{
+	  mpz_rrandomb (bs, rands, 64);
+	  divisor = mpz_get_ui (bs);
+	}
+      while (divisor == 0);
+
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs);
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (dividend, dividend);
+
+      /* printf ("%ld\n", SIZ (dividend)); */
+
+      r_rq = mpz_fdiv_qr_ui (quotient, remainder, dividend, divisor);
+      r_q = mpz_fdiv_q_ui (quotient2, dividend, divisor);
+      r_r = mpz_fdiv_r_ui (remainder2, dividend, divisor);
+      r = mpz_fdiv_ui (dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+	 with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+	dump_abort ("quotients from mpz_fdiv_qr_ui and mpz_fdiv_q_ui differ",
+		    dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+	dump_abort ("remainders from mpz_fdiv_qr_ui and mpz_fdiv_r_ui differ",
+		    dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+	if ((mpz_cmp_ui (quotient, 0) < 0)
+	    != (mpz_cmp_ui (dividend, 0) < 0))
+	dump_abort ("quotient sign wrong", dividend, divisor);
+
+      /* Check if the remainder has the same sign as the (positive) divisor
+	 (quotient rounded towards minus infinity).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+	if (mpz_cmp_ui (remainder, 0) < 0)
+	  dump_abort ("remainder sign wrong", dividend, divisor);
+
+      mpz_mul_ui (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+	dump_abort ("n mod d != n - [n/d]*d", dividend, divisor);
+
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp_ui (remainder, divisor) >= 0)
+	dump_abort ("remainder greater than divisor", dividend, divisor);
+
+      if (mpz_cmp_ui (remainder, r_rq) != 0)
+	dump_abort ("remainder returned from mpz_fdiv_qr_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_q) != 0)
+	dump_abort ("remainder returned from mpz_fdiv_q_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_r) != 0)
+	dump_abort ("remainder returned from mpz_fdiv_r_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r) != 0)
+	dump_abort ("remainder returned from mpz_fdiv_ui is wrong",
+		    dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
+{
+  fprintf (stderr, "ERROR: %s\n", str);
+  fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
+  fprintf (stderr, "divisor  = %lX\n", divisor);
+  abort();
+}
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-fib_ui.c b/tests/mpz/t-fib_ui.c
new file mode 100644
index 0000000..ec9425c
--- /dev/null
+++ b/tests/mpz/t-fib_ui.c
@@ -0,0 +1,155 @@
+/* Test mpz_fib_ui and mpz_fib2_ui.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-fib_ui [x|num]
+
+   Run with no arguments, testing goes up to the initial value of "limit"
+   below.  With a number argument tests are carried up that far, or with a
+   literal "x" tests are continued without limit (this being only meant for
+   development purposes).
+
+   The size tests performed are designed to partially replicate what will be
+   going on in mpz_fib_ui.  There's plenty of ASSERTs there, but of course
+   they're not normally enabled.
+
+   Misfeatures:
+
+   The tests on MPN_FIB2_SIZE are a bit useless, since that macro includes a
+   +2 for the internal purposes of mpn_fib2_ui.  It's probably better to
+   give mpn_fib2_ui a run with assertion checking enabled.  */
+
+
+#define MPZ_FIB_SIZE_FLOAT(n) \
+  ((mp_size_t) ((n) * 0.6942419 / GMP_NUMB_BITS + 1))
+
+
+void
+check_fib_table (void)
+{
+  int        i;
+  mp_limb_t  want;
+
+  ASSERT_ALWAYS (FIB_TABLE(-1) == 1);
+  ASSERT_ALWAYS (FIB_TABLE(0) == 0);
+
+  for (i = 1; i <= FIB_TABLE_LIMIT; i++)
+    {
+      want = FIB_TABLE(i-1) + FIB_TABLE(i-2);
+      if (FIB_TABLE(i) != want)
+        {
+          printf ("FIB_TABLE(%d) wrong\n", i);
+          gmp_printf ("  got  %#Nx\n", &FIB_TABLE(i), 1);
+          gmp_printf ("  want %#Nx\n", &want, 1);
+          abort ();
+        }
+    }
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  unsigned long  n;
+  unsigned long  limit = 100 * GMP_LIMB_BITS;
+  mpz_t          want_fn, want_fn1, got_fn, got_fn1;
+
+  tests_start ();
+  mp_trace_base = -16;
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;
+  else
+    TESTS_REPS (limit, argv, argc);
+
+  check_fib_table ();
+
+  /* start at n==0 */
+  mpz_init_set_ui (want_fn1, 1);  /* F[-1] */
+  mpz_init_set_ui (want_fn,  0);  /* F[0]   */
+  mpz_init (got_fn);
+  mpz_init (got_fn1);
+
+  for (n = 0; n < limit; n++)
+    {
+      /* check our float formula seems right */
+      if (MPZ_FIB_SIZE_FLOAT (n) < SIZ(want_fn))
+        {
+          printf ("MPZ_FIB_SIZE_FLOAT wrong at n=%lu\n", n);
+          printf ("  MPZ_FIB_SIZE_FLOAT  %ld\n", MPZ_FIB_SIZE_FLOAT (n));
+          printf ("  SIZ(want_fn)        %d\n", SIZ(want_fn));
+          abort ();
+        }
+
+      /* check MPN_FIB2_SIZE seems right, compared to actual size and
+         compared to our float formula */
+      if (MPN_FIB2_SIZE (n) < MPZ_FIB_SIZE_FLOAT (n))
+        {
+          printf ("MPN_FIB2_SIZE wrong at n=%lu\n", n);
+          printf ("  MPN_FIB2_SIZE       %ld\n", MPN_FIB2_SIZE (n));
+          printf ("  MPZ_FIB_SIZE_FLOAT  %ld\n", MPZ_FIB_SIZE_FLOAT (n));
+          abort ();
+        }
+      if (MPN_FIB2_SIZE (n) < SIZ(want_fn))
+        {
+          printf ("MPN_FIB2_SIZE wrong at n=%lu\n", n);
+          printf ("  MPN_FIB2_SIZE  %ld\n", MPN_FIB2_SIZE (n));
+          printf ("  SIZ(want_fn)   %d\n", SIZ(want_fn));
+          abort ();
+        }
+
+      mpz_fib2_ui (got_fn, got_fn1, n);
+      MPZ_CHECK_FORMAT (got_fn);
+      MPZ_CHECK_FORMAT (got_fn1);
+      if (mpz_cmp (got_fn, want_fn) != 0 || mpz_cmp (got_fn1, want_fn1) != 0)
+        {
+          printf ("mpz_fib2_ui(%lu) wrong\n", n);
+          mpz_trace ("want fn ", want_fn);
+          mpz_trace ("got  fn ",  got_fn);
+          mpz_trace ("want fn1", want_fn1);
+          mpz_trace ("got  fn1",  got_fn1);
+          abort ();
+        }
+
+      mpz_fib_ui (got_fn, n);
+      MPZ_CHECK_FORMAT (got_fn);
+      if (mpz_cmp (got_fn, want_fn) != 0)
+        {
+          printf ("mpz_fib_ui(%lu) wrong\n", n);
+          mpz_trace ("want fn", want_fn);
+          mpz_trace ("got  fn", got_fn);
+          abort ();
+        }
+
+      mpz_add (want_fn1, want_fn1, want_fn);  /* F[n+1] = F[n] + F[n-1] */
+      mpz_swap (want_fn1, want_fn);
+    }
+
+  mpz_clear (want_fn);
+  mpz_clear (want_fn1);
+  mpz_clear (got_fn);
+  mpz_clear (got_fn1);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-fits.c b/tests/mpz/t-fits.c
new file mode 100644
index 0000000..6819588
--- /dev/null
+++ b/tests/mpz/t-fits.c
@@ -0,0 +1,197 @@
+/* Test mpz_fits_*_p */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Nothing sophisticated here, just exercise mpz_fits_*_p on a small amount
+   of data. */
+
+#define EXPECT_S(fun,name,answer)                                       \
+  got = fun (z);                                                        \
+  if (got != answer)                                                    \
+    {                                                                   \
+      printf ("%s (%s) got %d want %d\n", name, expr, got, answer);     \
+      printf (" z size %d\n", SIZ(z));                                  \
+      printf (" z dec "); mpz_out_str (stdout, 10, z); printf ("\n");   \
+      printf (" z hex "); mpz_out_str (stdout, 16, z); printf ("\n");   \
+      error = 1;                                                        \
+    }
+
+#define EXPECT(fun,answer)  EXPECT_S(fun,#fun,answer)
+
+int
+main (void)
+{
+  mpz_t       z;
+  int         got;
+  const char  *expr;
+  int         error = 0;
+
+  tests_start ();
+  mpz_init (z);
+
+  mpz_set_ui (z, 0L);
+  expr = "0";
+  EXPECT (mpz_fits_ulong_p, 1);
+  EXPECT (mpz_fits_uint_p, 1);
+  EXPECT (mpz_fits_ushort_p, 1);
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_ui (z, 1L);
+  expr = "1";
+  EXPECT (mpz_fits_ulong_p, 1);
+  EXPECT (mpz_fits_uint_p, 1);
+  EXPECT (mpz_fits_ushort_p, 1);
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_si (z, -1L);
+  expr = "-1";
+  EXPECT (mpz_fits_ulong_p, 0);
+  EXPECT (mpz_fits_uint_p, 0);
+  EXPECT (mpz_fits_ushort_p, 0);
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_ui (z, 1L);
+  mpz_mul_2exp (z, z, 5L*GMP_LIMB_BITS);
+  expr = "2^(5*BPML)";
+  EXPECT (mpz_fits_ulong_p, 0);
+  EXPECT (mpz_fits_uint_p, 0);
+  EXPECT (mpz_fits_ushort_p, 0);
+  EXPECT (mpz_fits_slong_p, 0);
+  EXPECT (mpz_fits_sint_p, 0);
+  EXPECT (mpz_fits_sshort_p, 0);
+
+
+  mpz_set_ui (z, (unsigned long) USHRT_MAX);
+  expr = "USHRT_MAX";
+  EXPECT (mpz_fits_ulong_p, 1);
+  EXPECT (mpz_fits_uint_p, 1);
+  EXPECT (mpz_fits_ushort_p, 1);
+
+  mpz_set_ui (z, (unsigned long) USHRT_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "USHRT_MAX + 1";
+  EXPECT (mpz_fits_ushort_p, 0);
+
+
+  mpz_set_ui (z, (unsigned long) UINT_MAX);
+  expr = "UINT_MAX";
+  EXPECT (mpz_fits_ulong_p, 1);
+  EXPECT (mpz_fits_uint_p, 1);
+
+  mpz_set_ui (z, (unsigned long) UINT_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "UINT_MAX + 1";
+  EXPECT (mpz_fits_uint_p, 0);
+
+
+  mpz_set_ui (z, ULONG_MAX);
+  expr = "ULONG_MAX";
+  EXPECT (mpz_fits_ulong_p, 1);
+
+  mpz_set_ui (z, ULONG_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "ULONG_MAX + 1";
+  EXPECT (mpz_fits_ulong_p, 0);
+
+
+  mpz_set_si (z, (long) SHRT_MAX);
+  expr = "SHRT_MAX";
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_si (z, (long) SHRT_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "SHRT_MAX + 1";
+  EXPECT (mpz_fits_sshort_p, 0);
+
+
+  mpz_set_si (z, (long) INT_MAX);
+  expr = "INT_MAX";
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+
+  mpz_set_si (z, (long) INT_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "INT_MAX + 1";
+  EXPECT (mpz_fits_sint_p, 0);
+
+
+  mpz_set_si (z, LONG_MAX);
+  expr = "LONG_MAX";
+  EXPECT (mpz_fits_slong_p, 1);
+
+  mpz_set_si (z, LONG_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "LONG_MAX + 1";
+  EXPECT (mpz_fits_slong_p, 0);
+
+
+  mpz_set_si (z, (long) SHRT_MIN);
+  expr = "SHRT_MIN";
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_si (z, (long) SHRT_MIN);
+  mpz_sub_ui (z, z, 1L);
+  expr = "SHRT_MIN + 1";
+  EXPECT (mpz_fits_sshort_p, 0);
+
+
+  mpz_set_si (z, (long) INT_MIN);
+  expr = "INT_MIN";
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+
+  mpz_set_si (z, (long) INT_MIN);
+  mpz_sub_ui (z, z, 1L);
+  expr = "INT_MIN + 1";
+  EXPECT (mpz_fits_sint_p, 0);
+
+
+  mpz_set_si (z, LONG_MIN);
+  expr = "LONG_MIN";
+  EXPECT (mpz_fits_slong_p, 1);
+
+  mpz_set_si (z, LONG_MIN);
+  mpz_sub_ui (z, z, 1L);
+  expr = "LONG_MIN + 1";
+  EXPECT (mpz_fits_slong_p, 0);
+
+
+  if (error)
+    abort ();
+
+  mpz_clear (z);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-gcd.c b/tests/mpz/t-gcd.c
new file mode 100644
index 0000000..46b278a
--- /dev/null
+++ b/tests/mpz/t-gcd.c
@@ -0,0 +1,467 @@
+/* Test mpz_gcd, mpz_gcdext, and mpz_gcd_ui.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2005, 2008, 2009, 2012 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Forward declarations for the test driver below.  */
+void one_test (mpz_t, mpz_t, mpz_t, int);
+void debug_mp (mpz_t, int);
+
+static int gcdext_valid_p (const mpz_t, const mpz_t, const mpz_t, const mpz_t);
+
+/* Keep one_test's variables global, so that we don't need
+   to reinitialize them for each test.  */
+mpz_t gcd1, gcd2, s, temp1, temp2, temp3;
+
+/* Upper bound used when sizing random operands; HGCD_REDUCE_THRESHOLD
+   comes from gmp-impl.h.  */
+#define MAX_SCHOENHAGE_THRESHOLD HGCD_REDUCE_THRESHOLD
+
+/* Define this to make all operands be large enough for Schoenhage gcd
+   to be used.  */
+#ifndef WHACK_SCHOENHAGE
+#define WHACK_SCHOENHAGE 0
+#endif
+
+/* Minimum random-operand size in bits for the loop in main.  */
+#if WHACK_SCHOENHAGE
+#define MIN_OPERAND_BITSIZE (MAX_SCHOENHAGE_THRESHOLD * GMP_NUMB_BITS)
+#else
+#define MIN_OPERAND_BITSIZE 1
+#endif
+
+
+/* Exercise mpz_gcd on fixed operand pairs with known expected results.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char *a;
+    const char *b;
+    const char *want;
+  } data[] = {
+    /* This tickled a bug in gmp 4.1.2 mpn/x86/k6/gcd_finda.asm. */
+    { "0x3FFC000007FFFFFFFFFF00000000003F83FFFFFFFFFFFFFFF80000000000000001",
+      "0x1FFE0007FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC000000000000000000000001",
+      "5" }
+  };
+
+  mpz_t  a, b, got, want;
+  int    idx;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (got);
+  mpz_init (want);
+
+  for (idx = 0; idx < numberof (data); idx++)
+    {
+      mpz_set_str_or_abort (a, data[idx].a, 0);
+      mpz_set_str_or_abort (b, data[idx].b, 0);
+      mpz_set_str_or_abort (want, data[idx].want, 0);
+
+      mpz_gcd (got, a, b);
+      MPZ_CHECK_FORMAT (got);
+      if (mpz_cmp (got, want) != 0)
+	{
+	  printf    ("mpz_gcd wrong on data[%d]\n", idx);
+	  printf    (" a  %s\n", data[idx].a);
+	  printf    (" b  %s\n", data[idx].b);
+	  mpz_trace (" a", a);
+	  mpz_trace (" b", b);
+	  mpz_trace (" want", want);
+	  mpz_trace (" got ", got);
+	  abort ();
+	}
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+/* Build an operand pair (A,B) with known gcd REF by running a division
+   chain backwards: start from A = 0 and a random B >= 1, then repeatedly
+   add a random multiple of one operand to the other.  Each such step
+   leaves gcd(A,B) unchanged, so the final gcd equals the initial B,
+   which is returned in REF.  NB1 bounds the bit size of the initial B,
+   NB2 bounds each quotient's bit size, CHAIN_LEN is the step count.
+   Randomness is consumed from RS in a fixed order; callers reseed RS to
+   reproduce exact operands, so the call sequence must not change.  */
+void
+make_chain_operands (mpz_t ref, mpz_t a, mpz_t b, gmp_randstate_t rs, int nb1, int nb2, int chain_len)
+{
+  mpz_t bs, temp1, temp2;
+  int j;
+
+  mpz_inits (bs, temp1, temp2, NULL);
+
+  /* Generate a division chain backwards, allowing otherwise unlikely huge
+     quotients.  */
+
+  mpz_set_ui (a, 0);
+  mpz_urandomb (bs, rs, 32);
+  mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb1 + 1);
+  mpz_rrandomb (b, rs, mpz_get_ui (bs));
+  mpz_add_ui (b, b, 1);		/* ensure b >= 1 */
+  mpz_set (ref, b);
+
+  for (j = 0; j < chain_len; j++)
+    {
+      /* a += b * (random nonzero quotient) */
+      mpz_urandomb (bs, rs, 32);
+      mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb2 + 1);
+      mpz_rrandomb (temp2, rs, mpz_get_ui (bs) + 1);
+      mpz_add_ui (temp2, temp2, 1);
+      mpz_mul (temp1, b, temp2);
+      mpz_add (a, a, temp1);
+
+      /* b += a * (random nonzero quotient) */
+      mpz_urandomb (bs, rs, 32);
+      mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb2 + 1);
+      mpz_rrandomb (temp2, rs, mpz_get_ui (bs) + 1);
+      mpz_add_ui (temp2, temp2, 1);
+      mpz_mul (temp1, a, temp2);
+      mpz_add (b, b, temp1);
+    }
+
+  mpz_clears (bs, temp1, temp2, NULL);
+}
+
+/* Test operands from a table of seed data.  This variant creates the operands
+   using plain ol' mpz_rrandomb.  This is a hack for better coverage of the gcd
+   code, which depends on that the random number generators give the exact
+   numbers we expect.  */
+void
+check_kolmo1 (void)
+{
+  static const struct {
+    unsigned int seed;   /* PRNG seed for this case */
+    int nb;              /* bound on operand bit counts */
+    const char *want;    /* expected gcd, as a decimal string */
+  } data[] = {
+    { 59618, 38208, "5"},
+    { 76521, 49024, "3"},
+    { 85869, 54976, "1"},
+    { 99449, 63680, "1"},
+    {112453, 72000, "1"}
+  };
+
+  gmp_randstate_t rs;
+  mpz_t  bs, a, b, want;
+  int    i, unb, vnb, nb;
+
+  gmp_randinit_default (rs);
+
+  mpz_inits (bs, a, b, want, NULL);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      nb = data[i].nb;
+
+      /* Reseed so the operands below are exactly reproducible; the
+         expected gcds in the table depend on these exact sequences.  */
+      gmp_randseed_ui (rs, data[i].seed);
+
+      /* Random bit counts for the two operands, each below nb.  */
+      mpz_urandomb (bs, rs, 32);
+      unb = mpz_get_ui (bs) % nb;
+      mpz_urandomb (bs, rs, 32);
+      vnb = mpz_get_ui (bs) % nb;
+
+      mpz_rrandomb (a, rs, unb);
+      mpz_rrandomb (b, rs, vnb);
+
+      mpz_set_str_or_abort (want, data[i].want, 0);
+
+      one_test (a, b, want, -1);
+    }
+
+  mpz_clears (bs, a, b, want, NULL);
+  gmp_randclear (rs);
+}
+
+/* Test operands from a table of seed data.  This variant creates the operands
+   using a division chain.  This is a hack for better coverage of the gcd
+   code, which depends on that the random number generators give the exact
+   numbers we expect.  */
+void
+check_kolmo2 (void)
+{
+  static const struct {
+    unsigned int seed;     /* PRNG seed for this case */
+    int nb, chain_len;     /* bit-size bound and division-chain length */
+  } data[] = {
+    {  917, 15, 5 },
+    { 1032, 18, 6 },
+    { 1167, 18, 6 },
+    { 1174, 18, 6 },
+    { 1192, 18, 6 },
+  };
+
+  gmp_randstate_t rs;
+  mpz_t  bs, a, b, want;
+  int    i;
+
+  gmp_randinit_default (rs);
+
+  mpz_inits (bs, a, b, want, NULL);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* Reseed for reproducibility; the chain construction also yields
+         the expected gcd in want.  */
+      gmp_randseed_ui (rs, data[i].seed);
+      make_chain_operands (want, a, b, rs, data[i].nb, data[i].nb, data[i].chain_len);
+      one_test (a, b, want, -1);
+    }
+
+  mpz_clears (bs, a, b, want, NULL);
+  gmp_randclear (rs);
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2, ref;
+  int i, chain_len;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  long int reps = 200;		/* default; TESTS_REPS may override from argv */
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_inits (bs, op1, op2, ref, gcd1, gcd2, temp1, temp2, temp3, s, NULL);
+
+  /* Fixed-data and seeded test cases first.  */
+  check_data ();
+  check_kolmo1 ();
+  check_kolmo2 ();
+
+  /* Testcase to exercise the u0 == u1 case in mpn_gcdext_lehmer_n. */
+  /* mpz_set_ui (op2, GMP_NUMB_MAX); */ /* FIXME: Huge limb doesn't always fit */
+  /* op2 = 2^GMP_NUMB_BITS - 1; op1 = op2 * (2^100 + 1); then op2 *= 2.  */
+  mpz_set_ui (op2, 0);
+  mpz_setbit (op2, GMP_NUMB_BITS);
+  mpz_sub_ui (op2, op2, 1);
+  mpz_mul_2exp (op1, op2, 100);
+  mpz_add (op1, op1, op2);
+  mpz_mul_ui (op2, op2, 2);
+  one_test (op1, op2, NULL, -1);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* Generate plain operands with unknown gcd.  These types of operands
+	 have proven to trigger certain bugs in development versions of the
+	 gcd code.  The "hgcd->row[3].rsize > M" ASSERT is not triggered by
+	 the division chain code below, but that is most likely just a result
+	 of that other ASSERTs are triggered before it.  */
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2;	/* 2 .. 18 */
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_BITSIZE);
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_BITSIZE);
+
+      /* Random flag bits steering the operand variations below.  */
+      mpz_urandomb (bs, rands, 8);
+      bsi = mpz_get_ui (bs);
+
+      if ((bsi & 0x3c) == 4)
+	mpz_mul (op1, op1, op2);	/* make op1 a multiple of op2 */
+      else if ((bsi & 0x3c) == 8)
+	mpz_mul (op2, op1, op2);	/* make op2 a multiple of op1 */
+
+      /* Random signs.  */
+      if ((bsi & 1) != 0)
+	mpz_neg (op1, op1);
+      if ((bsi & 2) != 0)
+	mpz_neg (op2, op2);
+
+      one_test (op1, op2, NULL, i);
+
+      /* Generate a division chain backwards, allowing otherwise unlikely huge
+	 quotients.  */
+
+      /* Pick a chain length with a heavily skewed random distribution.  */
+      mpz_urandomb (bs, rands, 32);
+      chain_len = mpz_get_ui (bs) % LOG2C (GMP_NUMB_BITS * MAX_SCHOENHAGE_THRESHOLD);
+      mpz_urandomb (bs, rands, 32);
+      chain_len = mpz_get_ui (bs) % (1 << chain_len) / 32;
+
+      make_chain_operands (ref, op1, op2, rands, 16, 12, chain_len);
+
+      one_test (op1, op2, ref, i);
+    }
+
+  /* Check that we can use NULL as first argument of mpz_gcdext.  */
+  /* gcd(-10, 0) = 10 = (-1)*(-10), so the cofactors must be -1 and 0.  */
+  mpz_set_si (op1, -10);
+  mpz_set_si (op2, 0);
+  mpz_gcdext (NULL, temp1, temp2, op1, op2);
+  ASSERT_ALWAYS (mpz_cmp_si (temp1, -1) == 0);
+  ASSERT_ALWAYS (mpz_cmp_si (temp2, 0) == 0);
+  /* gcd(-10, 6) = 2 = 1*(-10) + 2*6.  */
+  mpz_set_si (op2, 6);
+  mpz_gcdext (NULL, temp1, temp2, op1, op2);
+  ASSERT_ALWAYS (mpz_cmp_si (temp1, 1) == 0);
+  ASSERT_ALWAYS (mpz_cmp_si (temp2, 2) == 0);
+
+  mpz_clears (bs, op1, op2, ref, gcd1, gcd2, temp1, temp2, temp3, s, NULL);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print X to stderr in the given BASE, followed by a newline.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
+
+/* Run every gcd flavour on OP1, OP2 and cross-check the results:
+   mpz_gcdext with the t cofactor omitted (validated via gcdext_valid_p
+   and, when REF is non-NULL, against the expected gcd REF), plain
+   mpz_gcd, mpz_gcd_ui when an operand fits an unsigned long, and the
+   full mpz_gcdext with the Bezout identity g = s*op1 + t*op2.
+   I is the test number for error messages (-1 for fixed tests).
+   Uses the file-scope variables gcd1, gcd2, s, temp1, temp2, temp3.  */
+void
+one_test (mpz_t op1, mpz_t op2, mpz_t ref, int i)
+{
+  /*
+  printf ("%d %d %d\n", SIZ (op1), SIZ (op2), ref != NULL ? SIZ (ref) : 0);
+  fflush (stdout);
+  */
+
+  /*
+  fprintf (stderr, "op1=");  debug_mp (op1, -16);
+  fprintf (stderr, "op2=");  debug_mp (op2, -16);
+  */
+
+  /* gcdext with the second cofactor omitted.  */
+  mpz_gcdext (gcd1, s, NULL, op1, op2);
+  MPZ_CHECK_FORMAT (gcd1);
+  MPZ_CHECK_FORMAT (s);
+
+  if (ref && mpz_cmp (ref, gcd1) != 0)
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "mpz_gcdext returned incorrect result\n");
+      fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+      fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+      fprintf (stderr, "expected result:\n");   debug_mp (ref, -16);
+      fprintf (stderr, "mpz_gcdext returns:\n");debug_mp (gcd1, -16);
+      abort ();
+    }
+
+  if (!gcdext_valid_p(op1, op2, gcd1, s))
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "mpz_gcdext returned invalid result\n");
+      fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+      fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+      fprintf (stderr, "mpz_gcdext returns:\n");debug_mp (gcd1, -16);
+      fprintf (stderr, "s=");                   debug_mp (s, -16);
+      abort ();
+    }
+
+  /* Plain mpz_gcd must agree with mpz_gcdext's gcd.  */
+  mpz_gcd (gcd2, op1, op2);
+  MPZ_CHECK_FORMAT (gcd2);
+
+  if (mpz_cmp (gcd2, gcd1) != 0)
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "mpz_gcd returned incorrect result\n");
+      fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+      fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+      fprintf (stderr, "expected result:\n");   debug_mp (gcd1, -16);
+      fprintf (stderr, "mpz_gcd returns:\n");   debug_mp (gcd2, -16);
+      abort ();
+    }
+
+  /* This should probably move to t-gcd_ui.c */
+  if (mpz_fits_ulong_p (op1) || mpz_fits_ulong_p (op2))
+    {
+      if (mpz_fits_ulong_p (op1))
+	mpz_gcd_ui (gcd2, op2, mpz_get_ui (op1));
+      else
+	mpz_gcd_ui (gcd2, op1, mpz_get_ui (op2));
+      if (mpz_cmp (gcd2, gcd1))
+	{
+	  fprintf (stderr, "ERROR in test %d\n", i);
+	  fprintf (stderr, "mpz_gcd_ui returned incorrect result\n");
+	  fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+	  fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+	  fprintf (stderr, "expected result:\n");   debug_mp (gcd1, -16);
+	  fprintf (stderr, "mpz_gcd_ui returns:\n");   debug_mp (gcd2, -16);
+	  abort ();
+	}
+    }
+
+  /* Full gcdext; then verify the Bezout identity g = s*op1 + t*op2.  */
+  mpz_gcdext (gcd2, temp1, temp2, op1, op2);
+  MPZ_CHECK_FORMAT (gcd2);
+  MPZ_CHECK_FORMAT (temp1);
+  MPZ_CHECK_FORMAT (temp2);
+
+  mpz_mul (temp1, temp1, op1);
+  mpz_mul (temp2, temp2, op2);
+  mpz_add (temp1, temp1, temp2);
+
+  if (mpz_cmp (gcd1, gcd2) != 0
+      || mpz_cmp (gcd2, temp1) != 0)
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "mpz_gcdext returned incorrect result\n");
+      fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+      fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+      fprintf (stderr, "expected result:\n");   debug_mp (gcd1, -16);
+      fprintf (stderr, "mpz_gcdext returns:\n");debug_mp (gcd2, -16);
+      abort ();
+    }
+}
+
+/* Called when g is supposed to be gcd(a,b), and g = s a + t b, for some t.
+   Uses temp1, temp2 and temp3. */
+static int
+gcdext_valid_p (const mpz_t a, const mpz_t b, const mpz_t g, const mpz_t s)
+{
+  /* It's not clear that gcd(0,0) is well defined, but we allow it and require that
+     gcd(0,0) = 0. */
+  if (mpz_sgn (g) < 0)
+    return 0;			/* the gcd must never be negative */
+
+  if (mpz_sgn (a) == 0)
+    {
+      /* Must have g == abs (b). Any value for s is in some sense "correct",
+	 but it makes sense to require that s == 0. */
+      return mpz_cmpabs (g, b) == 0 && mpz_sgn (s) == 0;
+    }
+  else if (mpz_sgn (b) == 0)
+    {
+      /* Must have g == abs (a), s == sign (a) */
+      return mpz_cmpabs (g, a) == 0 && mpz_cmp_si (s, mpz_sgn (a)) == 0;
+    }
+
+  /* Both operands nonzero, so the gcd must be strictly positive.  */
+  if (mpz_sgn (g) <= 0)
+    return 0;
+
+  /* g must divide both operands exactly; keep the quotients a/g in temp1
+     and b/g in temp2 for the cofactor size checks below.  */
+  mpz_tdiv_qr (temp1, temp3, a, g);
+  if (mpz_sgn (temp3) != 0)
+    return 0;
+
+  mpz_tdiv_qr (temp2, temp3, b, g);
+  if (mpz_sgn (temp3) != 0)
+    return 0;
+
+  /* Require that 2 |s| < |b/g|, or |s| == 1. */
+  if (mpz_cmpabs_ui (s, 1) > 0)
+    {
+      mpz_mul_2exp (temp3, s, 1);
+      if (mpz_cmpabs (temp3, temp2) >= 0)
+	return 0;
+    }
+
+  /* Compute the other cofactor t = (g - s*a) / b; the division must be
+     exact for g = s*a + t*b to hold. */
+  mpz_mul(temp2, s, a);
+  mpz_sub(temp2, g, temp2);
+  mpz_tdiv_qr(temp2, temp3, temp2, b);
+
+  if (mpz_sgn (temp3) != 0)
+    return 0;
+
+  /* Require that 2 |t| < |a/g| or |t| == 1*/
+  if (mpz_cmpabs_ui (temp2, 1) > 0)
+    {
+      mpz_mul_2exp (temp2, temp2, 1);
+      if (mpz_cmpabs (temp2, temp1) >= 0)
+	return 0;
+    }
+  return 1;
+}
diff --git a/tests/mpz/t-gcd_ui.c b/tests/mpz/t-gcd_ui.c
new file mode 100644
index 0000000..3f56a97
--- /dev/null
+++ b/tests/mpz/t-gcd_ui.c
@@ -0,0 +1,156 @@
+/* Test mpz_gcd_ui.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Check mpz_gcd_ui doesn't try to return a value out of range.
+   This was wrong in gmp 4.1.2 with a long long limb.  Here x never fits
+   an unsigned long, so gcd (x, 0) == abs(x) cannot be returned as the
+   function value and 0 must come back instead.  */
+static void
+check_ui_range (void)
+{
+  unsigned long  res;
+  mpz_t  x;
+  int  shift;
+
+  mpz_init_set_ui (x, ULONG_MAX);
+
+  for (shift = 0; shift < 20; shift++)
+    {
+      mpz_mul_2exp (x, x, 1L);
+      res = mpz_gcd_ui (NULL, x, 0L);
+      if (res != 0)
+        {
+          printf ("mpz_gcd_ui (ULONG_MAX*2^%d, 0)\n", shift);
+          printf ("   return %#lx\n", res);
+          printf ("   should be 0\n");
+          abort ();
+        }
+    }
+
+  mpz_clear (x);
+}
+
+/* Exercise mpz_gcd_ui with a full-limb divisor d: an odd multiple of a
+   known factor of 2^GMP_NUMB_BITS + 1.  A perturbation bit b is walked
+   through residues via b = b*f mod d, with f = 2^GMP_NUMB_BITS mod d;
+   the gcd of x with d is expected to be d exactly when x is the
+   unperturbed multiple k*d, and 1 for x +/- b.  */
+static void
+check_ui_factors (void)
+{
+#define NUM_FACTORS 9
+  static const char* factors[NUM_FACTORS] = {
+    "641", "274177", "3", "5", "17", "257", "65537",
+    "59649589127497217", "1238926361552897" };
+  unsigned long  got;
+  mpz_t  x, b, d, f, g;
+  int  i, j;
+  gmp_randstate_ptr rands;
+
+  /* Skip limb sizes for which the table holds no factor of 2^bits + 1.  */
+  if (GMP_NUMB_BITS < 5 || GMP_NUMB_BITS == 8
+      || GMP_NUMB_BITS == 16 || GMP_NUMB_BITS > 511)
+    {
+      printf ("No usable factors for 2^%i+1.\n", GMP_NUMB_BITS);
+      return;
+    }
+
+  mpz_init (x);
+  mpz_init (d);
+  mpz_init (f);
+  mpz_init (g);
+
+  /* x = 2^GMP_NUMB_BITS + 1 */
+  mpz_setbit (x, GMP_NUMB_BITS);
+  mpz_add_ui (x, x, 1);
+
+  for (i = 0; i < NUM_FACTORS; ++i)
+    {
+      mpz_set_str (f, factors[i], 10);
+      if (mpz_divisible_p (x, f))
+	{
+	  mpz_mul_2exp (f, f, 1);
+	  /* d is an odd multiple of the factor f, exactly filling a limb. */
+	  mpz_sub (d, x, f);
+	  /* f = 2^GMP_NUMB_BITS mod d. */
+	  mpz_sub_ui (f, f, 1);
+	  break;
+	}
+    }
+
+  /* Sanity-check the derivation above before relying on it.  */
+  mpz_gcd (g, f, d);
+  if (mpz_even_p (d) || mpz_cmp (d, f) <= 0 || mpz_cmp_ui (g, 1) != 0)
+    {
+      printf ("No usable factor found.\n");
+      abort ();
+    }
+
+  /* x = k*d for a random small k >= 1.  */
+  rands = RANDS;
+  mpz_mul_ui (x, d, gmp_urandomm_ui (rands, 30000) + 1);
+
+  mpz_init (b);
+  mpz_setbit (b, GMP_NUMB_BITS - 1);
+  for (j = 0; j < 4; ++j)
+    {
+      mpz_add (x, x, b);
+
+      /* i = 1, 0, -1 tests x+b, x, and x-b respectively.  */
+      for (i = 1; i >= -1; --i)
+	{
+	  if (mpz_fits_ulong_p (d)
+	      && ((got = mpz_gcd_ui (NULL, x, mpz_get_ui (d)))
+		  != (i != 0 ? 1 : mpz_get_ui (d))))
+	    {
+	      printf ("mpz_gcd_ui (f, kV+%i*2^%i, V): error (j = %i)\n", i, GMP_NUMB_BITS - 1, j);
+	      printf ("   return %#lx\n", got);
+	      printf ("   should be %#lx\n", (i != 0 ? 1 : mpz_get_ui (d)));
+	      abort ();
+	    }
+
+	  /* mpz_gcd must agree on whether x and d are coprime.  */
+	  mpz_gcd (g, x, d);
+	  if ((mpz_cmp_ui (g, 1) == 0) != (i != 0))
+	    {
+	      printf ("mpz_gcd (f, kV+%i*2^%i, V): error (j = %i)\n", i, GMP_NUMB_BITS - 1, j);
+	      printf ("   should%s be one.\n",(i != 0 ? "" : " not"));
+	      abort ();
+	    }
+
+	  mpz_sub (x, x, b);
+	}
+      /* Back to the original x. */
+      mpz_addmul_ui (x, b, 2);
+      /* Advance the perturbation bit to the next residue (mod d).  */
+      mpz_mul (b, b, f);
+      mpz_mod (b, b, d);
+    }
+
+  mpz_clear (g);
+  mpz_clear (x);
+  mpz_clear (f);
+  mpz_clear (d);
+  mpz_clear (b);
+}
+
+
+/* Entry point: run every mpz_gcd_ui check inside the test harness.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_ui_range ();
+  check_ui_factors ();
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpz/t-get_d.c b/tests/mpz/t-get_d.c
new file mode 100644
index 0000000..f4c839f
--- /dev/null
+++ b/tests/mpz/t-get_d.c
@@ -0,0 +1,80 @@
+/* Test mpz_get_d.
+
+Copyright 2002, 2012, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* mpz_get_d must be exact on 0 and on every power of two within the
+   floating-point range.  */
+void
+check_onebit (void)
+{
+  int     bit;
+  mpz_t   z;
+  double  actual, expect;
+  /* FIXME: It'd be better to base this on the float format. */
+#if defined (__vax) || defined (__vax__)
+  int     limit = 127 - 1;  /* vax fp numbers have limited range */
+#else
+  int     limit = 512;
+#endif
+
+  mpz_init (z);
+
+  actual = mpz_get_d (z);
+  if (actual != 0)
+    {
+      printf    ("mpz_get_d wrong on zero\n");
+      abort();
+    }
+
+  mpz_set_ui (z, 1L);
+  expect = 1.0;
+
+  for (bit = 0; bit < limit; bit++)
+    {
+      actual = mpz_get_d (z);
+
+      if (actual != expect)
+        {
+          printf    ("mpz_get_d wrong on 2**%d\n", bit);
+          mpz_trace ("   z    ", z);
+          printf    ("   want  %.20g\n", expect);
+          printf    ("   got   %.20g\n", actual);
+          abort();
+        }
+
+      /* Step to the next power of two on both sides.  */
+      mpz_mul_2exp (z, z, 1L);
+      expect *= 2.0;
+    }
+  mpz_clear (z);
+}
+
+
+/* Entry point: run the mpz_get_d checks inside the test harness.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_onebit ();
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpz/t-get_d_2exp.c b/tests/mpz/t-get_d_2exp.c
new file mode 100644
index 0000000..04cf1a4
--- /dev/null
+++ b/tests/mpz/t-get_d_2exp.c
@@ -0,0 +1,222 @@
+/* Test mpz_get_d_2exp.
+
+Copyright 2002, 2003, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* mpz_get_d_2exp on zero must produce 0.0 with a zero exponent.  */
+static void
+check_zero (void)
+{
+  mpz_t   z;
+  double  got;
+  long    got_exp;
+
+  mpz_init_set_ui (z, 0);
+
+  got = mpz_get_d_2exp (&got_exp, z);
+  if (got != 0.0 || got_exp != 0)
+    {
+      printf    ("mpz_get_d_2exp wrong on zero\n");
+      mpz_trace ("   z    ", z);
+      d_trace   ("   want ", 0.0);
+      d_trace   ("   got  ", got);
+      printf    ("   want exp %ld\n", 0L);
+      printf    ("   got exp  %ld\n", got_exp);
+      abort();
+    }
+
+  mpz_clear (z);
+}
+
+/* Single-bit values: +/-2^k must come back as a +/-0.5 mantissa with
+   exponent k+1.  */
+static void
+check_onebit (void)
+{
+  static const unsigned long data[] = {
+    1, 32, 52, 53, 54, 63, 64, 65, 128, 256, 511, 512, 513
+  };
+  mpz_t   z;
+  double  got, want;
+  long    got_exp, want_exp;
+  int     i;
+
+  mpz_init (z);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      int  neg;
+
+      /* First pass checks 2^data[i], second pass -2^data[i].  */
+      for (neg = 0; neg <= 1; neg++)
+        {
+          mpz_set_si (z, neg ? -1L : 1L);
+          mpz_mul_2exp (z, z, data[i]);
+          want = neg ? -0.5 : 0.5;
+          want_exp = data[i] + 1;
+          got = mpz_get_d_2exp (&got_exp, z);
+          if (got != want || got_exp != want_exp)
+            {
+              printf    (neg ? "mpz_get_d_2exp wrong on -2**%ld\n"
+                             : "mpz_get_d_2exp wrong on 2**%ld\n", data[i]);
+              mpz_trace ("   z    ", z);
+              d_trace   ("   want ", want);
+              d_trace   ("   got  ", got);
+              printf    ("   want exp %ld\n", want_exp);
+              printf    ("   got exp  %ld\n", got_exp);
+              abort();
+            }
+        }
+    }
+  mpz_clear (z);
+}
+
+/* Check that hardware rounding doesn't make mpz_get_d_2exp return a value
+   outside its defined range. */
+static void
+check_round (void)
+{
+  static const unsigned long data[] = { 1, 32, 53, 54, 64, 128, 256, 512 };
+  mpz_t   z;
+  double  got;
+  long    got_exp;
+  int     i, rnd_mode, old_rnd_mode;
+
+  mpz_init (z);
+  /* Remember the hardware rounding mode so it can be restored on exit.  */
+  old_rnd_mode = tests_hardware_getround ();
+
+  for (rnd_mode = 0; rnd_mode < 4; rnd_mode++)
+    {
+      tests_hardware_setround (rnd_mode);
+
+      for (i = 0; i < numberof (data); i++)
+        {
+          /* z = 2^data[i] - 1, i.e. data[i] consecutive one bits.  */
+          mpz_set_ui (z, 1L);
+          mpz_mul_2exp (z, z, data[i]);
+          mpz_sub_ui (z, z, 1L);
+
+          got = mpz_get_d_2exp (&got_exp, z);
+          if (got < 0.5 || got >= 1.0)
+            {
+              printf    ("mpz_get_d_2exp wrong on 2**%lu-1\n", data[i]);
+              printf    ("result out of range, expect 0.5 <= got < 1.0\n");
+              printf    ("   rnd_mode = %d\n", rnd_mode);
+              printf    ("   data[i]  = %lu\n", data[i]);
+              mpz_trace ("   z    ", z);
+              d_trace   ("   got  ", got);
+              printf    ("   got exp  %ld\n", got_exp);
+              abort();
+            }
+
+          /* Same value negated; the mantissa range mirrors.  */
+          mpz_neg (z, z);
+          got = mpz_get_d_2exp (&got_exp, z);
+          if (got <= -1.0 || got > -0.5)
+            {
+              printf    ("mpz_get_d_2exp wrong on -2**%lu-1\n", data[i]);
+              printf    ("result out of range, expect -1.0 < got <= -0.5\n");
+              printf    ("   rnd_mode = %d\n", rnd_mode);
+              printf    ("   data[i]  = %lu\n", data[i]);
+              mpz_trace ("   z    ", z);
+              d_trace   ("   got  ", got);
+              printf    ("   got exp  %ld\n", got_exp);
+              abort();
+            }
+        }
+    }
+
+  mpz_clear (z);
+  tests_hardware_setround (old_rnd_mode);
+}
+
+/* Random values: for nonzero z the returned mantissa must satisfy
+   0.5 <= got < 1.0; zero inputs are skipped.  */
+static void
+check_rand (void)
+{
+  gmp_randstate_ptr rands = RANDS;
+  int     i;
+  mpz_t   z;
+  double  got;
+  long    got_exp;
+  unsigned long  bits;
+
+  mpz_init (z);
+
+  for (i = 0; i < 200; i++)
+    {
+      bits = gmp_urandomm_ui (rands, 512L);
+      mpz_urandomb (z, rands, bits);
+
+      got = mpz_get_d_2exp (&got_exp, z);
+      if (mpz_sgn (z) == 0)
+        continue;               /* zero has no normalized mantissa */
+      bits = mpz_sizeinbase (z, 2);
+
+      if (got < 0.5 || got >= 1.0)
+        {
+          printf    ("mpz_get_d_2exp out of range, expect 0.5 <= got < 1.0\n");
+          mpz_trace ("   z    ", z);
+          d_trace   ("   got  ", got);
+          printf    ("   got exp  %ld\n", got_exp);
+          abort();
+        }
+
+      /* FIXME: If mpz_get_d_2exp rounds upwards we might have got_exp ==
+         bits+1, so leave this test disabled until we decide if that's what
+         should happen, or not.  */
+#if 0
+      if (got_exp != bits)
+        {
+          printf    ("mpz_get_d_2exp wrong exponent\n", i);
+          mpz_trace ("   z    ", z);
+          d_trace   ("   bits ", bits);
+          d_trace   ("   got  ", got);
+          printf    ("   got exp  %ld\n", got_exp);
+          abort();
+        }
+#endif
+    }
+  mpz_clear (z);
+}
+
+
+/* Entry point: trace in hex, then run every mpz_get_d_2exp check.  */
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_zero ();
+  check_onebit ();
+  check_round ();
+  check_rand ();
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpz/t-get_si.c b/tests/mpz/t-get_si.c
new file mode 100644
index 0000000..51f104a
--- /dev/null
+++ b/tests/mpz/t-get_si.c
@@ -0,0 +1,121 @@
+/* Exercise mpz_get_si.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check mpz_get_si on small values parsed from decimal strings.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *n;
+    long        want;
+  } data[] = {
+    { "0",      0L },
+    { "1",      1L },
+    { "-1",     -1L },
+    { "2",      2L },
+    { "-2",     -2L },
+    { "12345",  12345L },
+    { "-12345", -12345L },
+  };
+
+  int    idx;
+  mpz_t  n;
+
+  mpz_init (n);
+  for (idx = 0; idx < numberof (data); idx++)
+    {
+      long  got;
+
+      mpz_set_str_or_abort (n, data[idx].n, 0);
+
+      got = mpz_get_si (n);
+      if (got != data[idx].want)
+	{
+	  printf ("mpz_get_si wrong at data[%d]\n", idx);
+	  printf ("   n     \"%s\" (", data[idx].n);
+	  mpz_out_str (stdout, 10, n); printf (", hex ");
+	  mpz_out_str (stdout, 16, n); printf (")\n");
+	  printf ("   got   %ld (0x%lX)\n", got, got);
+	  printf ("   want  %ld (0x%lX)\n", data[idx].want, data[idx].want);
+	  abort();
+	}
+    }
+  mpz_clear (n);
+}
+
+
+/* Check mpz_get_si at the extremes of the long range, and on the one
+   documented out-of-range case.  */
+void
+check_max (void)
+{
+  mpz_t  n;
+  long   want;
+  long   got;
+
+  mpz_init (n);
+
+#define CHECK_MAX(name)                                 \
+  if (got != want)                                      \
+    {                                                   \
+      printf ("mpz_get_si wrong on %s\n", name);        \
+      printf ("   n    ");                              \
+      mpz_out_str (stdout, 10, n); printf (", hex ");   \
+      mpz_out_str (stdout, 16, n); printf ("\n");       \
+      printf ("   got  %ld, hex %lX\n", got, got);      \
+      printf ("   want %ld, hex %lX\n", want, want);    \
+      abort();                                          \
+    }
+
+  want = LONG_MAX;
+  mpz_set_si (n, want);
+  got = mpz_get_si (n);
+  CHECK_MAX ("LONG_MAX");
+
+  want = LONG_MIN;
+  mpz_set_si (n, want);
+  got = mpz_get_si (n);
+  CHECK_MAX ("LONG_MIN");
+
+  /* The following checks that -0x100000000 gives -0x80000000.  This doesn't
+     actually fit in a long and the result from mpz_get_si() is undefined,
+     but -0x80000000 is what comes out currently, and it should be that
+     value irrespective of the mp_limb_t size (long or long long).  */
+
+  want = LONG_MIN;
+  mpz_mul_2exp (n, n, 1);
+  got = mpz_get_si (n);   /* re-read n: without this the stanza compared the
+                             stale LONG_MIN result and could never fail */
+  CHECK_MAX ("-0x100...00");
+
+  mpz_clear (n);
+}
+
+
+/* Entry point: run the fixed-data and extreme-value mpz_get_si checks.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+  check_max ();
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpz/t-hamdist.c b/tests/mpz/t-hamdist.c
new file mode 100644
index 0000000..544a03f
--- /dev/null
+++ b/tests/mpz/t-hamdist.c
@@ -0,0 +1,123 @@
+/* Test mpz_hamdist.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check mpz_hamdist on x = +-2^i versus y = +-2^j for all bit positions
+   within 5 limbs.  Two distinct positive one-bit values differ in exactly
+   two bit positions (want 2, or 0 when i == j); for the negated pair the
+   expected distance is |i - j| — their infinite twos-complement forms
+   ...11100...0 differ only in the run of ones between bits i and j.  */
+void
+check_twobits (void)
+{
+  unsigned long  i, j, got, want;
+  mpz_t  x, y;
+
+  mpz_init (x);
+  mpz_init (y);
+  for (i = 0; i < 5 * GMP_NUMB_BITS; i++)
+    {
+      for (j = 0; j < 5 * GMP_NUMB_BITS; j++)
+        {
+          mpz_set_ui (x, 0L);
+          mpz_setbit (x, i);
+          mpz_set_ui (y, 0L);
+          mpz_setbit (y, j);
+
+          want = 2 * (i != j);
+          got = mpz_hamdist (x, y);
+          if (got != want)
+            {
+              printf    ("mpz_hamdist wrong on 2 bits pos/pos\n");
+              /* shared error reporter; also entered by goto below */
+            wrong:
+              printf    ("  i    %lu\n", i);
+              printf    ("  j    %lu\n", j);
+              printf    ("  got  %lu\n", got);
+              printf    ("  want %lu\n", want);
+              mpz_trace ("  x   ", x);
+              mpz_trace ("  y   ", y);
+              abort();
+            }
+
+          mpz_neg (x, x);
+          mpz_neg (y, y);
+          want = ABS ((long) (i-j));
+          got = mpz_hamdist (x, y);
+          if (got != want)
+            {
+              printf    ("mpz_hamdist wrong on 2 bits neg/neg\n");
+              goto wrong;
+            }
+        }
+
+    }
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+/* Cross-check mpz_hamdist against the reference refmpz_hamdist on random
+   operands of either sign, shifted left by random bit counts.  The RNG
+   helper calls must stay in this exact order for reproducibility.  */
+void
+check_rand (void)
+{
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  got, want;
+  int    i;
+  mpz_t  x, y;
+
+  mpz_init (x);
+  mpz_init (y);
+
+  for (i = 0; i < 2000; i++)
+    {
+      mpz_erandomb (x, rands, 6 * GMP_NUMB_BITS);
+      mpz_negrandom (x, rands);
+      mpz_mul_2exp (x, x, urandom() % (4 * GMP_NUMB_BITS));
+
+      mpz_erandomb (y, rands, 6 * GMP_NUMB_BITS);
+      mpz_negrandom (y, rands);
+      mpz_mul_2exp (y, y, urandom() % (4 * GMP_NUMB_BITS));
+
+      want = refmpz_hamdist (x, y);
+      got = mpz_hamdist (x, y);
+      if (got != want)
+        {
+          printf    ("mpz_hamdist wrong on random\n");
+          printf    ("  got  %lu\n", got);
+          printf    ("  want %lu\n", want);
+          mpz_trace ("  x   ", x);
+          mpz_trace ("  y   ", y);
+          abort();
+        }
+    }
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+/* Entry point: trace in hex, then run every mpz_hamdist check.  */
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_twobits ();
+  check_rand ();
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpz/t-import.c b/tests/mpz/t-import.c
new file mode 100644
index 0000000..a295317
--- /dev/null
+++ b/tests/mpz/t-import.c
@@ -0,0 +1,175 @@
+/* Test mpz_import.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_data (void)
+{
+  static const struct {
+    const char  *want;
+    size_t      count;
+    int         order;
+    size_t      size;
+    int         endian;
+    int         nail;
+    char        src[64];
+
+  } data[] = {
+
+    { "0", 0,1, 1,1, 0 },
+    { "0", 1,1, 0,1, 0 },
+
+    { "0x12345678", 4,1,  1,1, 0, { '\22', '\64', '\126', '\170' } },
+    { "0x12345678", 1,1,  4,1, 0, { '\22', '\64', '\126', '\170' } },
+    { "0x12345678", 1,-1, 4,1, 0, { '\22', '\64', '\126', '\170' } },
+
+    { "0x12345678", 4,-1, 1,-1, 0, { '\170', '\126', '\064', '\22' } },
+    { "0x12345678", 1,1,  4,-1, 0, { '\170', '\126', '\064', '\22' } },
+    { "0x12345678", 1,-1, 4,-1, 0, { '\170', '\126', '\064', '\22' } },
+
+    { "0",    5,1,  1,1, 7, { '\376', '\376', '\376', '\376', '\376' } },
+    { "0",    5,-1, 1,1, 7, { '\376', '\376', '\376', '\376', '\376' } },
+    { "0x15", 5,1,  1,1, 7, { '\377', '\376', '\377', '\376', '\377' } },
+
+    { "0",    3,1,  2,1,   1, { '\200','\000', '\200','\000', '\200','\000' }},
+    { "0",    3,1,  2,-1,  1, { '\000','\200', '\000','\200', '\000','\200' }},
+    { "0",    3,1,  2,1,  15, { '\377','\376', '\377','\376', '\377','\376' }},
+
+    { "0x2A", 3,1,  2,1, 14, { '\377','\376', '\377','\376', '\377','\376' } },
+    { "0x06", 3,1,  2,1, 14, { '\377','\374', '\377','\375', '\377','\376' } },
+    { "0x24", 3,-1, 2,1, 14, { '\377','\374', '\377','\375', '\377','\376' } },
+
+    { "0x123456789ABC", 3,1,  2,1,  0, {
+        '\022','\064', '\126','\170', '\232','\274' } },
+    { "0x123456789ABC", 3,-1, 2,1,  0, {
+        '\232','\274', '\126','\170', '\022','\064' } },
+    { "0x123456789ABC", 3,1,  2,-1, 0, {
+        '\064','\022', '\170','\126', '\274','\232' } },
+    { "0x123456789ABC", 3,-1, 2,-1, 0, {
+        '\274','\232', '\170','\126', '\064','\022' } },
+
+    { "0x112233445566778899AABBCC", 3,1,  4,1,  0,
+      { '\021','\042','\063','\104',
+        '\125','\146','\167','\210',
+        '\231','\252','\273','\314' } },
+    { "0x112233445566778899AABBCC", 3,-1, 4,1,  0,
+      { '\231','\252','\273','\314',
+        '\125','\146','\167','\210',
+        '\021','\042','\063','\104' } },
+    { "0x112233445566778899AABBCC", 3,1,  4,-1, 0,
+      { '\104','\063','\042','\021',
+        '\210','\167','\146','\125',
+        '\314','\273','\252','\231' } },
+    { "0x112233445566778899AABBCC", 3,-1, 4,-1, 0,
+      { '\314','\273','\252','\231',
+        '\210','\167','\146','\125',
+        '\104','\063','\042','\021' } },
+
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,1,  8,1,  0,
+      { '\020','\001','\040','\002','\060','\003','\100','\004',
+        '\120','\005','\140','\006','\160','\007','\200','\010',
+        '\220','\011','\240','\012','\260','\013','\300','\014' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,-1, 8,1,  0,
+      { '\220','\011','\240','\012','\260','\013','\300','\014',
+        '\120','\005','\140','\006','\160','\007','\200','\010',
+        '\020','\001','\040','\002','\060','\003','\100','\004' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,1,  8,-1, 0,
+      { '\004','\100','\003','\060','\002','\040','\001','\020',
+        '\010','\200','\007','\160','\006','\140','\005','\120',
+        '\014','\300','\013','\260','\012','\240','\011','\220' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,-1, 8,-1, 0,
+      { '\014','\300','\013','\260','\012','\240','\011','\220',
+        '\010','\200','\007','\160','\006','\140','\005','\120',
+        '\004','\100','\003','\060','\002','\040','\001','\020' } },
+
+    { "0x155555555555555555555555", 3,1,  4,1,  1,
+      { '\325','\125','\125','\125',
+        '\252','\252','\252','\252',
+        '\325','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,-1,  4,1,  1,
+      { '\325','\125','\125','\125',
+        '\252','\252','\252','\252',
+        '\325','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,1,  4,-1,  1,
+      { '\125','\125','\125','\325',
+        '\252','\252','\252','\252',
+        '\125','\125','\125','\325' } },
+    { "0x155555555555555555555555", 3,-1,  4,-1,  1,
+      { '\125','\125','\125','\325',
+        '\252','\252','\252','\252',
+        '\125','\125','\125','\325' } },
+  };
+
+  char    buf[sizeof(data[0].src) + sizeof (mp_limb_t)];
+  char    *src;
+  size_t  align;
+  int     i;
+  mpz_t   got, want;
+
+  mpz_init (got);
+  mpz_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (align = 0; align < sizeof (mp_limb_t); align++)
+        {
+          mpz_set_str_or_abort (want, data[i].want, 0);
+          src = buf + align;
+          memcpy (src, data[i].src, data[i].count * data[i].size);
+
+          mpz_set_ui (got, 0L);
+          mpz_import (got, data[i].count, data[i].order,
+                      data[i].size, data[i].endian, data[i].nail, src);
+
+          MPZ_CHECK_FORMAT (got);
+          if (mpz_cmp (got, want) != 0)
+            {
+              printf ("wrong at data[%d]\n", i);
+              printf ("    count=%lu order=%d  size=%lu endian=%d nail=%u  align=%lu\n",
+                      (unsigned long) data[i].count, data[i].order,
+                      (unsigned long) data[i].size, data[i].endian, data[i].nail,
+                      (unsigned long) align);
+              mpz_trace ("    got ", got);
+              mpz_trace ("    want", want);
+              abort ();
+            }
+        }
+    }
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+
+  mp_trace_base = -16;
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-inp_str.c b/tests/mpz/t-inp_str.c
new file mode 100644
index 0000000..6c1c0c4
--- /dev/null
+++ b/tests/mpz/t-inp_str.c
@@ -0,0 +1,198 @@
+/* Test mpz_inp_str.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>		/* for unlink */
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define FILENAME  "/tmp/t-inp_str.tmp"
+
+
+void
+check_data (void)
+{
+  static const struct {
+    const char  *inp;
+    int         base;
+    const char  *want;
+    int         want_nread;
+
+  } data[] = {
+
+    { "0",   10, "0", 1 },
+
+    { "abc", 10, "0", 0 },
+    { "0xf", 10, "0", 1 },
+    { "ghi", 16, "0", 0 },
+    { "100", 90, "0", 0 },
+
+    {  "ff", 16,  "255", 2 },
+    { "-ff", 16, "-255", 3 },
+    {  "FF", 16,  "255", 2 },
+    { "-FF", 16, "-255", 3 },
+
+    {  "z", 36, "35", 1 },
+    {  "Z", 36, "35", 1 },
+    { "1B", 59, "70", 2 },
+    {  "a", 60, "36", 1 },
+    {  "A", 61, "10", 1 },
+
+    {  "0x0",    0,   "0", 3 },
+    {  "0X10",   0,  "16", 4 },
+    { "-0X0",    0,   "0", 4 },
+    { "-0x10",   0, "-16", 5 },
+
+    {  "0b0",    0,  "0", 3 },
+    {  "0B10",   0,  "2", 4 },
+    { "-0B0",    0,  "0", 4 },
+    { "-0b10",   0, "-2", 5 },
+
+    {  "00",   0,  "0", 2 },
+    {  "010",  0,  "8", 3 },
+    { "-00",   0,  "0", 3 },
+    { "-010",  0, "-8", 4 },
+
+    {  "0x",     0,   "0", 2 },
+    {  "0",      0,   "0", 1 },
+    { " 030",   10,  "30", 4 },
+  };
+
+  mpz_t  got, want;
+  long   ftell_nread;
+  int    i, pre, post, j, got_nread, want_nread;
+  FILE   *fp;
+
+  mpz_init (got);
+  mpz_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (pre = 0; pre <= 3; pre++)
+	{
+	  for (post = 0; post <= 2; post++)
+	    {
+	      mpz_set_str_or_abort (want, data[i].want, 0);
+	      MPZ_CHECK_FORMAT (want);
+
+	      /* create the file new each time to ensure its length is what
+		 we want */
+	      fp = fopen (FILENAME, "w+");
+	      ASSERT_ALWAYS (fp != NULL);
+	      for (j = 0; j < pre; j++)
+		putc (' ', fp);
+	      fputs (data[i].inp, fp);
+	      for (j = 0; j < post; j++)
+		putc (' ', fp);
+	      fflush (fp);
+	      ASSERT_ALWAYS (! ferror(fp));
+
+	      rewind (fp);
+	      got_nread = mpz_inp_str (got, fp, data[i].base);
+
+	      if (got_nread != 0)
+		{
+		  ftell_nread = ftell (fp);
+		  if (got_nread != ftell_nread)
+		    {
+		      printf ("mpz_inp_str nread wrong\n");
+		      printf ("  inp          \"%s\"\n", data[i].inp);
+		      printf ("  base         %d\n", data[i].base);
+		      printf ("  pre          %d\n", pre);
+		      printf ("  post         %d\n", post);
+		      printf ("  got_nread    %d\n", got_nread);
+		      printf ("  ftell_nread  %ld\n", ftell_nread);
+		      abort ();
+		    }
+		}
+
+	      /* if data[i].inp is a whole string to read and there's no post
+		 whitespace then expect to have EOF */
+	      if (post == 0 && data[i].want_nread == strlen(data[i].inp))
+		{
+		  int  c = getc(fp);
+		  if (c != EOF)
+		    {
+		      printf ("mpz_inp_str didn't read to EOF\n");
+		      printf ("  inp   \"%s\"\n", data[i].inp);
+		      printf ("  base  %d\n", data[i].base);
+		      printf ("  pre   %d\n", pre);
+		      printf ("  post  %d\n", post);
+		      printf ("  c     '%c' %#x\n", c, c);
+		      abort ();
+		    }
+		}
+
+	      /* only expect "pre" included in the count when non-zero */
+	      want_nread = data[i].want_nread;
+	      if (want_nread != 0)
+		want_nread += pre;
+
+	      if (got_nread != want_nread)
+		{
+		  printf ("mpz_inp_str nread wrong\n");
+		  printf ("  inp         \"%s\"\n", data[i].inp);
+		  printf ("  base        %d\n", data[i].base);
+		  printf ("  pre         %d\n", pre);
+		  printf ("  post        %d\n", post);
+		  printf ("  got_nread   %d\n", got_nread);
+		  printf ("  want_nread  %d\n", want_nread);
+		  abort ();
+		}
+
+	      MPZ_CHECK_FORMAT (got);
+
+	      if (mpz_cmp (got, want) != 0)
+		{
+		  printf ("mpz_inp_str wrong result\n");
+		  printf ("  inp   \"%s\"\n", data[i].inp);
+		  printf ("  base  %d\n", data[i].base);
+		  mpz_trace ("  got ",  got);
+		  mpz_trace ("  want", want);
+		  abort ();
+		}
+
+	      ASSERT_ALWAYS (fclose (fp) == 0);
+	    }
+	}
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  unlink (FILENAME);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-invert.c b/tests/mpz/t-invert.c
new file mode 100644
index 0000000..0081c49
--- /dev/null
+++ b/tests/mpz/t-invert.c
@@ -0,0 +1,117 @@
+/* Test mpz_invert.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2005, 2008, 2009, 2012, 2014 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+int
+main (int argc, char **argv)
+{
+  mpz_t a, m, ainv, t;
+  int test, r;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  int reps = 1000;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+  mpz_init (a);
+  mpz_init (m);
+  mpz_init (ainv);
+  mpz_init (t);
+
+  for (test = 0; test < reps; test++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (a, rands, mpz_get_ui (bs));
+      do {
+	mpz_urandomb (bs, rands, size_range);
+	mpz_rrandomb (m, rands, mpz_get_ui (bs));
+      } while (mpz_sgn (m) == 0);
+
+      mpz_urandomb (bs, rands, 8);
+      bsi = mpz_get_ui (bs);
+
+      if ((bsi & 1) != 0)
+	mpz_neg (a, a);
+      if ((bsi & 2) != 0)
+	mpz_neg (m, m);
+
+      r = mpz_invert (ainv, a, m);
+      if (r != 0)
+	{
+	  MPZ_CHECK_FORMAT (ainv);
+
+	  if (mpz_cmp_ui (ainv, 0) < 0 || mpz_cmpabs (ainv, m) >= 0)
+	    {
+	      fprintf (stderr, "ERROR in test %d\n", test);
+	      gmp_fprintf (stderr, "Inverse out of range.\n");
+	      gmp_fprintf (stderr, "a = %Zx\n", a);
+	      gmp_fprintf (stderr, "1/a = %Zx\n", ainv);
+	      gmp_fprintf (stderr, "m = %Zx\n", m);
+	      abort ();
+	    }
+
+	  mpz_mul (t, ainv, a);
+	  mpz_mod (t, t, m);
+
+	  if (mpz_cmp_ui (t, mpz_cmpabs_ui (m, 1) != 0) != 0)
+	    {
+	      fprintf (stderr, "ERROR in test %d\n", test);
+	      gmp_fprintf (stderr, "a^(-1)*a != 1 (mod m)\n");
+	      gmp_fprintf (stderr, "a = %Zx\n", a);
+	      gmp_fprintf (stderr, "m = %Zx\n", m);
+	      abort ();
+	    }
+	}
+      else /* Inverse does not exist */
+	{
+	  mpz_gcd (t, a, m);
+	  if (mpz_cmp_ui (t, 1) == 0)
+	    {
+	      fprintf (stderr, "ERROR in test %d\n", test);
+	      gmp_fprintf (stderr, "Inverse exists, but was not found.\n");
+	      gmp_fprintf (stderr, "a = %Zx\n", a);
+	      gmp_fprintf (stderr, "m = %Zx\n", m);
+	      abort ();
+	    }
+	}
+    }
+
+  mpz_clear (bs);
+  mpz_clear (a);
+  mpz_clear (m);
+  mpz_clear (ainv);
+  mpz_clear (t);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-io_raw.c b/tests/mpz/t-io_raw.c
new file mode 100644
index 0000000..b840b64
--- /dev/null
+++ b/tests/mpz/t-io_raw.c
@@ -0,0 +1,286 @@
+/* Test mpz_inp_raw and mpz_out_raw.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define FILENAME  "/tmp/t-io_raw.tmp"
+
+
+/* In the fopen, "b" selects binary mode on DOS systems, meaning no
+   conversion of '\n' to and from CRLF.  It's believed systems without such
+   nonsense will simply ignore the "b", but in case that's not so a plain
+   "w+" is attempted if "w+b" fails.  */
+
+FILE *
+fopen_wplusb_or_die (const char *filename)
+{
+  FILE  *fp;
+  fp = fopen (filename, "w+b");
+  if (fp == NULL)
+    fp = fopen (filename, "w+");
+
+  if (fp == NULL)
+    {
+      printf ("Cannot create file %s\n", filename);
+      abort ();
+    }
+  return fp;
+}
+
+/* use 0x80 to check nothing bad happens with sign extension etc */
+#define BYTEVAL(i)  (((i) + 1) | 0x80)
+
+void
+check_in (void)
+{
+  int        i, j, zeros, neg, error = 0;
+  mpz_t      want, got;
+  size_t     want_ret, got_ret;
+  mp_size_t  size;
+  FILE       *fp;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  for (i = 0; i < 32; i++)
+    {
+      for (zeros = 0; zeros < 8; zeros++)
+	{
+	  for (neg = 0; neg <= 1; neg++)
+	    {
+	      want_ret = i + zeros + 4;
+
+	      /* need this to get the twos complement right */
+	      ASSERT_ALWAYS (sizeof (size) >= 4);
+
+	      size = i + zeros;
+	      if (neg)
+		size = -size;
+
+	      fp = fopen_wplusb_or_die (FILENAME);
+	      for (j = 3; j >= 0; j--)
+		ASSERT_ALWAYS (putc ((size >> (j*8)) & 0xFF, fp) != EOF);
+	      for (j = 0; j < zeros; j++)
+		ASSERT_ALWAYS (putc ('\0', fp) != EOF);
+	      for (j = 0; j < i; j++)
+		ASSERT_ALWAYS (putc (BYTEVAL (j), fp) != EOF);
+	      /* and some trailing garbage */
+	      ASSERT_ALWAYS (putc ('x', fp) != EOF);
+	      ASSERT_ALWAYS (putc ('y', fp) != EOF);
+	      ASSERT_ALWAYS (putc ('z', fp) != EOF);
+	      ASSERT_ALWAYS (fflush (fp) == 0);
+	      rewind (fp);
+
+	      got_ret = mpz_inp_raw (got, fp);
+	      ASSERT_ALWAYS (! ferror(fp));
+	      ASSERT_ALWAYS (fclose (fp) == 0);
+
+	      MPZ_CHECK_FORMAT (got);
+
+	      if (got_ret != want_ret)
+		{
+		  printf ("check_in: return value wrong\n");
+		  error = 1;
+		}
+	      if (mpz_cmp (got, want) != 0)
+		{
+		  printf ("check_in: result wrong\n");
+		  error = 1;
+		}
+	      if (error)
+		{
+		  printf    ("  i=%d zeros=%d neg=%d\n", i, zeros, neg);
+		  printf    ("  got_ret  %lu\n", (unsigned long) got_ret);
+		  printf    ("  want_ret %lu\n", (unsigned long) want_ret);
+		  mpz_trace ("  got      ", got);
+		  mpz_trace ("  want     ", want);
+		  abort ();
+		}
+
+	      mpz_neg (want, want);
+	    }
+	}
+      mpz_mul_2exp (want, want, 8);
+      mpz_add_ui (want, want, (unsigned long) BYTEVAL (i));
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+
+void
+check_out (void)
+{
+  int        i, j, neg, error = 0;
+  mpz_t      z;
+  char       want[256], got[256], *p;
+  size_t     want_len, got_ret, got_read;
+  mp_size_t  size;
+  FILE       *fp;
+
+  mpz_init (z);
+
+  for (i = 0; i < 32; i++)
+    {
+      for (neg = 0; neg <= 1; neg++)
+	{
+	  want_len = i + 4;
+
+	  /* need this to get the twos complement right */
+	  ASSERT_ALWAYS (sizeof (size) >= 4);
+
+	  size = i;
+	  if (neg)
+	    size = -size;
+
+	  p = want;
+	  for (j = 3; j >= 0; j--)
+	    *p++ = size >> (j*8);
+	  for (j = 0; j < i; j++)
+	    *p++ = BYTEVAL (j);
+	  ASSERT_ALWAYS (p <= want + sizeof (want));
+
+	  fp = fopen_wplusb_or_die (FILENAME);
+	  got_ret = mpz_out_raw (fp, z);
+	  ASSERT_ALWAYS (fflush (fp) == 0);
+	  rewind (fp);
+	  got_read = fread (got, 1, sizeof(got), fp);
+	  ASSERT_ALWAYS (! ferror(fp));
+	  ASSERT_ALWAYS (fclose (fp) == 0);
+
+	  if (got_ret != want_len)
+	    {
+	      printf ("check_out: wrong return value\n");
+	      error = 1;
+	    }
+	  if (got_read != want_len)
+	    {
+	      printf ("check_out: wrong number of bytes read back\n");
+	      error = 1;
+	    }
+	  if (memcmp (want, got, want_len) != 0)
+	    {
+	      printf ("check_out: wrong data\n");
+	      error = 1;
+	    }
+	  if (error)
+	    {
+	      printf    ("  i=%d neg=%d\n", i, neg);
+	      mpz_trace ("  z", z);
+	      printf    ("  got_ret  %lu\n", (unsigned long) got_ret);
+	      printf    ("  got_read %lu\n", (unsigned long) got_read);
+	      printf    ("  want_len %lu\n", (unsigned long) want_len);
+	      printf    ("  want");
+	      for (j = 0; j < want_len; j++)
+		printf (" %02X", (unsigned) (unsigned char) want[j]);
+	      printf    ("\n");
+	      printf    ("  got ");
+	      for (j = 0; j < want_len; j++)
+		printf (" %02X", (unsigned) (unsigned char) got[j]);
+	      printf    ("\n");
+	      abort ();
+	    }
+
+	  mpz_neg (z, z);
+	}
+      mpz_mul_2exp (z, z, 8);
+      mpz_add_ui (z, z, (unsigned long) BYTEVAL (i));
+    }
+
+  mpz_clear (z);
+}
+
+
+void
+check_rand (void)
+{
+  gmp_randstate_ptr  rands = RANDS;
+  int        i, error = 0;
+  mpz_t      got, want;
+  size_t     inp_ret, out_ret;
+  FILE       *fp;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  for (i = 0; i < 500; i++)
+    {
+      mpz_erandomb (want, rands, 10*GMP_LIMB_BITS);
+      mpz_negrandom (want, rands);
+
+      fp = fopen_wplusb_or_die (FILENAME);
+      out_ret = mpz_out_raw (fp, want);
+      ASSERT_ALWAYS (fflush (fp) == 0);
+      rewind (fp);
+      inp_ret = mpz_inp_raw (got, fp);
+      ASSERT_ALWAYS (fclose (fp) == 0);
+
+      MPZ_CHECK_FORMAT (got);
+
+      if (inp_ret != out_ret)
+	{
+	  printf ("check_rand: different inp/out return values\n");
+	  error = 1;
+	}
+      if (mpz_cmp (got, want) != 0)
+	{
+	  printf ("check_rand: wrong result\n");
+	  error = 1;
+	}
+      if (error)
+	{
+	  printf    ("  out_ret %lu\n", (unsigned long) out_ret);
+	  printf    ("  inp_ret %lu\n", (unsigned long) inp_ret);
+	  mpz_trace ("  want", want);
+	  mpz_trace ("  got ", got);
+	  abort ();
+	}
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_in ();
+  check_out ();
+  check_rand ();
+
+  unlink (FILENAME);
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpz/t-jac.c b/tests/mpz/t-jac.c
new file mode 100644
index 0000000..ed8aff8
--- /dev/null
+++ b/tests/mpz/t-jac.c
@@ -0,0 +1,1008 @@
+/* Exercise mpz_*_kronecker_*() and mpz_jacobi() functions.
+
+Copyright 1999-2004, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+/* With no arguments the various Kronecker/Jacobi symbol routines are
+   checked against some test data and a lot of derived data.
+
+   To check the test data against PARI-GP, run
+
+	   t-jac -p | gp -q
+
+   Enhancements:
+
+   More big test cases than those given by check_squares_zi would be good.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifdef _LONG_LONG_LIMB
+#define LL(l,ll)  ll
+#else
+#define LL(l,ll)  l
+#endif
+
+
+int option_pari = 0;
+
+
+unsigned long
+mpz_mod4 (mpz_srcptr z)
+{
+  mpz_t          m;
+  unsigned long  ret;
+
+  mpz_init (m);
+  mpz_fdiv_r_2exp (m, z, 2);
+  ret = mpz_get_ui (m);
+  mpz_clear (m);
+  return ret;
+}
+
+int
+mpz_fits_ulimb_p (mpz_srcptr z)
+{
+  return (SIZ(z) == 1 || SIZ(z) == 0);
+}
+
+mp_limb_t
+mpz_get_ulimb (mpz_srcptr z)
+{
+  if (SIZ(z) == 0)
+    return 0;
+  else
+    return PTR(z)[0];
+}
+
+
+void
+try_base (mp_limb_t a, mp_limb_t b, int answer)
+{
+  int  got;
+
+  if ((b & 1) == 0 || b == 1 || a > b)
+    return;
+
+  got = mpn_jacobi_base (a, b, 0);
+  if (got != answer)
+    {
+      printf (LL("mpn_jacobi_base (%lu, %lu) is %d should be %d\n",
+		 "mpn_jacobi_base (%llu, %llu) is %d should be %d\n"),
+	      a, b, got, answer);
+      abort ();
+    }
+}
+
+
+void
+try_zi_ui (mpz_srcptr a, unsigned long b, int answer)
+{
+  int  got;
+
+  got = mpz_kronecker_ui (a, b);
+  if (got != answer)
+    {
+      printf ("mpz_kronecker_ui (");
+      mpz_out_str (stdout, 10, a);
+      printf (", %lu) is %d should be %d\n", b, got, answer);
+      abort ();
+    }
+}
+
+
+void
+try_zi_si (mpz_srcptr a, long b, int answer)
+{
+  int  got;
+
+  got = mpz_kronecker_si (a, b);
+  if (got != answer)
+    {
+      printf ("mpz_kronecker_si (");
+      mpz_out_str (stdout, 10, a);
+      printf (", %ld) is %d should be %d\n", b, got, answer);
+      abort ();
+    }
+}
+
+
+void
+try_ui_zi (unsigned long a, mpz_srcptr b, int answer)
+{
+  int  got;
+
+  got = mpz_ui_kronecker (a, b);
+  if (got != answer)
+    {
+      printf ("mpz_ui_kronecker (%lu, ", a);
+      mpz_out_str (stdout, 10, b);
+      printf (") is %d should be %d\n", got, answer);
+      abort ();
+    }
+}
+
+
+void
+try_si_zi (long a, mpz_srcptr b, int answer)
+{
+  int  got;
+
+  got = mpz_si_kronecker (a, b);
+  if (got != answer)
+    {
+      printf ("mpz_si_kronecker (%ld, ", a);
+      mpz_out_str (stdout, 10, b);
+      printf (") is %d should be %d\n", got, answer);
+      abort ();
+    }
+}
+
+
+/* Don't bother checking mpz_jacobi, since it only differs for b even, and
+   we don't have an actual expected answer for it.  tests/devel/try.c does
+   some checks though.  */
+void
+try_zi_zi (mpz_srcptr a, mpz_srcptr b, int answer)
+{
+  int  got;
+
+  got = mpz_kronecker (a, b);
+  if (got != answer)
+    {
+      printf ("mpz_kronecker (");
+      mpz_out_str (stdout, 10, a);
+      printf (", ");
+      mpz_out_str (stdout, 10, b);
+      printf (") is %d should be %d\n", got, answer);
+      abort ();
+    }
+}
+
+
+void
+try_pari (mpz_srcptr a, mpz_srcptr b, int answer)
+{
+  printf ("try(");
+  mpz_out_str (stdout, 10, a);
+  printf (",");
+  mpz_out_str (stdout, 10, b);
+  printf (",%d)\n", answer);
+}
+
+
+void
+try_each (mpz_srcptr a, mpz_srcptr b, int answer)
+{
+#if 0
+  fprintf(stderr, "asize = %d, bsize = %d\n",
+	  mpz_sizeinbase (a, 2), mpz_sizeinbase (b, 2));
+#endif
+  if (option_pari)
+    {
+      try_pari (a, b, answer);
+      return;
+    }
+
+  if (mpz_fits_ulimb_p (a) && mpz_fits_ulimb_p (b))
+    try_base (mpz_get_ulimb (a), mpz_get_ulimb (b), answer);
+
+  if (mpz_fits_ulong_p (b))
+    try_zi_ui (a, mpz_get_ui (b), answer);
+
+  if (mpz_fits_slong_p (b))
+    try_zi_si (a, mpz_get_si (b), answer);
+
+  if (mpz_fits_ulong_p (a))
+    try_ui_zi (mpz_get_ui (a), b, answer);
+
+  if (mpz_fits_sint_p (a))
+    try_si_zi (mpz_get_si (a), b, answer);
+
+  try_zi_zi (a, b, answer);
+}
+
+
+/* Try (a/b) and (a/-b). */
+void
+try_pn (mpz_srcptr a, mpz_srcptr b_orig, int answer)
+{
+  mpz_t  b;
+
+  mpz_init_set (b, b_orig);
+  try_each (a, b, answer);
+
+  mpz_neg (b, b);
+  if (mpz_sgn (a) < 0)
+    answer = -answer;
+
+  try_each (a, b, answer);
+
+  mpz_clear (b);
+}
+
+
+/* Try (a+k*p/b) for various k, using the fact (a/b) is periodic in a with
+   period p.  For b>0, p=b if b!=2mod4 or p=4*b if b==2mod4. */
+
+void
+try_periodic_num (mpz_srcptr a_orig, mpz_srcptr b, int answer)
+{
+  mpz_t  a, a_period;
+  int    i;
+
+  if (mpz_sgn (b) <= 0)
+    return;
+
+  mpz_init_set (a, a_orig);
+  mpz_init_set (a_period, b);
+  if (mpz_mod4 (b) == 2)
+    mpz_mul_ui (a_period, a_period, 4);
+
+  /* don't bother with these tests if they're only going to produce
+     even/even */
+  if (mpz_even_p (a) && mpz_even_p (b) && mpz_even_p (a_period))
+    goto done;
+
+  for (i = 0; i < 6; i++)
+    {
+      mpz_add (a, a, a_period);
+      try_pn (a, b, answer);
+    }
+
+  mpz_set (a, a_orig);
+  for (i = 0; i < 6; i++)
+    {
+      mpz_sub (a, a, a_period);
+      try_pn (a, b, answer);
+    }
+
+ done:
+  mpz_clear (a);
+  mpz_clear (a_period);
+}
+
+
+/* Try (a/b+k*p) for various k, using the fact (a/b) is periodic in b of
+   period p.
+
+			       period p
+	   a==0,1mod4             a
+	   a==2mod4              4*a
+	   a==3mod4 and b odd    4*a
+	   a==3mod4 and b even   8*a
+
+   In Henri Cohen's book the period is given as 4*a for all a==2,3mod4, but
+   a counterexample would seem to be (3/2)=-1 which with (3/14)=+1 doesn't
+   have period 4*a (but rather 8*a with (3/26)=-1).  Maybe the plain 4*a is
+   to be read as applying to a plain Jacobi symbol with b odd, rather than
+   the Kronecker extension to b even. */
+
+void
+try_periodic_den (mpz_srcptr a, mpz_srcptr b_orig, int answer)
+{
+  mpz_t  b, b_period;
+  int    i;
+
+  if (mpz_sgn (a) == 0 || mpz_sgn (b_orig) == 0)
+    return;
+
+  mpz_init_set (b, b_orig);
+
+  mpz_init_set (b_period, a);
+  if (mpz_mod4 (a) == 3 && mpz_even_p (b))
+    mpz_mul_ui (b_period, b_period, 8L);
+  else if (mpz_mod4 (a) >= 2)
+    mpz_mul_ui (b_period, b_period, 4L);
+
+  /* don't bother with these tests if they're only going to produce
+     even/even */
+  if (mpz_even_p (a) && mpz_even_p (b) && mpz_even_p (b_period))
+    goto done;
+
+  for (i = 0; i < 6; i++)
+    {
+      mpz_add (b, b, b_period);
+      try_pn (a, b, answer);
+    }
+
+  mpz_set (b, b_orig);
+  for (i = 0; i < 6; i++)
+    {
+      mpz_sub (b, b, b_period);
+      try_pn (a, b, answer);
+    }
+
+ done:
+  mpz_clear (b);
+  mpz_clear (b_period);
+}
+
+
+static const unsigned long  ktable[] = {
+  0, 1, 2, 3, 4, 5, 6, 7,
+  GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,
+  2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,
+  3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1
+};
+
+
+/* Try (a/b*2^k) for various k. */
+void
+try_2den (mpz_srcptr a, mpz_srcptr b_orig, int answer)
+{
+  mpz_t  b;
+  int    kindex;
+  int    answer_a2, answer_k;
+  unsigned long k;
+
+  /* don't bother when b==0 */
+  if (mpz_sgn (b_orig) == 0)
+    return;
+
+  mpz_init_set (b, b_orig);
+
+  /* (a/2) is 0 if a even, 1 if a==1 or 7 mod 8, -1 if a==3 or 5 mod 8 */
+  answer_a2 = (mpz_even_p (a) ? 0
+	       : (((SIZ(a) >= 0 ? PTR(a)[0] : -PTR(a)[0]) + 2) & 7) < 4 ? 1
+	       : -1);
+
+  for (kindex = 0; kindex < numberof (ktable); kindex++)
+    {
+      k = ktable[kindex];
+
+      /* answer_k = answer*(answer_a2^k) */
+      answer_k = (answer_a2 == 0 && k != 0 ? 0
+		  : (k & 1) == 1 && answer_a2 == -1 ? -answer
+		  : answer);
+
+      mpz_mul_2exp (b, b_orig, k);
+      try_pn (a, b, answer_k);
+    }
+
+  mpz_clear (b);
+}
+
+
+/* Try (a*2^k/b) for various k.  If it happens mpz_ui_kronecker() gets (2/b)
+   wrong it will show up as wrong answers demanded. */
+void
+try_2num (mpz_srcptr a_orig, mpz_srcptr b, int answer)
+{
+  mpz_t  a;
+  int    kindex;
+  int    answer_2b, answer_k;
+  unsigned long  k;
+
+  /* don't bother when a==0 */
+  if (mpz_sgn (a_orig) == 0)
+    return;
+
+  mpz_init (a);
+
+  /* (2/b) is 0 if b even, 1 if b==1 or 7 mod 8, -1 if b==3 or 5 mod 8 */
+  answer_2b = (mpz_even_p (b) ? 0
+	       : (((SIZ(b) >= 0 ? PTR(b)[0] : -PTR(b)[0]) + 2) & 7) < 4 ? 1
+	       : -1);
+
+  for (kindex = 0; kindex < numberof (ktable); kindex++)
+    {
+      k = ktable[kindex];
+
+      /* answer_k = answer*(answer_2b^k) */
+      answer_k = (answer_2b == 0 && k != 0 ? 0
+		  : (k & 1) == 1 && answer_2b == -1 ? -answer
+		  : answer);
+
+	mpz_mul_2exp (a, a_orig, k);
+      try_pn (a, b, answer_k);
+    }
+
+  mpz_clear (a);
+}
+
+
+/* The try_2num() and try_2den() routines don't in turn call
+   try_periodic_num() and try_periodic_den() because it hugely increases the
+   number of tests performed, without obviously increasing coverage.
+
+   Useful extra derived cases can be added here. */
+
+void
+try_all (mpz_t a, mpz_t b, int answer)
+{
+  /* Check (a/b)==answer directly and through every derived variation:
+     periodicity in the numerator/denominator and power-of-2 scaling.  */
+  try_pn (a, b, answer);
+  try_periodic_num (a, b, answer);
+  try_periodic_den (a, b, answer);
+  try_2num (a, b, answer);
+  try_2den (a, b, answer);
+}
+
+
+void
+check_data (void)
+{
+  /* Table of known Jacobi/Kronecker values (a/b); each entry is checked
+     directly and via the try_all() derived variations, so the table can
+     stay fairly small.  */
+  static const struct {
+    const char  *a;
+    const char  *b;
+    int         answer;
+
+  } data[] = {
+
+    /* Note that the various derived checks in try_all() reduce the cases
+       that need to be given here.  */
+
+    /* some zeros */
+    {  "0",  "0", 0 },
+    {  "0",  "2", 0 },
+    {  "0",  "6", 0 },
+    {  "5",  "0", 0 },
+    { "24", "60", 0 },
+
+    /* (a/1) = 1, any a
+       In particular note (0/1)=1 so that (a/b)=(a mod b/b). */
+    { "0", "1", 1 },
+    { "1", "1", 1 },
+    { "2", "1", 1 },
+    { "3", "1", 1 },
+    { "4", "1", 1 },
+    { "5", "1", 1 },
+
+    /* (0/b) = 0, b != 1 */
+    { "0",  "3", 0 },
+    { "0",  "5", 0 },
+    { "0",  "7", 0 },
+    { "0",  "9", 0 },
+    { "0", "11", 0 },
+    { "0", "13", 0 },
+    { "0", "15", 0 },
+
+    /* (1/b) = 1 */
+    { "1",  "1", 1 },
+    { "1",  "3", 1 },
+    { "1",  "5", 1 },
+    { "1",  "7", 1 },
+    { "1",  "9", 1 },
+    { "1", "11", 1 },
+
+    /* (-1/b) = (-1)^((b-1)/2) which is -1 for b==3 mod 4 */
+    { "-1",  "1",  1 },
+    { "-1",  "3", -1 },
+    { "-1",  "5",  1 },
+    { "-1",  "7", -1 },
+    { "-1",  "9",  1 },
+    { "-1", "11", -1 },
+    { "-1", "13",  1 },
+    { "-1", "15", -1 },
+    { "-1", "17",  1 },
+    { "-1", "19", -1 },
+
+    /* (2/b) = (-1)^((b^2-1)/8) which is -1 for b==3,5 mod 8.
+       try_2num() will exercise multiple powers of 2 in the numerator.  */
+    { "2",  "1",  1 },
+    { "2",  "3", -1 },
+    { "2",  "5", -1 },
+    { "2",  "7",  1 },
+    { "2",  "9",  1 },
+    { "2", "11", -1 },
+    { "2", "13", -1 },
+    { "2", "15",  1 },
+    { "2", "17",  1 },
+
+    /* (-2/b) = (-1)^((b^2-1)/8)*(-1)^((b-1)/2) which is -1 for b==5,7mod8.
+       try_2num() will exercise multiple powers of 2 in the numerator, which
+       will test that the shift in mpz_si_kronecker() uses unsigned not
+       signed.  */
+    { "-2",  "1",  1 },
+    { "-2",  "3",  1 },
+    { "-2",  "5", -1 },
+    { "-2",  "7", -1 },
+    { "-2",  "9",  1 },
+    { "-2", "11",  1 },
+    { "-2", "13", -1 },
+    { "-2", "15", -1 },
+    { "-2", "17",  1 },
+
+    /* (a/2)=(2/a).
+       try_2den() will exercise multiple powers of 2 in the denominator. */
+    {  "3",  "2", -1 },
+    {  "5",  "2", -1 },
+    {  "7",  "2",  1 },
+    {  "9",  "2",  1 },
+    {  "11", "2", -1 },
+
+    /* Harriet Griffin, "Elementary Theory of Numbers", page 155, various
+       examples.  */
+    {   "2", "135",  1 },
+    { "135",  "19", -1 },
+    {   "2",  "19", -1 },
+    {  "19", "135",  1 },
+    { "173", "135",  1 },
+    {  "38", "135",  1 },
+    { "135", "173",  1 },
+    { "173",   "5", -1 },
+    {   "3",   "5", -1 },
+    {   "5", "173", -1 },
+    { "173",   "3", -1 },
+    {   "2",   "3", -1 },
+    {   "3", "173", -1 },
+    { "253",  "21",  1 },
+    {   "1",  "21",  1 },
+    {  "21", "253",  1 },
+    {  "21",  "11", -1 },
+    {  "-1",  "11", -1 },
+
+    /* Griffin page 147 */
+    {  "-1",  "17",  1 },
+    {   "2",  "17",  1 },
+    {  "-2",  "17",  1 },
+    {  "-1",  "89",  1 },
+    {   "2",  "89",  1 },
+
+    /* Griffin page 148 */
+    {  "89",  "11",  1 },
+    {   "1",  "11",  1 },
+    {  "89",   "3", -1 },
+    {   "2",   "3", -1 },
+    {   "3",  "89", -1 },
+    {  "11",  "89",  1 },
+    {  "33",  "89", -1 },
+
+    /* H. Davenport, "The Higher Arithmetic", page 65, the quadratic
+       residues and non-residues mod 19.  */
+    {  "1", "19",  1 },
+    {  "4", "19",  1 },
+    {  "5", "19",  1 },
+    {  "6", "19",  1 },
+    {  "7", "19",  1 },
+    {  "9", "19",  1 },
+    { "11", "19",  1 },
+    { "16", "19",  1 },
+    { "17", "19",  1 },
+    {  "2", "19", -1 },
+    {  "3", "19", -1 },
+    {  "8", "19", -1 },
+    { "10", "19", -1 },
+    { "12", "19", -1 },
+    { "13", "19", -1 },
+    { "14", "19", -1 },
+    { "15", "19", -1 },
+    { "18", "19", -1 },
+
+    /* Residues and non-residues mod 13 */
+    {  "0",  "13",  0 },
+    {  "1",  "13",  1 },
+    {  "2",  "13", -1 },
+    {  "3",  "13",  1 },
+    {  "4",  "13",  1 },
+    {  "5",  "13", -1 },
+    {  "6",  "13", -1 },
+    {  "7",  "13", -1 },
+    {  "8",  "13", -1 },
+    {  "9",  "13",  1 },
+    { "10",  "13",  1 },
+    { "11",  "13", -1 },
+    { "12",  "13",  1 },
+
+    /* various */
+    {  "5",   "7", -1 },
+    { "15",  "17",  1 },
+    { "67",  "89",  1 },
+
+    /* special values inducing a==b==1 at the end of jac_or_kron() */
+    { "0x10000000000000000000000000000000000000000000000001",
+      "0x10000000000000000000000000000000000000000000000003", 1 },
+
+    /* Test for previous bugs in jacobi_2. */
+    { "0x43900000000", "0x42400000439", -1 }, /* 32-bit limbs */
+    { "0x4390000000000000000", "0x4240000000000000439", -1 }, /* 64-bit limbs */
+
+    { "198158408161039063", "198158360916398807", -1 },
+
+    /* Some tests involving large quotients in the continued fraction
+       expansion. */
+    { "37200210845139167613356125645445281805",
+      "451716845976689892447895811408978421929", -1 },
+    { "67674091930576781943923596701346271058970643542491743605048620644676477275152701774960868941561652032482173612421015",
+      "4902678867794567120224500687210807069172039735", 0 },
+    { "2666617146103764067061017961903284334497474492754652499788571378062969111250584288683585223600172138551198546085281683283672592", "2666617146103764067061017961903284334497474492754652499788571378062969111250584288683585223600172138551198546085281683290481773", 1 },
+
+    /* Exercises the case asize == 1, btwos > 0 in mpz_jacobi. */
+    { "804609", "421248363205206617296534688032638102314410556521742428832362659824", 1 } ,
+    { "4190209", "2239744742177804210557442048984321017460028974602978995388383905961079286530650825925074203175536427000", 1 },
+
+    /* Exercises the case asize == 1, btwos = 63 in mpz_jacobi
+       (relevant when GMP_LIMB_BITS == 64). */
+    { "17311973299000934401", "1675975991242824637446753124775689449936871337036614677577044717424700351103148799107651171694863695242089956242888229458836426332300124417011114380886016", 1 },
+    { "3220569220116583677", "41859917623035396746", -1 },
+
+    /* Other test cases that triggered bugs during development. */
+    { "37200210845139167613356125645445281805", "340116213441272389607827434472642576514", -1 },
+    { "74400421690278335226712251290890563610", "451716845976689892447895811408978421929", -1 },
+  };
+
+  int    i;
+  mpz_t  a, b;
+
+  mpz_init (a);
+  mpz_init (b);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* base 0: leading "0x" in the table selects hex, otherwise decimal */
+      mpz_set_str_or_abort (a, data[i].a, 0);
+      mpz_set_str_or_abort (b, data[i].b, 0);
+      try_all (a, b, data[i].answer);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+}
+
+
+/* (a^2/b)=1 if gcd(a,b)=1, or (a^2/b)=0 if gcd(a,b)!=1.
+   This includes when a=0 or b=0. */
+void
+check_squares_zi (void)
+{
+  /* (a^2/b) is 1 when gcd(a,b)==1, else 0 (including a==0 or b==0);
+     verify this on random operands of gradually increasing size.  */
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t  a, b, g;
+  int    i, answer;
+  mp_size_t size_range, an, bn;
+  mpz_t bs;
+
+  mpz_init (bs);
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (g);
+
+  for (i = 0; i < 50; i++)
+    {
+      /* size_range grows with i so later iterations use bigger operands */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + i/8 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      an = mpz_get_ui (bs);
+      mpz_rrandomb (a, rands, an);
+
+      mpz_urandomb (bs, rands, size_range);
+      bn = mpz_get_ui (bs);
+      mpz_rrandomb (b, rands, bn);
+
+      /* expected answer: 1 iff a and b are coprime */
+      mpz_gcd (g, a, b);
+      if (mpz_cmp_ui (g, 1L) == 0)
+	answer = 1;
+      else
+	answer = 0;
+
+      mpz_mul (a, a, a);
+
+      try_all (a, b, answer);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (g);
+}
+
+
+/* Check the handling of asize==0, make sure it isn't affected by the low
+   limb. */
+void
+check_a_zero (void)
+{
+  /* a stays zero (SIZ(a)==0) throughout; scribbling different values
+     into the unused low limb via PTR(a)[0] must not change any result.  */
+  mpz_t  a, b;
+
+  mpz_init_set_ui (a, 0);
+  mpz_init (b);
+
+  mpz_set_ui (b, 1L);
+  PTR(a)[0] = 0;
+  try_all (a, b, 1);   /* (0/1)=1 */
+  PTR(a)[0] = 1;
+  try_all (a, b, 1);   /* (0/1)=1 */
+
+  mpz_set_si (b, -1L);
+  PTR(a)[0] = 0;
+  try_all (a, b, 1);   /* (0/-1)=1 */
+  PTR(a)[0] = 1;
+  try_all (a, b, 1);   /* (0/-1)=1 */
+
+  mpz_set_ui (b, 0);
+  PTR(a)[0] = 0;
+  try_all (a, b, 0);   /* (0/0)=0 */
+  PTR(a)[0] = 1;
+  try_all (a, b, 0);   /* (0/0)=0 */
+
+  mpz_set_ui (b, 2);
+  PTR(a)[0] = 0;
+  try_all (a, b, 0);   /* (0/2)=0 */
+  PTR(a)[0] = 1;
+  try_all (a, b, 0);   /* (0/2)=0 */
+
+  mpz_clear (a);
+  mpz_clear (b);
+}
+
+
+/* Assumes that b = prod p_k^e_k */
+int
+ref_jacobi (mpz_srcptr a, mpz_srcptr b, unsigned nprime,
+	    mpz_t prime[], unsigned *exp)
+{
+  /* Reference (a/b) from the known factorization b = prod prime[k]^exp[k]:
+     the Jacobi symbol is the product of Legendre symbols (a/p_k)^e_k.  */
+  int result = 1;
+  unsigned i;
+
+  for (i = 0; i < nprime; i++)
+    {
+      int legendre;
+
+      if (exp[i] == 0)
+	continue;
+
+      legendre = refmpz_legendre (a, prime[i]);
+      if (legendre == 0)
+	return 0;
+
+      /* even powers contribute legendre^2 == 1, so only odd ones matter */
+      if ((exp[i] & 1) != 0)
+	result *= legendre;
+    }
+  return result;
+}
+
+void
+check_jacobi_factored (void)
+{
+#define PRIME_N 10
+#define PRIME_MAX_SIZE 50
+#define PRIME_MAX_EXP 4
+#define PRIME_A_COUNT 10
+#define PRIME_B_COUNT 5
+#define PRIME_MAX_B_SIZE 2000
+
+  /* Build b with a known factorization b = prod prime[j]^exp[j] and
+     compare the library's (a/b) against ref_jacobi().  */
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t prime[PRIME_N];
+  unsigned exp[PRIME_N];
+  mpz_t a, b, t, bs;
+  unsigned i;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (t);
+  mpz_init (bs);
+
+  /* Generate primes */
+  for (i = 0; i < PRIME_N; i++)
+    {
+      mp_size_t size;
+      mpz_init (prime[i]);
+      mpz_urandomb (bs, rands, 32);
+      size = mpz_get_ui (bs) % PRIME_MAX_SIZE + 2;
+      mpz_rrandomb (prime[i], rands, size);
+      /* odd primes only: tiny candidates are forced to 3 */
+      if (mpz_cmp_ui (prime[i], 3) <= 0)
+	mpz_set_ui (prime[i], 3);
+      else
+	mpz_nextprime (prime[i], prime[i]);
+    }
+
+  for (i = 0; i < PRIME_B_COUNT; i++)
+    {
+      unsigned j, k;
+      mp_bitcnt_t bsize;
+
+      /* b = prod prime[j]^exp[j], stopping once b reaches
+	 PRIME_MAX_B_SIZE bits; j is left at the number of factors used */
+      mpz_set_ui (b, 1);
+      bsize = 1;
+
+      for (j = 0; j < PRIME_N && bsize < PRIME_MAX_B_SIZE; j++)
+	{
+	  mpz_urandomb (bs, rands, 32);
+	  exp[j] = mpz_get_ui (bs) % PRIME_MAX_EXP;
+	  mpz_pow_ui (t, prime[j], exp[j]);
+	  mpz_mul (b, b, t);
+	  bsize = mpz_sizeinbase (b, 2);
+	}
+      for (k = 0; k < PRIME_A_COUNT; k++)
+	{
+	  int answer;
+	  /* random a slightly larger than b */
+	  mpz_rrandomb (a, rands, bsize + 2);
+	  answer = ref_jacobi (a, b, j, prime, exp);
+	  try_all (a, b, answer);
+	}
+    }
+  for (i = 0; i < PRIME_N; i++)
+    mpz_clear (prime[i]);
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (t);
+  mpz_clear (bs);
+
+#undef PRIME_N
+#undef PRIME_MAX_SIZE
+#undef PRIME_MAX_EXP
+#undef PRIME_A_COUNT
+#undef PRIME_B_COUNT
+#undef PRIME_MAX_B_SIZE
+}
+
+/* These tests compute (a|n), where the quotient sequence includes
+   large quotients, and n has a known factorization. Such inputs are
+   generated as follows. First, construct a large n, as a power of a
+   prime p of moderate size.
+
+   Next, compute a matrix from factors (q,1;1,0), with q chosen with
+   uniformly distributed size. We must stop with matrix elements of
+   roughly half the size of n. Denote elements of M as M = (m00, m01;
+   m10, m11).
+
+   We now look for solutions to
+
+     n = m00 x + m01 y
+     a = m10 x + m11 y
+
+   with x,y > 0. Since n >= m00 * m01, there exists a positive
+   solution to the first equation. Find those x, y, and substitute in
+   the second equation to get a. Then the quotient sequence for (a|n)
+   is precisely the quotients used when constructing M, followed by
+   the quotient sequence for (x|y).
+
+   Numbers should also be large enough that we exercise hgcd_jacobi,
+   which means that they should be larger than
+
+     max (GCD_DC_THRESHOLD, 3 * HGCD_THRESHOLD)
+
+   With an n of roughly 40000 bits, this should hold on most machines.
+*/
+
+void
+check_large_quotients (void)
+{
+#define COUNT 50
+#define PBITS 200
+#define PPOWER 201
+#define MAX_QBITS 500
+
+  /* See the block comment above for the construction: n = p^PPOWER, a
+     matrix M built from random quotient factors, and (x|n) solved to have
+     exactly those quotients in its continued-fraction expansion.  */
+  gmp_randstate_ptr rands = RANDS;
+
+  mpz_t p, n, q, g, s, t, x, y, bs;
+  mpz_t M[2][2];
+  mp_bitcnt_t nsize;
+  unsigned i;
+
+  mpz_init (p);
+  mpz_init (n);
+  mpz_init (q);
+  mpz_init (g);
+  mpz_init (s);
+  mpz_init (t);
+  mpz_init (x);
+  mpz_init (y);
+  mpz_init (bs);
+  mpz_init (M[0][0]);
+  mpz_init (M[0][1]);
+  mpz_init (M[1][0]);
+  mpz_init (M[1][1]);
+
+  /* First generate a number with known factorization, as a random
+     smallish prime raised to an odd power. Then (a|n) = (a|p). */
+  mpz_rrandomb (p, rands, PBITS);
+  mpz_nextprime (p, p);
+  mpz_pow_ui (n, p, PPOWER);
+
+  nsize = mpz_sizeinbase (n, 2);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      int answer;
+      mp_bitcnt_t msize;
+
+      /* M starts as the identity matrix */
+      mpz_set_ui (M[0][0], 1);
+      mpz_set_ui (M[0][1], 0);
+      mpz_set_ui (M[1][0], 0);
+      mpz_set_ui (M[1][1], 1);
+
+      /* accumulate quotient factors while M stays below half of n's size */
+      for (msize = 1; 2*(msize + MAX_QBITS) + 1 < nsize ;)
+	{
+	  /* note: this i shadows the outer loop counter */
+	  unsigned i;
+	  mpz_rrandomb (bs, rands, 32);
+	  mpz_rrandomb (q, rands, 1 + mpz_get_ui (bs) % MAX_QBITS);
+
+	  /* Multiply by (q, 1; 1,0) from the right */
+	  for (i = 0; i < 2; i++)
+	    {
+	      mp_bitcnt_t size;
+	      mpz_swap (M[i][0], M[i][1]);
+	      mpz_addmul (M[i][0], M[i][1], q);
+	      size = mpz_sizeinbase (M[i][0], 2);
+	      if (size > msize)
+		msize = size;
+	    }
+	}
+      mpz_gcdext (g, s, t, M[0][0], M[0][1]);
+      ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);
+
+      /* Solve n = M[0][0] * x + M[0][1] * y */
+      if (mpz_sgn (s) > 0)
+	{
+	  mpz_mul (x, n, s);
+	  mpz_fdiv_qr (q, x, x, M[0][1]);
+	  mpz_mul (y, q, M[0][0]);
+	  mpz_addmul (y, t, n);
+	  ASSERT_ALWAYS (mpz_sgn (y) > 0);
+	}
+      else
+	{
+	  mpz_mul (y, n, t);
+	  mpz_fdiv_qr (q, y, y, M[0][0]);
+	  mpz_mul (x, q, M[0][1]);
+	  mpz_addmul (x, s, n);
+	  ASSERT_ALWAYS (mpz_sgn (x) > 0);
+	}
+      mpz_mul (x, x, M[1][0]);
+      mpz_addmul (x, y, M[1][1]);
+
+      /* Now (x|n) has the selected large quotients */
+      answer = refmpz_legendre (x, p);
+      try_zi_zi (x, n, answer);
+    }
+  mpz_clear (p);
+  mpz_clear (n);
+  mpz_clear (q);
+  mpz_clear (g);
+  mpz_clear (s);
+  mpz_clear (t);
+  mpz_clear (x);
+  mpz_clear (y);
+  mpz_clear (bs);
+  mpz_clear (M[0][0]);
+  mpz_clear (M[0][1]);
+  mpz_clear (M[1][0]);
+  mpz_clear (M[1][1]);
+#undef COUNT
+#undef PBITS
+#undef PPOWER
+#undef MAX_QBITS
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  /* "-p" option: emit a Pari/GP "try" function so that the test output
+     can be cross-checked against Pari's kronecker().  */
+  if (argc >= 2 && strcmp (argv[1], "-p") == 0)
+    {
+      option_pari = 1;
+
+      printf ("\
+try(a,b,answer) =\n\
+{\n\
+  if (kronecker(a,b) != answer,\n\
+    print(\"wrong at \", a, \",\", b,\n\
+      \" expected \", answer,\n\
+      \" pari says \", kronecker(a,b)))\n\
+}\n");
+    }
+
+  check_data ();
+  check_squares_zi ();
+  check_a_zero ();
+  check_jacobi_factored ();
+  check_large_quotients ();
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-lcm.c b/tests/mpz/t-lcm.c
new file mode 100644
index 0000000..1b45212
--- /dev/null
+++ b/tests/mpz/t-lcm.c
@@ -0,0 +1,184 @@
+/* Test mpz_lcm and mpz_lcm_ui.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_all (mpz_ptr want, mpz_srcptr x_orig, mpz_srcptr y_orig)
+{
+  /* Check mpz_lcm(x,y)==want under every combination of operand swap,
+     sign flip and in-place operation, plus mpz_lcm_ui whenever y fits
+     an unsigned long.  Aborts with a trace dump on mismatch.  */
+  mpz_t  got, x, y;
+  int    negx, negy, swap, inplace;
+
+  mpz_init (got);
+  mpz_init_set (x, x_orig);
+  mpz_init_set (y, y_orig);
+
+  for (swap = 0; swap < 2; swap++)
+    {
+      mpz_swap (x, y);
+
+      for (negx = 0; negx < 2; negx++)
+	{
+	  mpz_neg (x, x);
+
+	  for (negy = 0; negy < 2; negy++)
+	    {
+	      mpz_neg (y, y);
+
+	      for (inplace = 0; inplace <= 1; inplace++)
+		{
+		  if (inplace)
+		    { mpz_set (got, x); mpz_lcm (got, got, y); }
+		  else
+		    mpz_lcm (got, x, y);
+		  MPZ_CHECK_FORMAT (got);
+
+		  if (mpz_cmp (got, want) != 0)
+		    {
+		      printf ("mpz_lcm wrong, inplace=%d\n", inplace);
+		    fail:
+		      mpz_trace ("x", x);
+		      mpz_trace ("y", y);
+		      mpz_trace ("got", got);
+		      mpz_trace ("want", want);
+		      abort ();
+		    }
+
+		  if (mpz_fits_ulong_p (y))
+		    {
+		      unsigned long  yu = mpz_get_ui (y);
+		      if (inplace)
+			{ mpz_set (got, x); mpz_lcm_ui (got, got, yu); }
+		      else
+			mpz_lcm_ui (got, x, yu);
+
+		      if (mpz_cmp (got, want) != 0)
+			{
+			  printf ("mpz_lcm_ui wrong, inplace=%d\n", inplace);
+			  printf    ("yu=%lu\n", yu);
+			  goto fail;
+			}
+		    }
+		}
+	    }
+	}
+    }
+
+  mpz_clear (got);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+void
+check_primes (void)
+{
+  /* Exercise lcm via products of distinct small primes, maintaining the
+     expected value "want" incrementally alongside the operands.  */
+  static unsigned long  prime[] = {
+    2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
+    101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,
+    191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,
+    281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,
+    389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,
+  };
+  mpz_t  want, x, y;
+  int    i;
+
+  mpz_init (want);
+  mpz_init (x);
+  mpz_init (y);
+
+  /* Check zeros. */
+  mpz_set_ui (want, 0);
+  mpz_set_ui (x, 1);
+  check_all (want, want, want);
+  check_all (want, want, x);
+  check_all (want, x, want);
+
+  /* New prime each time. */
+  mpz_set_ui (want, 1L);
+  for (i = 0; i < numberof (prime); i++)
+    {
+      mpz_set (x, want);
+      mpz_set_ui (y, prime[i]);
+      mpz_mul_ui (want, want, prime[i]);
+      check_all (want, x, y);
+    }
+
+  /* Old prime each time. */
+  mpz_set (x, want);
+  for (i = 0; i < numberof (prime); i++)
+    {
+      mpz_set_ui (y, prime[i]);
+      check_all (want, x, y);
+    }
+
+  /* One old, one new each time. */
+  mpz_set_ui (want, prime[0]);
+  for (i = 1; i < numberof (prime); i++)
+    {
+      mpz_set (x, want);
+      mpz_set_ui (y, prime[i] * prime[i-1]);
+      mpz_mul_ui (want, want, prime[i]);
+      check_all (want, x, y);
+    }
+
+  /* Triplets with A,B in x and B,C in y. */
+  mpz_set_ui (want, 1L);
+  mpz_set_ui (x, 1L);
+  mpz_set_ui (y, 1L);
+  for (i = 0; i+2 < numberof (prime); i += 3)
+    {
+      mpz_mul_ui (want, want, prime[i]);
+      mpz_mul_ui (want, want, prime[i+1]);
+      mpz_mul_ui (want, want, prime[i+2]);
+
+      mpz_mul_ui (x, x, prime[i]);
+      mpz_mul_ui (x, x, prime[i+1]);
+
+      mpz_mul_ui (y, y, prime[i+1]);
+      mpz_mul_ui (y, y, prime[i+2]);
+
+      check_all (want, x, y);
+    }
+
+
+  mpz_clear (want);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+
+int
+main (int argc, char *argv[])
+{
+  /* Test driver for mpz_lcm / mpz_lcm_ui.  */
+  tests_start ();
+
+  check_primes ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-limbs.c b/tests/mpz/t-limbs.c
new file mode 100644
index 0000000..6526e92
--- /dev/null
+++ b/tests/mpz/t-limbs.c
@@ -0,0 +1,232 @@
+/* Test mpz_limbs_* functions
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define COUNT 100
+#define BITSIZE 500
+
+/* Like mpz_add. For simplicity, support positive inputs only. */
+static void
+alt_add (mpz_ptr r, mpz_srcptr a, mpz_srcptr b)
+{
+  /* r = a + b computed through the mpz_limbs interface; per the comment
+     above, a and b must be positive (nonzero).  */
+  mp_size_t an = mpz_size (a);
+  mp_size_t bn = mpz_size (b);
+  mp_ptr rp;
+
+  ASSERT (an > 0);
+  ASSERT (bn > 0);
+  /* ensure a is the longer operand, as mpn_add requires */
+  if (an < bn)
+    {
+      MP_SIZE_T_SWAP (an, bn);
+      MPZ_SRCPTR_SWAP (a, b);
+    }
+  rp = mpz_limbs_modify (r, an + 1);
+  /* top limb holds the carry out of the addition (possibly zero) */
+  rp[an] = mpn_add (rp, mpz_limbs_read (a), an, mpz_limbs_read (b), bn);
+  mpz_limbs_finish (r, an + 1);
+}
+
+static void
+check_funcs (const char *name,
+	     void (*f)(mpz_ptr, mpz_srcptr, mpz_srcptr),
+	     void (*ref_f)(mpz_ptr, mpz_srcptr, mpz_srcptr),
+	     mpz_srcptr a, mpz_srcptr b)
+{
+  /* Apply f and the reference ref_f to (a,b) and abort with a dump of
+     the operands unless the results agree and are well-formed.  */
+  mpz_t r, ref;
+  mpz_inits (r, ref, NULL);
+
+  ref_f (ref, a, b);
+  MPZ_CHECK_FORMAT (ref);
+  f (r, a, b);
+  MPZ_CHECK_FORMAT (r);
+
+  if (mpz_cmp (r, ref) != 0)
+    {
+      printf ("%s failed, abits %u, bbits %u\n",
+	      name,
+	      (unsigned) mpz_sizeinbase (a, 2),
+	      (unsigned) mpz_sizeinbase (b, 2),
+      gmp_printf ("a = %Zx\n", a);
+      gmp_printf ("b = %Zx\n", b);
+      gmp_printf ("r = %Zx (bad)\n", r);
+      gmp_printf ("ref = %Zx\n", ref);
+      abort ();
+    }
+  mpz_clears (r, ref, NULL);
+}
+
+static void
+check_add (void)
+{
+  /* Compare alt_add against mpz_add on COUNT pairs of random positive
+     operands of up to BITSIZE bits.  */
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t size_bits, x, y;
+  unsigned iter;
+
+  mpz_init (size_bits);
+  mpz_init (x);
+  mpz_init (y);
+
+  for (iter = 0; iter < COUNT; iter++)
+    {
+      mpz_urandomb (size_bits, rands, 32);
+      mpz_rrandomb (x, rands, 1 + mpz_get_ui (size_bits) % BITSIZE);
+      mpz_urandomb (size_bits, rands, 32);
+      mpz_rrandomb (y, rands, 1 + mpz_get_ui (size_bits) % BITSIZE);
+
+      check_funcs ("add", alt_add, mpz_add, x, y);
+    }
+
+  mpz_clear (size_bits);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+static void
+alt_mul (mpz_ptr r, mpz_srcptr a, mpz_srcptr b)
+{
+  /* r = a * b via mpn_mul on the raw limb arrays.  When r aliases an
+     input, that input is copied to a temporary first, since
+     mpz_limbs_write (r, ...) may invalidate r's old contents.  */
+  mp_size_t an = mpz_size (a);
+  mp_size_t bn = mpz_size (b);
+  mp_srcptr ap, bp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (an > 0);
+  ASSERT (bn > 0);
+  /* ensure a is the longer operand, as mpn_mul requires */
+  if (an < bn)
+    {
+      MP_SIZE_T_SWAP (an, bn);
+      MPZ_SRCPTR_SWAP (a, b);
+    }
+  /* NOTE: This copying seems unnecessary; better to allocate new
+     result area, and free the old area when done. */
+  if (r == a)
+    {
+      mp_ptr tp =  TMP_ALLOC_LIMBS (an);
+      MPN_COPY (tp, mpz_limbs_read (a), an);
+      ap = tp;
+      /* if a == b too, both operands point at the saved copy */
+      bp = (a == b) ? ap : mpz_limbs_read (b);
+    }
+  else if (r == b)
+    {
+      mp_ptr tp = TMP_ALLOC_LIMBS (bn);
+      MPN_COPY (tp, mpz_limbs_read (b), bn);
+      bp = tp;
+      ap = mpz_limbs_read (a);
+    }
+  else
+    {
+      ap = mpz_limbs_read (a);
+      bp = mpz_limbs_read (b);
+    }
+  mpn_mul (mpz_limbs_write (r, an + bn),
+	   ap, an, bp, bn);
+
+  mpz_limbs_finish (r, an + bn);
+}
+
+/* Compare alt_mul against mpz_mul on random operands.
+   NOTE(review): not declared static, unlike check_add above — confirm
+   whether external linkage is intended.  */
+void
+check_mul (void)
+{
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t bs, a, b;
+  unsigned i;
+  mpz_inits (bs, a, b, NULL);
+  for (i = 0; i < COUNT; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      mpz_rrandomb (a, rands, 1 + mpz_get_ui (bs) % BITSIZE);
+      mpz_urandomb (bs, rands, 32);
+      mpz_rrandomb (b, rands, 1 + mpz_get_ui (bs) % BITSIZE);
+
+      check_funcs ("mul", alt_mul, mpz_mul, a, b);
+    }
+  mpz_clears (bs, a, b, NULL);
+}
+
+#define MAX_SIZE 100
+
+static void
+check_roinit (void)
+{
+  /* Check mpz_roinit_n and MPZ_ROINIT_N: build read-only mpz views over
+     a's and b's limb arrays and verify mpz_add on the views matches
+     mpz_add on the originals.  */
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t bs, a, b, r, ref;
+  unsigned i;
+
+  mpz_inits (bs, a, b, r, ref, NULL);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      mp_srcptr ap, bp;
+      mp_size_t an, bn;
+      mpz_urandomb (bs, rands, 32);
+      mpz_rrandomb (a, rands, 1 + mpz_get_ui (bs) % BITSIZE);
+      mpz_urandomb (bs, rands, 32);
+      mpz_rrandomb (b, rands, 1 + mpz_get_ui (bs) % BITSIZE);
+
+      an = mpz_size (a);
+      ap = mpz_limbs_read (a);
+      bn = mpz_size (b);
+      bp = mpz_limbs_read (b);
+
+      mpz_add (ref, a, b);
+      {
+	mpz_t a1, b1;
+#if __STDC_VERSION__ >= 199901
+	/* the casts drop const: MPZ_ROINIT_N takes a plain mp_ptr */
+	const mpz_t a2 = MPZ_ROINIT_N ( (mp_ptr) ap, an);
+	const mpz_t b2 = MPZ_ROINIT_N ( (mp_ptr) bp, bn);
+
+	mpz_set_ui (r, 0);
+	mpz_add (r, a2, b2);
+	if (mpz_cmp (r, ref) != 0)
+	  {
+	    printf ("MPZ_ROINIT_N failed\n");
+	    gmp_printf ("a = %Zx\n", a);
+	    gmp_printf ("b = %Zx\n", b);
+	    gmp_printf ("r = %Zx (bad)\n", r);
+	    gmp_printf ("ref = %Zx\n", ref);
+	    abort ();
+	  }
+#endif
+	mpz_set_ui (r, 0);
+	mpz_add (r, mpz_roinit_n (a1, ap, an), mpz_roinit_n (b1, bp, bn));
+	if (mpz_cmp (r, ref) != 0)
+	  {
+	    printf ("mpz_roinit_n failed\n");
+	    gmp_printf ("a = %Zx\n", a);
+	    gmp_printf ("b = %Zx\n", b);
+	    gmp_printf ("r = %Zx (bad)\n", r);
+	    gmp_printf ("ref = %Zx\n", ref);
+	    abort ();
+	  }
+      }
+    }
+  mpz_clears (bs, a, b, r, ref, NULL);
+}
+
+int
+main (int argc, char *argv[])
+{
+  /* Test driver for the mpz_limbs_* interface.  */
+  tests_start ();
+
+  check_add ();
+  check_mul ();
+  check_roinit ();
+
+  /* tests_end must run after the checks: it performs the test-suite
+     teardown for the work done above.  Previously it was called right
+     after tests_start, before any check had run.  */
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpz/t-lucm.c b/tests/mpz/t-lucm.c
new file mode 100644
index 0000000..24be85b
--- /dev/null
+++ b/tests/mpz/t-lucm.c
@@ -0,0 +1,144 @@
+/* Test mpz_powm, mpz_lucas_mod.
+
+Copyright 1991, 1993, 1994, 1996, 1999-2001, 2009, 2012, 2018 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp (mpz_t, int);
+
+#define SIZEM 8
+
+/* FIXME: Should we implement another sequence to test lucas mod?	*/
+/* Eg: a generalisation of what we use for Fibonacci:	*/
+/* U_{2n-1} = U_n^2 - Q*U_{n-1}^2	*/
+/* U_{2n+1} = D*U_n^2  + Q*U_{2n-1} + 2*Q^n ; with D = (P^2-4*Q)	*/
+/* P*U_{2n} = U_{2n+1} + Q*U_{2n-1}	*/
+
+int
+main (int argc, char **argv)
+{
+  /* Cross-check mpz_lucas_mod against mpz_powm on random bases, moduli
+     and exponents (see the FIXME comment above for background).  */
+  mpz_t base, exp, mod;
+  mpz_t r1, r2, t1, t2;
+  mp_size_t exp_size, mod_size;
+  int i, res;
+  int reps = 1000;
+  long Q;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (base);
+  mpz_init (exp);
+  mpz_init (mod);
+  mpz_init (r1);
+  mpz_init (r2);
+  mpz_init (t1);
+  mpz_init (t2);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % SIZEM + 1;
+
+      do  /* Loop until base >= 2 and fits in a long.  */
+	{
+	  mpz_urandomb (base, rands, BITS_PER_ULONG - 2);
+	}
+      while (mpz_cmp_ui (base, 2) < 0 || mpz_fits_slong_p (base) == 0);
+
+      Q = mpz_get_ui (base);
+
+      /* Pick a modulus >= 16 coprime to Q, growing size_range (capped
+	 at SIZEM) on each retry.  */
+      do
+        {
+	  ++size_range;
+	  size_range = MIN (size_range, SIZEM);
+	  mpz_urandomb (bs, rands, size_range);
+	  mod_size = mpz_get_ui (bs);
+	  mpz_rrandomb (mod, rands, mod_size);
+	  mpz_add_ui (mod, mod, 16);
+	}
+      while (mpz_gcd_ui (NULL, mod, Q) != 1);
+
+      /* Derive the exponent from the high bits of mod: exp = the top
+	 (size-exp_size) bits of mod, plus 1.  */
+      mod_size = mpz_sizeinbase (mod, 2) - 3;
+      mpz_urandomb (bs, rands, 32);
+      exp_size = mpz_get_ui (bs) % mod_size + 2;
+
+      mpz_tdiv_q_2exp (exp, mod, exp_size);
+      mpz_add_ui (exp, exp, 1);
+
+      /* Half the time, negate both base and Q so negative parameters
+	 are exercised too.  */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	{
+	  mpz_neg (base, base);
+	  Q = -Q;
+	}
+
+      res = mpz_lucas_mod (t1, r2, Q, exp_size, mod, t2, r1);
+      /* A nonzero res appears to mean no usable residue was produced
+	 for this input (NOTE(review): confirm against mpz_lucas_mod);
+	 take an extra rep in compensation and skip the comparison.  */
+      if (res && ++reps)
+	continue;
+      MPZ_CHECK_FORMAT (r2);
+      /* normalize the residue into [0, mod) before comparing */
+      if (mpz_cmp_ui (r2, 0) < 0)
+	mpz_add (r2, r2, mod);
+      mpz_powm (r1, base, exp, mod);
+
+      if (mpz_cmp (r1, r2) != 0)
+	{
+	  fprintf (stderr, "\nIncorrect results in test %d for operands:\n", i);
+	  debug_mp (base, -16);
+	  debug_mp (exp, -16);
+	  debug_mp (mod, -16);
+	  fprintf (stderr, "mpz_powm result:\n");
+	  debug_mp (r1, -16);
+	  fprintf (stderr, "mpz_lucas_mod result (%d) Q=%ld:\n", res, Q);
+	  debug_mp (r2, -16);
+	  abort ();
+	}
+    }
+
+  mpz_clear (bs);
+  mpz_clear (base);
+  mpz_clear (exp);
+  mpz_clear (mod);
+  mpz_clear (r1);
+  mpz_clear (r2);
+  mpz_clear (t1);
+  mpz_clear (t2);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+debug_mp (mpz_t x, int base)
+{
+  /* Dump x to stderr in the given base (passed straight through to
+     mpz_out_str), followed by a newline.  */
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-lucnum_ui.c b/tests/mpz/t-lucnum_ui.c
new file mode 100644
index 0000000..34c4315
--- /dev/null
+++ b/tests/mpz/t-lucnum_ui.c
@@ -0,0 +1,96 @@
+/* Test mpz_lucnum_ui and mpz_lucnum2_ui.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-lucnum_ui [n]
+
+   Test up to L[n], or if n is omitted then the default limit below.  A
+   literal "x" for the limit means continue forever, this being meant only
+   for development.  */
+
+
+void
+check_sequence (int argc, char *argv[])
+{
+  /* Walk the Lucas sequence up to L[limit], comparing mpz_lucnum_ui and
+     mpz_lucnum2_ui at each n against values maintained incrementally by
+     the recurrence L[n+1] = L[n] + L[n-1].  */
+  unsigned long  n;
+  unsigned long  limit = 100 * GMP_LIMB_BITS;
+  mpz_t          want_ln, want_ln1, got_ln, got_ln1;
+
+  /* literal "x" argument: run without limit (development use) */
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;
+  else
+    TESTS_REPS (limit, argv, argc);
+
+  /* start at n==0 */
+  mpz_init_set_si (want_ln1, -1); /* L[-1] */
+  mpz_init_set_ui (want_ln,  2);  /* L[0]   */
+  mpz_init (got_ln);
+  mpz_init (got_ln1);
+
+  for (n = 0; n < limit; n++)
+    {
+      mpz_lucnum2_ui (got_ln, got_ln1, n);
+      MPZ_CHECK_FORMAT (got_ln);
+      MPZ_CHECK_FORMAT (got_ln1);
+      if (mpz_cmp (got_ln, want_ln) != 0 || mpz_cmp (got_ln1, want_ln1) != 0)
+        {
+          printf ("mpz_lucnum2_ui(%lu) wrong\n", n);
+          mpz_trace ("want ln ", want_ln);
+          mpz_trace ("got  ln ",  got_ln);
+          mpz_trace ("want ln1", want_ln1);
+          mpz_trace ("got  ln1",  got_ln1);
+          abort ();
+        }
+
+      mpz_lucnum_ui (got_ln, n);
+      MPZ_CHECK_FORMAT (got_ln);
+      if (mpz_cmp (got_ln, want_ln) != 0)
+        {
+          printf ("mpz_lucnum_ui(%lu) wrong\n", n);
+          mpz_trace ("want ln", want_ln);
+          mpz_trace ("got  ln", got_ln);
+          abort ();
+        }
+
+      mpz_add (want_ln1, want_ln1, want_ln);  /* L[n+1] = L[n] + L[n-1] */
+      mpz_swap (want_ln1, want_ln);
+    }
+
+  mpz_clear (want_ln);
+  mpz_clear (want_ln1);
+  mpz_clear (got_ln);
+  mpz_clear (got_ln1);
+}
+
+int
+main (int argc, char *argv[])
+{
+  /* Test driver for mpz_lucnum_ui / mpz_lucnum2_ui.  */
+  tests_start ();
+  /* presumably selects hex output for mpz_trace dumps — the semantics
+     of mp_trace_base live in tests.h */
+  mp_trace_base = -16;
+
+  check_sequence (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-mfac_uiui.c b/tests/mpz/t-mfac_uiui.c
new file mode 100644
index 0000000..8bca2d7
--- /dev/null
+++ b/tests/mpz/t-mfac_uiui.c
@@ -0,0 +1,135 @@
+/* Exercise mpz_mfac_uiui.
+
+Copyright 2000-2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-mfac_uiui [x|num]
+
+   With no arguments testing goes up to the initial value of "limit" below.
+   With a number argument tests are carried that far, or with a literal "x"
+   tests are continued without limit (this being meant only for development
+   purposes).  */
+
+#define MULTIFAC_WHEEL (2*3*11)
+#define MULTIFAC_WHEEL2 (5*13)
+
+/* Exercise mpz_mfac_uiui for n = 0..limit with two coprime wheel sizes,
+   maintaining rolling references with mpz_mul_ui, then cross-check the
+   m = 1 and m = 2 cases against mpz_fac_ui and mpz_2fac_ui.  */
+int
+main (int argc, char *argv[])
+{
+  mpz_t ref[MULTIFAC_WHEEL], ref2[MULTIFAC_WHEEL2], res;
+  unsigned long n, j, m, m2;
+  unsigned long limit = 2222, step = 1;
+
+  tests_start ();
+
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;
+  else
+    TESTS_REPS (limit, argv, argc);
+
+  /* for small limb testing */
+  limit = MIN (limit, MP_LIMB_T_MAX);
+
+  /* ref[m] accumulates n!^(MULTIFAC_WHEEL), i.e. the product of all
+     i <= n with i == n (mod MULTIFAC_WHEEL); likewise ref2[m2].  */
+  for (m = 0; m < MULTIFAC_WHEEL; m++)
+    mpz_init_set_ui(ref [m],1);
+  for (m2 = 0; m2 < MULTIFAC_WHEEL2; m2++)
+    mpz_init_set_ui(ref2 [m2],1);
+
+  mpz_init (res);
+
+  m = 0;
+  m2 = 0;
+  for (n = 0; n <= limit;)
+    {
+      mpz_mfac_uiui (res, n, MULTIFAC_WHEEL);
+      MPZ_CHECK_FORMAT (res);
+      if (mpz_cmp (ref[m], res) != 0)
+        {
+          printf ("mpz_mfac_uiui(%lu,%d) wrong\n", n, MULTIFAC_WHEEL);
+          printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, ref[m]); printf("\n");
+          abort ();
+        }
+      mpz_mfac_uiui (res, n, MULTIFAC_WHEEL2);
+      MPZ_CHECK_FORMAT (res);
+      if (mpz_cmp (ref2[m2], res) != 0)
+        {
+          printf ("mpz_mfac_uiui(%lu,%d) wrong\n", n, MULTIFAC_WHEEL2);
+          printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, ref2[m2]); printf("\n");
+          abort ();
+        }
+      if (n + step <= limit)
+	for (j = 0; j < step; j++) {
+	  n++; m++; m2++;
+	  if (m >= MULTIFAC_WHEEL) m -= MULTIFAC_WHEEL;
+	  if (m2 >= MULTIFAC_WHEEL2) m2 -= MULTIFAC_WHEEL2;
+	  mpz_mul_ui (ref[m], ref[m], n); /* Compute a reference, with current library */
+	  mpz_mul_ui (ref2[m2], ref2[m2], n); /* Compute a reference, with current library */
+	}
+      else n += step;
+    }
+  /* m = 1 must agree with the plain factorial.  */
+  mpz_fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 1);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      printf ("mpz_mfac_uiui(%lu,1) wrong\n", n);
+      printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf ("  want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  /* m = 2 must agree with the double factorial.  */
+  mpz_2fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 2);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      /* fixed: this is the m = 2 check, the message used to say ",1" */
+      printf ("mpz_mfac_uiui(%lu,2) wrong\n", n);
+      printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf ("  want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  n++;
+  mpz_2fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 2);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      printf ("mpz_mfac_uiui(%lu,2) wrong\n", n);
+      printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf ("  want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  for (m = 0; m < MULTIFAC_WHEEL; m++)
+    mpz_clear (ref[m]);
+  for (m2 = 0; m2 < MULTIFAC_WHEEL2; m2++)
+    mpz_clear (ref2[m2]);
+  mpz_clear (res);
+
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpz/t-mul.c b/tests/mpz/t-mul.c
new file mode 100644
index 0000000..0dc22e9
--- /dev/null
+++ b/tests/mpz/t-mul.c
@@ -0,0 +1,221 @@
+/* Test mpz_cmp, mpz_mul.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+void debug_mp (mpz_t);
+static void refmpz_mul (mpz_t, const mpz_t, const mpz_t);
+void dump_abort (int, const char *, mpz_t, mpz_t, mpz_t, mpz_t);
+
+#define FFT_MIN_BITSIZE 100000
+
+char *extra_fft;
+
+/* Check multiplier*multiplicand against the reference code, then check
+   squaring against plain multiplication.  Side effect: multiplicand is
+   overwritten with a copy of multiplier for the squaring test.  */
+void
+one (int i, mpz_t multiplicand, mpz_t multiplier)
+{
+  mpz_t product, ref_product;
+
+  mpz_init (product);
+  mpz_init (ref_product);
+
+  /* Test plain multiplication comparing results against reference code.  */
+  mpz_mul (product, multiplier, multiplicand);
+  refmpz_mul (ref_product, multiplier, multiplicand);
+  if (mpz_cmp (product, ref_product))
+    dump_abort (i, "incorrect plain product",
+		multiplier, multiplicand, product, ref_product);
+
+  /* Test squaring, comparing results against plain multiplication  */
+  mpz_mul (product, multiplier, multiplier);
+  mpz_set (multiplicand, multiplier);
+  mpz_mul (ref_product, multiplier, multiplicand);
+  if (mpz_cmp (product, ref_product))
+    dump_abort (i, "incorrect square product",
+		multiplier, multiplier, product, ref_product);
+
+  mpz_clear (product);
+  mpz_clear (ref_product);
+}
+
+/* Drive mpz_mul tests: first over gradually growing random sizes up to
+   2^fft_max_2exp bits, then 50 rounds of operands of at least
+   FFT_MIN_BITSIZE bits to exercise the FFT multiplication range.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2;
+  int i;
+  int fft_max_2exp;
+
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range, fsize_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  /* GMP_CHECK_FFT raises the size limit for more thorough FFT testing.  */
+  extra_fft = getenv ("GMP_CHECK_FFT");
+  fft_max_2exp = 0;
+  if (extra_fft != NULL)
+    {
+      fft_max_2exp = atoi (extra_fft);
+      printf ("GMP_CHECK_FFT=%d (include this in bug reports)\n", fft_max_2exp);
+    }
+
+  if (fft_max_2exp <= 1)	/* compat with old use of GMP_CHECK_FFT */
+    fft_max_2exp = 22;		/* default limit, good for any machine */
+
+  mpz_init (bs);
+  mpz_init (op1);
+  mpz_init (op2);
+
+  fsize_range = 4 << 8;		/* a fraction 1/256 of size_range */
+  for (i = 0;; i++)
+    {
+      /* grow the size exponent by about 3% each round */
+      size_range = fsize_range >> 8;
+      fsize_range = fsize_range * 33 / 32;
+
+      if (size_range > fft_max_2exp)
+	break;
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op1, rands, mpz_get_ui (bs));
+      /* on odd rounds pick a fresh size for op2, otherwise reuse bs */
+      if (i & 1)
+	mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs));
+
+      /* randomly negate each operand with probability 1/4 */
+      mpz_urandomb (bs, rands, 4);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 0x3) == 0)
+	mpz_neg (op1, op1);
+      if ((bsi & 0xC) == 0)
+	mpz_neg (op2, op2);
+
+      /* printf ("%d %d\n", SIZ (op1), SIZ (op2)); */
+      one (i, op2, op1);
+    }
+
+  /* FFT-sized operands: at least FFT_MIN_BITSIZE bits each.  */
+  for (i = -50; i < 0; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % fft_max_2exp;
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op1, rands, mpz_get_ui (bs) + FFT_MIN_BITSIZE);
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs) + FFT_MIN_BITSIZE);
+
+      /* printf ("%d: %d %d\n", i, SIZ (op1), SIZ (op2)); */
+      fflush (stdout);
+      one (-1, op2, op1);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Reference mpz multiplication: multiply the absolute values with
+   refmpn_mul (larger operand first), strip a possible zero high limb,
+   and set w's sign from the XOR of the operand signs.  */
+static void
+refmpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
+{
+  mp_size_t usize = u->_mp_size;
+  mp_size_t vsize = v->_mp_size;
+  mp_size_t wsize;
+  mp_size_t sign_product;
+  mp_ptr up, vp;
+  mp_ptr wp;
+  mp_size_t talloc;
+
+  /* negative iff exactly one operand is negative */
+  sign_product = usize ^ vsize;
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+
+  if (usize == 0 || vsize == 0)
+    {
+      SIZ (w) = 0;
+      return;
+    }
+
+  talloc = usize + vsize;
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  wp = __GMP_ALLOCATE_FUNC_LIMBS (talloc);
+
+  /* refmpn_mul wants the larger operand first */
+  if (usize > vsize)
+    refmpn_mul (wp, up, usize, vp, vsize);
+  else
+    refmpn_mul (wp, vp, vsize, up, usize);
+  wsize = usize + vsize;
+  wsize -= wp[wsize - 1] == 0;  /* normalize: drop a zero high limb */
+  MPZ_REALLOC (w, wsize);
+  MPN_COPY (PTR(w), wp, wsize);
+
+  SIZ(w) = sign_product < 0 ? -wsize : wsize;
+  __GMP_FREE_FUNC_LIMBS (wp, talloc);
+}
+
+/* Report a failed multiplication: print the operands and both products,
+   then the limb-index range [b, e] over which the two products differ.
+   NOTE(review): the scan indexes product by ref_product's size -- assumes
+   both have the same limb count; confirm for mismatched-size failures.  */
+void
+dump_abort (int i, const char *s,
+            mpz_t op1, mpz_t op2, mpz_t product, mpz_t ref_product)
+{
+  mp_size_t b, e;
+  fprintf (stderr, "ERROR: %s in test %d\n", s, i);
+  fprintf (stderr, "op1          = "); debug_mp (op1);
+  fprintf (stderr, "op2          = "); debug_mp (op2);
+  fprintf (stderr, "    product  = "); debug_mp (product);
+  fprintf (stderr, "ref_product  = "); debug_mp (ref_product);
+  /* first differing limb */
+  for (b = 0; b < ABSIZ(ref_product); b++)
+    if (PTR(ref_product)[b] != PTR(product)[b])
+      break;
+  /* last differing limb */
+  for (e = ABSIZ(ref_product) - 1; e >= 0; e--)
+    if (PTR(ref_product)[e] != PTR(product)[e])
+      break;
+  printf ("ERRORS in %ld--%ld\n", b, e);
+  abort();
+}
+
+/* Print x to stderr in hex.  Values longer than 65 hex digits are
+   abbreviated as top digits, "...", the 25 lowest digits, and the total
+   digit count in brackets.  */
+void
+debug_mp (mpz_t x)
+{
+  size_t siz = mpz_sizeinbase (x, 16);
+
+  if (siz > 65)
+    {
+      mpz_t q;
+      mpz_init (q);
+      /* keep the 25 most significant hex digits */
+      mpz_tdiv_q_2exp (q, x, 4 * (mpz_sizeinbase (x, 16) - 25));
+      gmp_fprintf (stderr, "%ZX...", q);
+      /* and the 25 least significant hex digits */
+      mpz_tdiv_r_2exp (q, x, 4 * 25);
+      gmp_fprintf (stderr, "%025ZX [%d]\n", q, (int) siz);
+      mpz_clear (q);
+    }
+  else
+    {
+      gmp_fprintf (stderr, "%ZX\n", x);
+    }
+}
diff --git a/tests/mpz/t-mul_i.c b/tests/mpz/t-mul_i.c
new file mode 100644
index 0000000..5f2dae2
--- /dev/null
+++ b/tests/mpz/t-mul_i.c
@@ -0,0 +1,134 @@
+/* Test mpz_mul_ui and mpz_mul_si.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+mpz_t got, want, x;
+
+/* Abort with a trace of x, y, got and want if the globals got/want differ
+   after an mpz_mul_si test.  */
+void
+compare_si (long y)
+{
+  if (mpz_cmp (got, want) != 0)
+    {
+      printf    ("mpz_mul_si wrong\n");
+      mpz_trace ("  x", x);
+      printf    ("  y=%ld (0x%lX)\n", y, y);
+      mpz_trace ("  got ", got);
+      mpz_trace ("  want", want);
+      abort ();
+    }
+}
+
+/* Abort with a trace of x, y, got and want if the globals got/want differ
+   after an mpz_mul_ui test.  */
+void
+compare_ui (unsigned long y)
+{
+  if (mpz_cmp (got, want) != 0)
+    {
+      printf    ("mpz_mul_ui wrong\n");
+      mpz_trace ("  x", x);
+      printf    ("  y=%lu (0x%lX)\n", y, y);
+      mpz_trace ("  got ", got);
+      mpz_trace ("  want", want);
+      abort ();
+    }
+}
+
+/* Multiply x = 1 by boundary multipliers (0, 1, -1, LONG_MIN, LONG_MAX for
+   the signed case; 0, 1, ULONG_MAX for the unsigned case) and check that
+   the product equals the multiplier itself.  */
+void
+check_samples (void)
+{
+  {
+    long  y;
+
+    mpz_set_ui (x, 1L);
+    y = 0;
+    mpz_mul_si (got, x, y);
+    mpz_set_si (want, y);
+    compare_si (y);
+
+    mpz_set_ui (x, 1L);
+    y = 1;
+    mpz_mul_si (got, x, y);
+    mpz_set_si (want, y);
+    compare_si (y);
+
+    mpz_set_ui (x, 1L);
+    y = -1;
+    mpz_mul_si (got, x, y);
+    mpz_set_si (want, y);
+    compare_si (y);
+
+    mpz_set_ui (x, 1L);
+    y = LONG_MIN;
+    mpz_mul_si (got, x, y);
+    mpz_set_si (want, y);
+    compare_si (y);
+
+    mpz_set_ui (x, 1L);
+    y = LONG_MAX;
+    mpz_mul_si (got, x, y);
+    mpz_set_si (want, y);
+    compare_si (y);
+  }
+
+  {
+    unsigned long y;
+
+    mpz_set_ui (x, 1L);
+    y = 0;
+    mpz_mul_ui (got, x, y);
+    mpz_set_ui (want, y);
+    compare_ui (y);
+
+    mpz_set_ui (x, 1L);
+    y = 1;
+    mpz_mul_ui (got, x, y);
+    mpz_set_ui (want, y);
+    compare_ui (y);
+
+    mpz_set_ui (x, 1L);
+    y = ULONG_MAX;
+    mpz_mul_ui (got, x, y);
+    mpz_set_ui (want, y);
+    compare_ui (y);
+  }
+}
+
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+
+  /* x, got and want are file-scope mpz_t globals used by the checks */
+  mpz_init (x);
+  mpz_init (got);
+  mpz_init (want);
+
+  check_samples ();
+
+  mpz_clear (x);
+  mpz_clear (got);
+  mpz_clear (want);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-nextprime.c b/tests/mpz/t-nextprime.c
new file mode 100644
index 0000000..d2fbaef
--- /dev/null
+++ b/tests/mpz/t-nextprime.c
@@ -0,0 +1,459 @@
+/* Test mpz_nextprime.
+
+Copyright 2009, 2015, 2018, 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Reference nextprime: linear search upward from t+1 using
+   mpz_probab_prime_p with 10 rounds.  */
+void
+refmpz_nextprime (mpz_ptr p, mpz_srcptr t)
+{
+  mpz_add_ui (p, t, 1L);
+  while (! mpz_probab_prime_p (p, 10))
+    mpz_add_ui (p, p, 1L);
+}
+
+/* Reference prevprime: linear search downward from t-1.  For t <= 2 there
+   is no previous prime, so p is deliberately left unchanged.  */
+void
+refmpz_prevprime (mpz_ptr p, mpz_srcptr t)
+{
+  if (mpz_cmp_ui(t, 2) <= 0)
+    return;
+
+  mpz_sub_ui (p, t, 1L);
+  while (! mpz_probab_prime_p (p, 10))
+    mpz_sub_ui (p, p, 1L);
+}
+
+/* Check that mpz_nextprime jumps exactly `gap` from low, and that
+   mpz_prevprime from the found prime lands back on low.  */
+void
+test_largegap (mpz_t low, const int gap)
+{
+  mpz_t t, nxt;
+  mpz_init (t);
+  mpz_init (nxt);
+
+  mpz_nextprime(nxt, low);
+  mpz_sub(t, nxt, low);
+
+  if (mpz_cmp_ui(t, gap) != 0)
+     {
+      gmp_printf ("nextprime gap %Zd => %Zd != %d\n", low, nxt, gap);
+      abort ();
+    }
+
+  /* the gap must be symmetric going back down */
+  mpz_prevprime(t, nxt);
+  if (mpz_cmp(t, low) != 0)
+    {
+      gmp_printf ("prevprime gap %Zd => %Zd != %d\n", nxt, t, gap);
+      abort ();
+    }
+
+  mpz_clear (t);
+  mpz_clear (nxt);
+}
+
+/* Exercise nextprime/prevprime over several known large prime gaps,
+   including high-merit gaps built from primorials.  */
+void
+test_largegaps ()
+{
+  mpz_t n;
+
+  mpz_init (n);
+
+  // largest gap with start < 2^32.
+  mpz_set_str (n, "3842610773", 10);
+  test_largegap (n, 336);
+
+  // largest gap with start < 2^64.
+  mpz_set_str (n, "18361375334787046697", 10);
+  test_largegap (n, 1550);
+
+  // test high merit primegap in the P30 digit range.
+  mpz_set_str (n, "3001549619028223830552751967", 10);
+  test_largegap (n, 2184);
+
+  // test high merit primegap in the P100 range.
+  mpz_primorial_ui (n, 257);
+  mpz_divexact_ui (n, n, 5610);
+  mpz_mul_ui (n, n, 4280516017UL);
+  mpz_sub_ui (n, n, 2560);
+  test_largegap (n, 9006);
+
+  // test high merit primegap in the P200 range.
+  mpz_primorial_ui (n, 409);
+  mpz_divexact_ui (n, n, 30);
+  mpz_mul_ui (n, n, 3483347771UL);
+  mpz_sub_ui (n, n, 7016);
+  test_largegap (n, 15900);
+
+  mpz_clear (n);
+}
+
+/* Exercise prime gaps that straddle power-of-two bit boundaries, where
+   sieve window arithmetic is most likely to go wrong.  */
+void
+test_bitboundaries ()
+{
+  mpz_t n;
+  mpz_init (n);
+
+  mpz_set_str (n, "0xfff1", 0);
+  test_largegap (n, 16);
+
+  mpz_set_str (n, "0xfffffffb", 0);
+  test_largegap (n, 20);
+
+  mpz_set_str (n, "0xffffffffffc5", 0);
+  test_largegap (n, 80);
+
+  mpz_set_str (n, "0xffffffffffffffc5", 0);
+  test_largegap (n, 72);
+
+  mpz_set_str (n, "0xffffffffffffffffffbf", 0);
+  test_largegap (n, 78);
+
+  mpz_set_str (n, "0xffffffffffffffffffffffef", 0);
+  test_largegap (n, 78);
+
+  mpz_set_str (n, "0xffffffffffffffffffffffffffb5", 0);
+  test_largegap (n, 100);
+
+  mpz_set_str (n, "0xffffffffffffffffffffffffffffff61", 0);
+  test_largegap (n, 210);
+
+  mpz_set_str (n, "0xffffffffffffffffffffffffffffffffffffffffffffff13", 0);
+  test_largegap (n, 370);
+
+  mpz_clear (n);
+}
+
+/* Apply mpz_nextprime reps times starting from `start`, optionally
+   checking each successive gap against diffs[], and require the final
+   prime to equal `end`.  */
+void
+run (const char *start, int reps, const char *end, short diffs[])
+{
+  mpz_t x, y;
+  int i;
+
+  mpz_init_set_str (x, start, 0);
+  mpz_init (y);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_nextprime (y, x);
+      mpz_sub (x, y, x);  /* x now holds the gap */
+      if (diffs != NULL &&
+	  (! mpz_fits_sshort_p (x) || diffs[i] != (short) mpz_get_ui (x)))
+	{
+	  gmp_printf ("diff list discrepancy\n");
+	  abort ();
+	}
+      mpz_swap (x, y);
+    }
+
+  mpz_set_str (y, end, 0);
+
+  if (mpz_cmp (x, y) != 0)
+    {
+      gmp_printf ("got  %Zd\n", x);
+      gmp_printf ("want %Zd\n", y);
+      abort ();
+    }
+
+  mpz_clear (y);
+  mpz_clear (x);
+}
+
+/* Walk the prime list backwards with mpz_prevprime from `end` towards
+   `start`, checking each gap against diffs[] (in reverse) when given.
+   The final step only requires prevprime to land at or below `start`,
+   since starts aren't always prime.  */
+void
+run_p (const char *start, int reps, const char *end, short diffs[])
+{
+  mpz_t x, y;
+  int i;
+
+  mpz_init_set_str (x, end, 0);
+  mpz_init (y);
+
+  // Last rep doesn't share same data with nextprime
+  for (i = 0; i < reps - 1; i++)
+    {
+      mpz_prevprime (y, x);
+      mpz_sub (x, x, y);  /* x now holds the gap */
+      if (diffs != NULL &&
+	  (! mpz_fits_sshort_p (x) || diffs[reps - i - 1] != (short) mpz_get_ui (x)))
+	{
+	  /* fixed: report the entry actually tested (diffs[reps - i - 1],
+	     not diffs[i]), and use %lu for the unsigned long from
+	     mpz_get_ui instead of %d.  */
+	  gmp_printf ("diff list discrepancy %Zd, %d vs %lu\n",
+                y, diffs[reps - i - 1], mpz_get_ui (x));
+	  abort ();
+	}
+      mpz_swap (x, y);
+    }
+
+  // starts aren't always prime, so check that result is less than or equal
+  mpz_prevprime(x, x);
+
+  mpz_set_str(y, start, 0);
+  if (mpz_cmp (x, y) > 0)
+    {
+      gmp_printf ("got  %Zd\n", x);
+      gmp_printf ("want %Zd\n", y);
+      abort ();
+    }
+
+  mpz_clear (y);
+  mpz_clear (x);
+}
+
+
+extern short diff1[];
+extern short diff3[];
+extern short diff4[];
+extern short diff5[];
+extern short diff6[];
+
+/* Compare `func` against `ref_func` on `reps` random starting points of
+   random small sizes.  Used for both nextprime and prevprime.  */
+void
+test_ref (gmp_randstate_ptr rands, int reps,
+          void (*func)(mpz_t, const mpz_t),
+          void(*ref_func)(mpz_t, const mpz_t))
+{
+  int i;
+  mpz_t bs, x, test_p, ref_p;
+  unsigned long size_range;
+
+  mpz_init (bs);
+  mpz_init (x);
+  mpz_init (test_p);
+  mpz_init (ref_p);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 8 + 2; /* 2..9 bit size field, so operands up to 2^9-1 = 511 bits */
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (x, rands, mpz_get_ui (bs));
+
+      func (test_p, x);
+      ref_func (ref_p, x);
+      if (mpz_cmp (test_p, ref_p) != 0)
+        {
+          gmp_printf ("start %Zd\n", x);
+          gmp_printf ("got   %Zd\n", test_p);
+          gmp_printf ("want  %Zd\n", ref_p);
+	  abort ();
+        }
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x);
+  mpz_clear (test_p);
+  mpz_clear (ref_p);
+}
+
+/* Fixed diff-list sequences at several bit sizes, then random comparisons
+   of mpz_nextprime against the reference implementation.  */
+void
+test_nextprime(gmp_randstate_ptr rands, int reps)
+{
+  run ("2", 1000, "0x1ef7", diff1);
+
+  run ("3", 1000 - 1, "0x1ef7", NULL);
+
+  run ("0x8a43866f5776ccd5b02186e90d28946aeb0ed914", 50,
+       "0x8a43866f5776ccd5b02186e90d28946aeb0eeec5", diff3);
+
+  run ("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF6C", 50, /* 2^148 - 148 */
+       "0x100000000000000000000000000000000010ab", diff4);
+
+  run ("0x1c2c26be55317530311facb648ea06b359b969715db83292ab8cf898d8b1b", 50,
+       "0x1c2c26be55317530311facb648ea06b359b969715db83292ab8cf898da957", diff5);
+
+  run ("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF80", 50, /* 2^128 - 128 */
+       "0x10000000000000000000000000000155B", diff6);
+
+  test_ref(
+      rands, reps,
+      (void (*)(mpz_t, const mpz_t)) mpz_nextprime,
+      refmpz_nextprime);
+}
+
+/* Check mpz_prevprime's n <= 2 contract, replay the diff lists backwards,
+   then random comparisons against the reference implementation.  */
+void
+test_prevprime (gmp_randstate_ptr rands, int reps)
+{
+  long i;
+  int retval;
+  mpz_t n, prvp;
+
+  mpz_init (n);
+  mpz_init (prvp);
+
+  /* Test mpz_prevprime(n <= 2) returns 0, leaves rop unchanged. */
+  {
+    int temp = 123;
+    mpz_set_ui (prvp, temp);
+    for (i = 0; i <= 2; i++)
+      {
+        mpz_set_si(n, i);
+        retval = mpz_prevprime (prvp, n);
+        if ( retval != 0 || mpz_cmp_ui (prvp, temp) != 0 )
+          {
+            gmp_printf ("mpz_prevprime(%Zd) return (%d) rop (%Zd)\n", n, retval, prvp);
+            abort ();
+          }
+      }
+  }
+
+  mpz_clear (n);
+  mpz_clear (prvp);
+
+  run_p ("2", 1000, "0x1ef7", diff1);
+
+  run_p ("3", 1000 - 1, "0x1ef7", NULL);
+
+  run_p ("0x8a43866f5776ccd5b02186e90d28946aeb0ed914", 50,
+         "0x8a43866f5776ccd5b02186e90d28946aeb0eeec5", diff3);
+
+  run_p ("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF6C", 50, /* 2^148 - 148 */
+         "0x100000000000000000000000000000000010ab", diff4);
+
+  run_p ("0x1c2c26be55317530311facb648ea06b359b969715db83292ab8cf898d8b1b", 50,
+         "0x1c2c26be55317530311facb648ea06b359b969715db83292ab8cf898da957", diff5);
+
+  run_p ("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF80", 50, /* 2^128 - 128 */
+         "0x10000000000000000000000000000155B", diff6);
+
+  // Cast away int return from mpz_prevprime for test ref.
+  test_ref(
+      rands, reps,
+      (void (*)(mpz_t, const mpz_t)) mpz_prevprime,
+      refmpz_prevprime);
+}
+
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+  int reps = 20;  /* random comparison rounds; overridable via argv */
+
+  tests_start();
+
+  rands = RANDS;
+  TESTS_REPS (reps, argv, argc);
+
+  test_nextprime(rands, reps);
+  test_prevprime(rands, reps);
+
+  test_largegaps ();
+  test_bitboundaries ();
+
+  tests_end ();
+  return 0;
+}
+
+/* diff1: gaps between the first 1000 consecutive primes starting at 2,
+   as checked by run("2", 1000, "0x1ef7", diff1).  */
+short diff1[] =
+{
+  1,2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,
+  2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
+  2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,
+  2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
+  4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,
+  2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6,
+  12,2,18,6,10,6,6,2,6,10,6,6,2,6,6,4,
+  2,12,10,2,4,6,6,2,12,4,6,8,10,8,10,8,
+  6,6,4,8,6,4,8,4,14,10,12,2,10,2,4,2,
+  10,14,4,2,4,14,4,2,4,20,4,8,10,8,4,6,
+  6,14,4,6,6,8,6,12,4,6,2,10,2,6,10,2,
+  10,2,6,18,4,2,4,6,6,8,6,6,22,2,10,8,
+  10,6,6,8,12,4,6,6,2,6,12,10,18,2,4,6,
+  2,6,4,2,4,12,2,6,34,6,6,8,18,10,14,4,
+  2,4,6,8,4,2,6,12,10,2,4,2,4,6,12,12,
+  8,12,6,4,6,8,4,8,4,14,4,6,2,4,6,2,
+  6,10,20,6,4,2,24,4,2,10,12,2,10,8,6,6,
+  6,18,6,4,2,12,10,12,8,16,14,6,4,2,4,2,
+  10,12,6,6,18,2,16,2,22,6,8,6,4,2,4,8,
+  6,10,2,10,14,10,6,12,2,4,2,10,12,2,16,2,
+  6,4,2,10,8,18,24,4,6,8,16,2,4,8,16,2,
+  4,8,6,6,4,12,2,22,6,2,6,4,6,14,6,4,
+  2,6,4,6,12,6,6,14,4,6,12,8,6,4,26,18,
+  10,8,4,6,2,6,22,12,2,16,8,4,12,14,10,2,
+  4,8,6,6,4,2,4,6,8,4,2,6,10,2,10,8,
+  4,14,10,12,2,6,4,2,16,14,4,6,8,6,4,18,
+  8,10,6,6,8,10,12,14,4,6,6,2,28,2,10,8,
+  4,14,4,8,12,6,12,4,6,20,10,2,16,26,4,2,
+  12,6,4,12,6,8,4,8,22,2,4,2,12,28,2,6,
+  6,6,4,6,2,12,4,12,2,10,2,16,2,16,6,20,
+  16,8,4,2,4,2,22,8,12,6,10,2,4,6,2,6,
+  10,2,12,10,2,10,14,6,4,6,8,6,6,16,12,2,
+  4,14,6,4,8,10,8,6,6,22,6,2,10,14,4,6,
+  18,2,10,14,4,2,10,14,4,8,18,4,6,2,4,6,
+  2,12,4,20,22,12,2,4,6,6,2,6,22,2,6,16,
+  6,12,2,6,12,16,2,4,6,14,4,2,18,24,10,6,
+  2,10,2,10,2,10,6,2,10,2,10,6,8,30,10,2,
+  10,8,6,10,18,6,12,12,2,18,6,4,6,6,18,2,
+  10,14,6,4,2,4,24,2,12,6,16,8,6,6,18,16,
+  2,4,6,2,6,6,10,6,12,12,18,2,6,4,18,8,
+  24,4,2,4,6,2,12,4,14,30,10,6,12,14,6,10,
+  12,2,4,6,8,6,10,2,4,14,6,6,4,6,2,10,
+  2,16,12,8,18,4,6,12,2,6,6,6,28,6,14,4,
+  8,10,8,12,18,4,2,4,24,12,6,2,16,6,6,14,
+  10,14,4,30,6,6,6,8,6,4,2,12,6,4,2,6,
+  22,6,2,4,18,2,4,12,2,6,4,26,6,6,4,8,
+  10,32,16,2,6,4,2,4,2,10,14,6,4,8,10,6,
+  20,4,2,6,30,4,8,10,6,6,8,6,12,4,6,2,
+  6,4,6,2,10,2,16,6,20,4,12,14,28,6,20,4,
+  18,8,6,4,6,14,6,6,10,2,10,12,8,10,2,10,
+  8,12,10,24,2,4,8,6,4,8,18,10,6,6,2,6,
+  10,12,2,10,6,6,6,8,6,10,6,2,6,6,6,10,
+  8,24,6,22,2,18,4,8,10,30,8,18,4,2,10,6,
+  2,6,4,18,8,12,18,16,6,2,12,6,10,2,10,2,
+  6,10,14,4,24,2,16,2,10,2,10,20,4,2,4,8,
+  16,6,6,2,12,16,8,4,6,30,2,10,2,6,4,6,
+  6,8,6,4,12,6,8,12,4,14,12,10,24,6,12,6,
+  2,22,8,18,10,6,14,4,2,6,10,8,6,4,6,30,
+  14,10,2,12,10,2,16,2,18,24,18,6,16,18,6,2,
+  18,4,6,2,10,8,10,6,6,8,4,6,2,10,2,12,
+  4,6,6,2,12,4,14,18,4,6,20,4,8,6,4,8,
+  4,14,6,4,14,12,4,2,30,4,24,6,6,12,12,14,
+  6,4,2,4,18,6,12,8
+};
+
+/* diff3: 50 gaps for the 160-bit sequence in test_nextprime/test_prevprime.  */
+short diff3[] =
+{
+  33,32,136,116,24,22,104,114,76,278,238,162,36,44,388,134,
+  130,26,312,42,138,28,24,80,138,108,270,12,330,130,98,102,
+  162,34,36,170,90,34,14,6,24,66,154,218,70,132,188,88,
+  80,82
+};
+
+/* diff4: 50 gaps for the sequence starting at 2^148 - 148.  */
+short diff4[] =
+{
+  239,92,64,6,104,24,46,258,68,18,54,100,68,154,26,4,
+  38,142,168,42,18,26,286,104,136,116,40,2,28,110,52,78,
+  104,24,54,96,4,626,196,24,56,36,52,102,48,156,26,18,
+  42,40
+};
+
+/* diff5: 50 gaps for the 245-bit sequence.  */
+short diff5[] =
+{
+  268,120,320,184,396,2,94,108,20,318,274,14,64,122,220,108,
+  18,174,6,24,348,32,64,116,268,162,20,156,28,110,52,428,
+  196,14,262,30,194,120,300,66,268,12,428,370,212,198,192,130,
+  30,80
+};
+
+/* diff6: gaps for the sequence starting at 2^128 - 128.  */
+short diff6[] =
+{
+  179,30,84,108,112,36,42,110,52,132,60,30,326,114,496,92,100,
+  272,36,54,90,4,2,24,40,398,150,72,60,16,8,4,80,16,2,342,112,
+  14,136,236,40,18,50,192,198,204,40,266,42,274
+};
diff --git a/tests/mpz/t-oddeven.c b/tests/mpz/t-oddeven.c
new file mode 100644
index 0000000..eedad4b
--- /dev/null
+++ b/tests/mpz/t-oddeven.c
@@ -0,0 +1,87 @@
+/* Test mpz_odd_p and mpz_even_p.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Check mpz_odd_p and mpz_even_p against a fixed table covering small
+   positive and negative values and multi-limb values of both signs.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *n;       /* operand, parsed with base 0 */
+    int          odd, even;  /* expected truth values */
+  } data[] = {
+    {   "0", 0, 1 },
+    {   "1", 1, 0 },
+    {   "2", 0, 1 },
+    {   "3", 1, 0 },
+    {   "4", 0, 1 },
+
+    {  "-4", 0, 1 },
+    {  "-3", 1, 0 },
+    {  "-2", 0, 1 },
+    {  "-1", 1, 0 },
+
+    {  "0x1000000000000000000000000000000000000000000000000000", 0, 1 },
+    {  "0x1000000000000000000000000000000000000000000000000001", 1, 0 },
+    {  "0x1000000000000000000000000000000000000000000000000002", 0, 1 },
+    {  "0x1000000000000000000000000000000000000000000000000003", 1, 0 },
+
+    { "-0x1000000000000000000000000000000000000000000000000004", 0, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000003", 1, 0 },
+    { "-0x1000000000000000000000000000000000000000000000000002", 0, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000001", 1, 0 },
+  };
+
+  mpz_t  n;
+  int    i;
+
+  mpz_init (n);
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (n, data[i].n, 0);
+
+      /* normalize the macro result to 0/1 before comparing */
+      if ((mpz_odd_p (n) != 0) != data[i].odd)
+	{
+	  printf ("mpz_odd_p wrong on data[%d]\n", i);
+	  abort();
+	}
+
+      if ((mpz_even_p (n) != 0) != data[i].even)
+	{
+	  printf ("mpz_even_p wrong on data[%d]\n", i);
+	  abort();
+	}
+    }
+
+  mpz_clear (n);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-perfpow.c b/tests/mpz/t-perfpow.c
new file mode 100644
index 0000000..84f9c8e
--- /dev/null
+++ b/tests/mpz/t-perfpow.c
@@ -0,0 +1,247 @@
+/* Test mpz_perfect_power_p.
+
+   Contributed to the GNU project by Torbjorn Granlund and Martin Boij.
+
+Copyright 2008-2010, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Fixed cases for mpz_perfect_power_p: each entry pairs a number (base-0
+   string) with the expected result, 1 = perfect power, 0 = not.  The table
+   is NULL-terminated.  */
+struct
+{
+  const char *num_as_str;
+  char want;
+} static tests[] =
+  {
+    { "0", 1},
+    { "1", 1},
+    {"-1", 1},
+    { "2", 0},
+    {"-2", 0},
+    { "3", 0},
+    {"-3", 0},
+    { "4", 1},
+    {"-4", 0},
+    { "64", 1},
+    {"-64", 1},
+    { "128", 1},
+    {"-128", 1},
+    { "256", 1},
+    {"-256", 0},
+    { "512", 1},
+    {"-512", 1},
+    { "0x4000000", 1},
+    {"-0x4000000", 1},
+    { "0x3cab640", 1},
+    {"-0x3cab640", 0},
+    { "0x3e23840", 1},
+    {"-0x3e23840", 0},
+    { "0x3d3a7ed1", 1},
+    {"-0x3d3a7ed1", 1},
+    { "0x30a7a6000", 1},
+    {"-0x30a7a6000", 1},
+    { "0xf33e5a5a59", 1},
+    {"-0xf33e5a5a59", 0},
+    { "0xed1b1182118135d", 1},
+    {"-0xed1b1182118135d", 1},
+    { "0xe71f6eb7689cc276b2f1", 1},
+    {"-0xe71f6eb7689cc276b2f1", 0},
+    { "0x12644507fe78cf563a4b342c92e7da9fe5e99cb75a01", 1},
+    {"-0x12644507fe78cf563a4b342c92e7da9fe5e99cb75a01", 0},
+    { "0x1ff2e7c581bb0951df644885bd33f50e472b0b73a204e13cbe98fdb424d66561e4000000", 1},
+    {"-0x1ff2e7c581bb0951df644885bd33f50e472b0b73a204e13cbe98fdb424d66561e4000000", 1},
+    { "0x2b9b44db2d91a6f8165c8c7339ef73633228ea29e388592e80354e4380004aad84000000", 1},
+    {"-0x2b9b44db2d91a6f8165c8c7339ef73633228ea29e388592e80354e4380004aad84000000", 1},
+    { "0x28d5a2b8f330910a9d3cda06036ae0546442e5b1a83b26a436efea5b727bf1bcbe7e12b47d81", 1},
+    {"-0x28d5a2b8f330910a9d3cda06036ae0546442e5b1a83b26a436efea5b727bf1bcbe7e12b47d81", 1},
+    {NULL, 0}
+  };
+
+
+/* Run mpz_perfect_power_p over the fixed tests[] table and abort on the
+   first mismatch.  */
+void
+check_tests ()
+{
+  mpz_t x;
+  int i;
+  int got, want;
+
+  mpz_init (x);
+
+  for (i = 0; tests[i].num_as_str != NULL; i++)
+    {
+      mpz_set_str (x, tests[i].num_as_str, 0);
+      got = mpz_perfect_power_p (x);
+      want = tests[i].want;
+      if (got != want)
+	{
+	  fprintf (stderr, "mpz_perfect_power_p returns %d when %d was expected\n", got, want);
+	  fprintf (stderr, "fault operand: %s\n", tests[i].num_as_str);
+	  abort ();
+	}
+    }
+
+  mpz_clear (x);
+}
+
+#define NRP 15
+
+/* Randomized mpz_perfect_power_p test.  Each rep builds
+   n = p1^e1 * ... * pk^ek from distinct random primes whose exponents all
+   share a factor g >= 2 (so n is a perfect power), then optionally
+   "destroys" some leading exponents so the gcd of all exponents becomes 1
+   (so n is no longer a perfect power), and checks the predicate agrees.  */
+void
+check_random (int reps)
+{
+  mpz_t n, np, temp, primes[NRP];
+  int i, j, k, unique, destroy, res;
+  unsigned long int nrprimes, primebits;
+  mp_limb_t g, exp[NRP], e;
+  gmp_randstate_ptr rands;
+
+  rands = RANDS;
+
+  mpz_init (n);
+  mpz_init (np);
+  mpz_init (temp);
+
+  for (i = 0; i < NRP; i++)
+    mpz_init (primes[i]);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (np, rands, 32);
+      nrprimes = mpz_get_ui (np) % NRP + 1; /* 1-NRP unique primes */
+
+      mpz_urandomb (np, rands, 32);
+      g = mpz_get_ui (np) % 32 + 2; /* gcd 2-33 */
+
+      /* draw nrprimes distinct primes; retry on duplicates */
+      for (j = 0; j < nrprimes;)
+	{
+	  mpz_urandomb (np, rands, 32);
+	  primebits = mpz_get_ui (np) % 100 + 3; /* 3-102 bit primes */
+	  mpz_urandomb (primes[j], rands, primebits);
+	  mpz_nextprime (primes[j], primes[j]);
+	  unique = 1;
+	  for (k = 0; k < j; k++)
+	    {
+	      if (mpz_cmp (primes[j], primes[k]) == 0)
+		{
+		  unique = 0;
+		  break;
+		}
+	    }
+	  if (unique)
+	    {
+	      mpz_urandomb (np, rands, 32);
+	      e = 371 / (10 * primebits) + mpz_get_ui (np) % 11 + 1; /* Magic constants */
+	      exp[j++] = g * e;  /* every exponent shares factor g */
+	    }
+	}
+
+      if (nrprimes > 1)
+	{
+	  /* Destroy d exponents, d in [1, nrprimes - 1] */
+	  /* NOTE(review): for nrprimes > 2 the modulus below actually gives
+	     destroy in [0, nrprimes-3], not [1, nrprimes-1] as the comment
+	     above says.  destroy == 0 leaves n a perfect power, which the
+	     res checks below do handle -- confirm whether the narrower
+	     range is intended.  */
+	  if (nrprimes == 2)
+	    {
+	      destroy = 1;
+	    }
+	  else
+	    {
+	      mpz_urandomb (np, rands, 32);
+	      destroy = mpz_get_ui (np) % (nrprimes - 2);
+	    }
+
+	  /* g := gcd of the exponents that will be kept */
+	  g = exp[destroy];
+	  for (k = destroy + 1; k < nrprimes; k++)
+	    g = mpn_gcd_1 (&g, 1, exp[k]);
+
+	  /* replace the first `destroy` exponents by values coprime to g */
+	  for (j = 0; j < destroy; j++)
+	    {
+	      mpz_urandomb (np, rands, 32);
+	      e = mpz_get_ui (np) % 50 + 1;
+	      while (mpn_gcd_1 (&g, 1, e) > 1)
+		e++;
+
+	      exp[j] = e;
+	    }
+	}
+
+      /* Compute n */
+      mpz_pow_ui (n, primes[0], exp[0]);
+      for (j = 1; j < nrprimes; j++)
+	{
+	  mpz_pow_ui (temp, primes[j], exp[j]);
+	  mpz_mul (n, n, temp);
+	}
+
+      res = mpz_perfect_power_p (n);
+
+      if (nrprimes == 1)
+	{
+	if (res == 0 && exp[0] > 1)
+	  {
+	    printf("n is a perfect power, perfpow_p disagrees\n");
+	    gmp_printf("n = %Zu\nprimes[0] = %Zu\nexp[0] = %lu\n", n, primes[0], exp[0]);
+	    abort ();
+	  }
+	else if (res == 1 && exp[0] == 1)
+	  {
+	    gmp_printf("n = %Zu\n", n);
+	    printf("n is now a prime number, but perfpow_p still believes n is a perfect power\n");
+	    abort ();
+	  }
+	}
+      else
+	{
+	  if (res == 1 && destroy != 0)
+	    {
+	      gmp_printf("n = %Zu\nn was destroyed, but perfpow_p still believes n is a perfect power\n", n);
+	      abort ();
+	    }
+	  else if (res == 0 && destroy == 0)
+	    {
+	      gmp_printf("n = %Zu\nn is a perfect power, perfpow_p disagrees\n", n);
+	      abort ();
+	    }
+	}
+    }
+
+  mpz_clear (n);
+  mpz_clear (np);
+  mpz_clear (temp);
+  for (i = 0; i < NRP; i++)
+    mpz_clear (primes[i]);
+}
+
+int
+main (int argc, char **argv)
+{
+  int n_tests;
+
+  tests_start ();
+  mp_trace_base = -16;  /* mpz_trace output in base 16 */
+
+  check_tests ();
+
+  /* optional argument overrides the number of random reps */
+  n_tests = 500;
+  if (argc == 2)
+    n_tests = atoi (argv[1]);
+  check_random (n_tests);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-perfsqr.c b/tests/mpz/t-perfsqr.c
new file mode 100644
index 0000000..2223593
--- /dev/null
+++ b/tests/mpz/t-perfsqr.c
@@ -0,0 +1,154 @@
+/* Test mpz_perfect_square_p.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include "mpn/perfsqr.h"
+
+
+/* check_modulo() exercises mpz_perfect_square_p on squares which cover each
+   possible quadratic residue to each divisor used within
+   mpn_perfect_square_p, ensuring those residues aren't incorrectly claimed
+   to be non-residues.
+
+   Each divisor is taken separately.  It's arranged that n is congruent to 0
+   modulo the other divisors, 0 of course being a quadratic residue to any
+   modulus.
+
+   The values "(j*others)^2" cover all quadratic residues mod divisor[i],
+   but in no particular order.  j is run from 1<=j<=divisor[i] so that zero
+   is excluded.  A literal n==0 doesn't reach the residue tests.  */
+
+void
+check_modulo (void)
+{
+  static const unsigned long  divisor[] = PERFSQR_DIVISORS;
+  unsigned long  i, j;
+
+  mpz_t  alldiv, others, n;
+
+  mpz_init (alldiv);
+  mpz_init (others);
+  mpz_init (n);
+
+  /* product of all divisors */
+  mpz_set_ui (alldiv, 1L);
+  for (i = 0; i < numberof (divisor); i++)
+    mpz_mul_ui (alldiv, alldiv, divisor[i]);
+
+  for (i = 0; i < numberof (divisor); i++)
+    {
+      /* product of all divisors except i */
+      mpz_set_ui (others, 1L);
+      for (j = 0; j < numberof (divisor); j++)
+        if (i != j)
+          mpz_mul_ui (others, others, divisor[j]);
+
+      for (j = 1; j <= divisor[i]; j++)
+        {
+          /* square */
+          mpz_mul_ui (n, others, j);
+          mpz_mul (n, n, n);
+          if (! mpz_perfect_square_p (n))
+            {
+              printf ("mpz_perfect_square_p got 0, want 1\n");
+              mpz_trace ("  n", n);
+              abort ();
+            }
+        }
+    }
+
+  mpz_clear (alldiv);
+  mpz_clear (others);
+  mpz_clear (n);
+}
+
+
+/* Exercise mpz_perfect_square_p compared to what mpz_sqrt says. */
+void
+check_sqrt (int reps)
+{
+  mpz_t x2, x2t, x;
+  mp_size_t x2n;
+  int res;
+  int i;
+  /* int cnt = 0; */
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t bs;
+
+  mpz_init (bs);
+
+  mpz_init (x2);
+  mpz_init (x);
+  mpz_init (x2t);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 9);
+      x2n = mpz_get_ui (bs);
+      mpz_rrandomb (x2, rands, x2n);
+      /* mpz_out_str (stdout, -16, x2); puts (""); */
+
+      res = mpz_perfect_square_p (x2);
+      mpz_sqrt (x, x2);
+      mpz_mul (x2t, x, x);
+
+      if (res != (mpz_cmp (x2, x2t) == 0))
+        {
+          printf    ("mpz_perfect_square_p and mpz_sqrt differ\n");
+          mpz_trace ("   x  ", x);
+          mpz_trace ("   x2 ", x2);
+          mpz_trace ("   x2t", x2t);
+          printf    ("   mpz_perfect_square_p %d\n", res);
+          printf    ("   mpz_sqrt             %d\n", mpz_cmp (x2, x2t) == 0);
+          abort ();
+        }
+
+      /* cnt += res != 0; */
+    }
+  /* printf ("%d/%d perfect squares\n", cnt, reps); */
+
+  mpz_clear (bs);
+  mpz_clear (x2);
+  mpz_clear (x);
+  mpz_clear (x2t);
+}
+
+
+int
+main (int argc, char **argv)
+{
+  int reps = 200000;
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  check_modulo ();
+  check_sqrt (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-popcount.c b/tests/mpz/t-popcount.c
new file mode 100644
index 0000000..8952cc2
--- /dev/null
+++ b/tests/mpz/t-popcount.c
@@ -0,0 +1,167 @@
+/* Test mpz_popcount.
+
+Copyright 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+
+void
+check_onebit (void)
+{
+  mpz_t          n;
+  unsigned long  i, got;
+
+  mpz_init (n);
+  for (i = 0; i < 5 * GMP_LIMB_BITS; i++)
+    {
+      mpz_setbit (n, i);
+      got = mpz_popcount (n);
+      if (got != 1)
+	{
+	  printf ("mpz_popcount wrong on single bit at %lu\n", i);
+	  printf ("   got %lu, want 1\n", got);
+	  abort();
+	}
+      mpz_clrbit (n, i);
+    }
+  mpz_clear (n);
+}
+
+
+void
+check_data (void)
+{
+  static const struct {
+    const char     *n;
+    unsigned long  want;
+  } data[] = {
+    { "-1", ~ (unsigned long) 0 },
+    { "-12345678", ~ (unsigned long) 0 },
+    { "0", 0 },
+    { "1", 1 },
+    { "3", 2 },
+    { "5", 2 },
+    { "0xFFFF", 16 },
+    { "0xFFFFFFFF", 32 },
+    { "0xFFFFFFFFFFFFFFFF", 64 },
+    { "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 128 },
+  };
+
+  unsigned long   got;
+  int    i;
+  mpz_t  n;
+
+  mpz_init (n);
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (n, data[i].n, 0);
+      got = mpz_popcount (n);
+      if (got != data[i].want)
+	{
+	  printf ("mpz_popcount wrong at data[%d]\n", i);
+	  printf ("   n     \"%s\"\n", data[i].n);
+	  printf ("         ");   mpz_out_str (stdout, 10, n); printf ("\n");
+	  printf ("         0x"); mpz_out_str (stdout, 16, n); printf ("\n");
+	  printf ("   got   %lu\n", got);
+	  printf ("   want  %lu\n", data[i].want);
+	  abort ();
+	}
+    }
+  mpz_clear (n);
+}
+
+unsigned long
+refmpz_popcount (mpz_t arg)
+{
+  mp_size_t n, i;
+  unsigned long cnt;
+  mp_limb_t x;
+
+  n = SIZ(arg);
+  if (n < 0)
+    return ~(unsigned long) 0;
+
+  cnt = 0;
+  for (i = 0; i < n; i++)
+    {
+      x = PTR(arg)[i];
+      while (x != 0)
+	{
+	  cnt += (x & 1);
+	  x >>= 1;
+	}
+    }
+  return cnt;
+}
+
+void
+check_random (void)
+{
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  mpz_t arg;
+  unsigned long arg_size, size_range;
+  unsigned long got, ref;
+  int i;
+
+  rands = RANDS;
+
+  mpz_init (bs);
+  mpz_init (arg);
+
+  for (i = 0; i < 10000; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 11 + 2; /* 0..4096 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      arg_size = mpz_get_ui (bs);
+      mpz_rrandomb (arg, rands, arg_size);
+
+      got = mpz_popcount (arg);
+      ref = refmpz_popcount (arg);
+      if (got != ref)
+	{
+	  printf ("mpz_popcount wrong on random\n");
+	  printf ("         ");   mpz_out_str (stdout, 10, arg); printf ("\n");
+	  printf ("         0x"); mpz_out_str (stdout, 16, arg); printf ("\n");
+	  printf ("   got   %lu\n", got);
+	  printf ("   want  %lu\n", ref);
+	  abort ();
+	}
+    }
+  mpz_clear (arg);
+  mpz_clear (bs);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_onebit ();
+  check_data ();
+  check_random ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-pow.c b/tests/mpz/t-pow.c
new file mode 100644
index 0000000..ff41721
--- /dev/null
+++ b/tests/mpz/t-pow.c
@@ -0,0 +1,217 @@
+/* Test mpz_pow_ui and mpz_ui_pow_ui.
+
+Copyright 1997, 1999-2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpz_srcptr want, mpz_srcptr base, unsigned long exp)
+{
+  mpz_t  got;
+
+  mpz_init (got);
+
+  MPZ_CHECK_FORMAT (want);
+
+  mpz_pow_ui (got, base, exp);
+  if (mpz_cmp (got, want))
+    {
+      printf ("mpz_pow_ui wrong\n");
+      mpz_trace ("  base", base);
+      printf    ("  exp = %lu (0x%lX)\n", exp, exp);
+      mpz_trace ("  got ", got);
+      mpz_trace ("  want", want);
+      abort ();
+    }
+
+  mpz_set (got, base);
+  mpz_pow_ui (got, got, exp);
+  if (mpz_cmp (got, want))
+    {
+      printf ("mpz_pow_ui wrong\n");
+      mpz_trace ("  base", base);
+      printf    ("  exp = %lu (0x%lX)\n", exp, exp);
+      mpz_trace ("  got ", got);
+      mpz_trace ("  want", want);
+      abort ();
+    }
+
+  if (mpz_fits_ulong_p (base))
+    {
+      unsigned long  base_u = mpz_get_ui (base);
+      mpz_ui_pow_ui (got, base_u, exp);
+      if (mpz_cmp (got, want))
+	{
+	  printf    ("mpz_ui_pow_ui wrong\n");
+	  printf    ("  base=%lu (0x%lX)\n", base_u, base_u);
+	  printf    ("  exp = %lu (0x%lX)\n", exp, exp);
+	  mpz_trace ("  got ", got);
+	  mpz_trace ("  want", want);
+	  abort ();
+	}
+    }
+
+  mpz_clear (got);
+}
+
+void
+check_base (mpz_srcptr base)
+{
+  unsigned long  exp;
+  mpz_t          want;
+
+  mpz_init (want);
+  mpz_set_ui (want, 1L);
+
+  for (exp = 0; exp < 20; exp++)
+    {
+      check_one (want, base, exp);
+      mpz_mul (want, want, base);
+    }
+
+  mpz_clear (want);
+}
+
+void
+check_various (void)
+{
+  static const struct {
+    const char *base;
+  } data[] = {
+    { "0" },
+    { "1" },
+    { "2" },
+    { "3" },
+    { "4" },
+    { "5" },
+    { "6" },
+    { "10" },
+    { "15" },
+    { "16" },
+
+    { "0x1F" },
+    { "0xFF" },
+    { "0x1001" },
+    { "0xFFFF" },
+    { "0x10000001" },
+    { "0x1000000000000001" },
+
+    /* actual size closest to estimate */
+    { "0xFFFFFFFF" },
+    { "0xFFFFFFFFFFFFFFFF" },
+
+    /* same after rshift */
+    { "0xFFFFFFFF0" },
+    { "0xFFFFFFFF00" },
+    { "0xFFFFFFFFFFFFFFFF0" },
+    { "0xFFFFFFFFFFFFFFFF00" },
+
+    /* change from 2 limbs to 1 after rshift */
+    { "0x180000000" },
+    { "0x18000000000000000" },
+
+    /* change from 3 limbs to 2 after rshift */
+    { "0x18000000100000000" },
+    { "0x180000000000000010000000000000000" },
+
+    /* handling of absolute value */
+    { "-0x80000000" },
+    { "-0x8000000000000000" },
+
+    /* low zero limb, and size>2, checking argument overlap detection */
+    { "0x3000000000000000300000000000000030000000000000000" },
+  };
+
+  mpz_t  base;
+  int    i;
+
+  mpz_init (base);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (base, data[i].base, 0);
+      check_base (base);
+    }
+
+  mpz_clear (base);
+}
+
+void
+check_random (int reps)
+{
+  mpz_t              base, want;
+  mp_size_t          base_size;
+  int                i;
+  unsigned long      size_range, exp;
+  gmp_randstate_ptr  rands = RANDS;
+
+  mpz_init (base);
+  mpz_init (want);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* exponentially random 0 to 2^13 bits for base */
+      mpz_urandomb (want, rands, 32);
+      size_range = mpz_get_ui (want) % 12 + 2;
+      mpz_urandomb (want, rands, size_range);
+      base_size = mpz_get_ui (want);
+      mpz_rrandomb (base, rands, base_size);
+
+      /* randomly signed base */
+      mpz_urandomb (want, rands, 2);
+      if ((mpz_get_ui (want) & 1) != 0)
+	mpz_neg (base, base);
+
+      /* random 5 bits for exponent */
+      mpz_urandomb (want, rands, 5L);
+      exp = mpz_get_ui (want);
+
+      refmpz_pow_ui (want, base, exp);
+      check_one (want, base, exp);
+    }
+
+  mpz_clear (base);
+  mpz_clear (want);
+}
+
+int
+main (int argc, char **argv)
+{
+  int reps = 5000;
+
+  /* dummy call to drag in refmpn.o for testing mpz/n_pow_ui.c with
+     refmpn_mul_2 */
+  refmpn_zero_p (NULL, (mp_size_t) 0);
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  check_various ();
+  check_random (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-powm.c b/tests/mpz/t-powm.c
new file mode 100644
index 0000000..1a25ed7
--- /dev/null
+++ b/tests/mpz/t-powm.c
@@ -0,0 +1,263 @@
+/* Test mpz_powm, mpz_mul, mpz_mod, mpz_mod_ui, mpz_div_ui.
+
+Copyright 1991, 1993, 1994, 1996, 1999-2001, 2009, 2012, 2019 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp (mpz_t, int);
+
+#define SIZEM 13
+
+/* Check that all sizes up to just above MUL_TOOM22_THRESHOLD have been tested
+   a few times.  FIXME: If SIZEM is set too low, this will never happen.  */
+int
+allsizes_seen (unsigned int *allsizes)
+{
+  mp_size_t i;
+
+  for (i = 1; i < MUL_TOOM22_THRESHOLD + 4; i++)
+    if (allsizes[i] < 4)
+      return 0;
+  return 1;
+}
+
+void
+small_2pow (unsigned long reps)
+{
+  mpz_t du, exp, mod;
+  mpz_t r1;
+  unsigned long m, e, r;
+  mp_limb_t b0 = 2;
+
+  mpz_roinit_n (du, &b0, 1);
+  mpz_init (exp);
+  mpz_init (mod);
+  mpz_init (r1);
+
+  for (m = 3; m * m < reps; m += 2)
+    {
+      mpz_set_ui (mod, m);
+      r = 1;
+      for (e = 0; e < m; e += 1)
+	{
+	  mpz_set_ui (exp, e);
+	  mpz_powm (r1, du, exp, mod);
+	  MPZ_CHECK_FORMAT (r1);
+	  if (mpz_cmp_ui (r1, r) != 0)
+	    {
+	      fprintf (stderr, "\nIncorrect result for operands:\n");
+	      debug_mp (du, -16);
+	      debug_mp (exp, -16);
+	      debug_mp (mod, -16);
+	      fprintf (stderr, "mpz_powm result:\n");
+	      debug_mp (r1, -16);
+	      fprintf (stderr, "Should be 2 ^ 0x%lx = 0x%lx (mod 0x%lx)\n", e, r, m);
+	      abort ();
+	    }
+	  if (r > (m >> 1))
+	    r = (r << 1) - m;
+	  else
+	    r = r << 1;
+	}
+    }
+
+  mpz_clear (exp);
+  mpz_clear (mod);
+  mpz_clear (r1);
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t base, exp, mod;
+  mpz_t r1, r2, t1, exp2, base2;
+  mp_size_t base_size, exp_size, mod_size;
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  unsigned int allsizes[1 << (SIZEM + 2 - 1)];
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  small_2pow ((unsigned int) reps);
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (base);
+  mpz_init (exp);
+  mpz_init (mod);
+  mpz_init (r1);
+  mpz_init (r2);
+  mpz_init (t1);
+  mpz_init (exp2);
+  mpz_init (base2);
+
+  memset (allsizes, 0, (1 << (SIZEM + 2 - 1)) * sizeof (int));
+
+  reps += reps >> 3;
+  for (i = 0; i < reps || ! allsizes_seen (allsizes); i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % SIZEM + 2;
+
+      if ((i & 7) == 0)
+	{
+	  mpz_set_ui (exp, 1);
+
+	  do  /* Loop until mathematically well-defined.  */
+	    {
+	      mpz_urandomb (bs, rands, size_range / 2 + 2);
+	      base_size = mpz_get_ui (bs);
+	      mpz_rrandomb (base, rands, base_size);
+	    }
+	  while (mpz_cmp_ui (base, 0) == 0);
+
+	  mpz_urandomb (bs, rands, size_range / 2);
+	  mod_size = mpz_get_ui (bs);
+	  mod_size = MIN (mod_size, base_size);
+	  mpz_rrandomb (mod, rands, mod_size);
+
+	  mpz_urandomb (bs, rands, size_range);
+	  mod_size = mpz_get_ui (bs) + base_size + 2;
+	  if ((i & 8) == 0)
+	    mod_size += GMP_NUMB_BITS - mod_size % GMP_NUMB_BITS;
+	  mpz_setbit (mod, mod_size);
+
+	  mpz_sub (base, base, mod);
+	}
+      else
+	{
+      do  /* Loop until mathematically well-defined.  */
+	{
+	  if ((i & 7) == 4)
+	    mpz_set_ui (base, 2);
+	  else
+	    {
+	      mpz_urandomb (bs, rands, size_range);
+	      base_size = mpz_get_ui (bs);
+	      mpz_rrandomb (base, rands, base_size);
+	    }
+
+	  mpz_urandomb (bs, rands, 7L);
+	  exp_size = mpz_get_ui (bs);
+	  mpz_rrandomb (exp, rands, exp_size);
+	}
+      while (mpz_cmp_ui (base, 0) == 0 && mpz_cmp_ui (exp, 0) == 0);
+
+      do
+        {
+	  mpz_urandomb (bs, rands, size_range);
+	  mod_size = mpz_get_ui (bs);
+	  mpz_rrandomb (mod, rands, mod_size);
+	}
+      while (mpz_cmp_ui (mod, 0) == 0);
+
+      allsizes[SIZ(mod)] += 1;
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (base, base);
+
+      /* printf ("%ld %ld %ld\n", SIZ (base), SIZ (exp), SIZ (mod)); */
+	}
+
+      mpz_set_ui (r2, 1);
+      mpz_mod (base2, base, mod);
+      mpz_set (exp2, exp);
+      mpz_mod (r2, r2, mod);
+
+      for (;;)
+	{
+	  if (mpz_tstbit (exp2, 0))
+	    {
+	      mpz_mul (r2, r2, base2);
+	      mpz_mod (r2, r2, mod);
+	    }
+	  if  (mpz_cmp_ui (exp2, 1) <= 0)
+	    break;
+	  mpz_mul (base2, base2, base2);
+	  mpz_mod (base2, base2, mod);
+	  mpz_tdiv_q_2exp (exp2, exp2, 1);
+	}
+
+      mpz_powm (r1, base, exp, mod);
+      MPZ_CHECK_FORMAT (r1);
+
+      if (mpz_cmp (r1, r2) != 0)
+	{
+	  fprintf (stderr, "\nIncorrect results in test %d for operands:\n", i);
+	  debug_mp (base, -16);
+	  debug_mp (exp, -16);
+	  debug_mp (mod, -16);
+	  fprintf (stderr, "mpz_powm result:\n");
+	  debug_mp (r1, -16);
+	  fprintf (stderr, "reference result:\n");
+	  debug_mp (r2, -16);
+	  abort ();
+	}
+
+      if (mpz_tdiv_ui (mod, 2) == 0)
+	continue;
+
+      mpz_powm_sec (r1, base, exp, mod);
+      MPZ_CHECK_FORMAT (r1);
+
+      if (mpz_cmp (r1, r2) != 0)
+	{
+	  fprintf (stderr, "\nIncorrect results in test %d for operands:\n", i);
+	  debug_mp (base, -16);
+	  debug_mp (exp, -16);
+	  debug_mp (mod, -16);
+	  fprintf (stderr, "mpz_powm_sec result:\n");
+	  debug_mp (r1, -16);
+	  fprintf (stderr, "reference result:\n");
+	  debug_mp (r2, -16);
+	  abort ();
+	}
+    }
+
+  mpz_clear (bs);
+  mpz_clear (base);
+  mpz_clear (exp);
+  mpz_clear (mod);
+  mpz_clear (r1);
+  mpz_clear (r2);
+  mpz_clear (t1);
+  mpz_clear (exp2);
+  mpz_clear (base2);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-powm_ui.c b/tests/mpz/t-powm_ui.c
new file mode 100644
index 0000000..5b446c5
--- /dev/null
+++ b/tests/mpz/t-powm_ui.c
@@ -0,0 +1,127 @@
+/* Test mpz_powm_ui, mpz_mul, mpz_mod.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+int
+main (int argc, char **argv)
+{
+  mpz_t base, exp, mod;
+  mpz_t r1, r2, base2;
+  mp_size_t base_size, exp_size, mod_size;
+  unsigned long int exp2;
+  int i;
+  int reps = 100;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  TESTS_REPS (reps, argv, argc);
+
+  mpz_inits (bs, base, exp, mod, r1, r2, base2, NULL);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 18 + 2;
+
+      do  /* Loop until mathematically well-defined.  */
+	{
+	  mpz_urandomb (bs, rands, size_range);
+	  base_size = mpz_get_ui (bs);
+	  mpz_rrandomb (base, rands, base_size);
+
+	  mpz_urandomb (bs, rands, 6L);
+	  exp_size = mpz_get_ui (bs);
+	  mpz_rrandomb (exp, rands, exp_size);
+	  exp2 = mpz_getlimbn (exp, (mp_size_t) 0);
+	}
+      while (mpz_cmp_ui (base, 0) == 0 && exp2 == 0);
+
+      do
+        {
+	  mpz_urandomb (bs, rands, size_range);
+	  mod_size = mpz_get_ui (bs);
+	  mpz_rrandomb (mod, rands, mod_size);
+	}
+      while (mpz_cmp_ui (mod, 0) == 0);
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (base, base);
+
+      /* printf ("%ld %ld\n", SIZ (base), SIZ (mod)); */
+
+#if 0
+      putc ('\n', stderr);
+      gmp_fprintf (stderr, "B = 0x%Zx\n", base);
+      gmp_fprintf (stderr, "M = 0x%Zx\n", mod);
+#endif
+
+      exp2 = mpz_getlimbn (exp, (mp_size_t) 0);
+      mpz_set_ui (r2, 1);
+      mpz_set (base2, base);
+      mpz_mod (r2, r2, mod);	/* needed when exp==0 and mod==1 */
+      while (exp2 != 0)
+	{
+	  if (exp2 % 2 != 0)
+	    {
+	      mpz_mul (r2, r2, base2);
+	      mpz_mod (r2, r2, mod);
+	    }
+	  mpz_mul (base2, base2, base2);
+	  mpz_mod (base2, base2, mod);
+	  exp2 = exp2 / 2;
+	}
+
+      exp2 = mpz_getlimbn (exp, (mp_size_t) 0);
+      mpz_powm_ui (r1, base, exp2, mod);
+      MPZ_CHECK_FORMAT (r1);
+
+#if 0
+      gmp_fprintf (stderr, "R   = 0x%Zx\n", r1);
+      gmp_fprintf (stderr, "REF = 0x%Zx\n", r2);
+#endif
+
+      if (mpz_cmp (r1, r2) != 0)
+	{
+	  fprintf (stderr, "\ntest %d: Incorrect results for operands:\n", i);
+	  gmp_fprintf (stderr, "B = 0x%Zx\n", base);
+	  gmp_fprintf (stderr, "E = 0x%Zx\n", exp);
+	  gmp_fprintf (stderr, "M = 0x%Zx\n", mod);
+	  gmp_fprintf (stderr, "R   = 0x%Zx\n", r1);
+	  gmp_fprintf (stderr, "REF = 0x%Zx\n", r2);
+	  abort ();
+	}
+    }
+
+  mpz_clears (bs, base, exp, mod, r1, r2, base2, NULL);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-pprime_p.c b/tests/mpz/t-pprime_p.c
new file mode 100644
index 0000000..dffe6ea
--- /dev/null
+++ b/tests/mpz/t-pprime_p.c
@@ -0,0 +1,243 @@
+/* Exercise mpz_probab_prime_p.
+
+Copyright 2002, 2018-2019, 2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Enhancements:
+
+   - Test some big primes don't come back claimed to be composite.
+   - Test some big composites don't come back claimed to be certainly prime.
+   - Test some big composites with small factors are identified as certainly
+     composite.  */
+
+
+/* return 2 if prime, 0 if composite */
+int
+isprime (unsigned long n)
+{
+  if (n < 4)
+    return (n & 2);
+  if ((n & 1) == 0)
+    return 0;
+
+  for (unsigned long i = 3; i*i <= n; i+=2)
+    if ((n % i) == 0)
+      return 0;
+
+  return 2;
+}
+
+void
+check_one (mpz_srcptr n, int want)
+{
+  int  got;
+
+  got = mpz_probab_prime_p (n, 25);
+
+  /* "definitely prime" (2) is fine if we only wanted "probably prime" (1) */
+  if ((got != want) && (got != want * 2))
+    {
+      printf ("mpz_probab_prime_p\n");
+      mpz_trace ("  n    ", n);
+      printf    ("  got =%d", got);
+      printf    ("  want=%d", want);
+      abort ();
+    }
+}
+
+void
+check_pn (mpz_ptr n, int want)
+{
+  check_one (n, want);
+  mpz_neg (n, n);
+  check_one (n, want);
+}
+
+/* expect certainty for small n */
+void
+check_small (void)
+{
+  mpz_t  n;
+  long   i;
+
+  mpz_init (n);
+
+  for (i = 0; i < 300; i++)
+    {
+      mpz_set_si (n, i);
+      check_pn (n, isprime (i));
+    }
+
+  mpz_clear (n);
+}
+
+void
+check_composites (int count)
+{
+  int i;
+  mpz_t a, b, n, bs;
+  unsigned long size_range, size;
+  gmp_randstate_ptr rands = RANDS;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (n);
+  mpz_init (bs);
+
+  static const char * const composites[] = {
+    "225670644213750121",	/* n=61*C16, if D < 61, (n/D) = 1.	*/
+    "2386342059899637841",	/* n=61*C17, if D < 61, (n/D) = 1.	*/
+    "1194649",	/* A square, but strong base-2 pseudoprime,	*/
+    "12327121",	/* another base-2 pseudoprime square.	*/
+    "18446744066047760377",	/* Should trigger Fibonacci's test;	*/
+    "10323769",			/* &3==1, Lucas' test with D=37;	*/
+    "1397419",			/* &3==3, Lucas' test with D=43;	*/
+    "11708069165918597341",	/* &3==1, Lucas' test with large D=107;	*/
+    "395009109077493751",	/* &3==3, Lucas' test with large D=113.	*/
+    NULL
+  };
+
+  for (i = 0; composites[i]; i++)
+    {
+      mpz_set_str_or_abort (n, composites[i], 0);
+      check_one (n, 0);
+    }
+
+  for (i = 0; i < count; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 13 + 1; /* 0..8192 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (a, rands, size);
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 13 + 1; /* 0..8192 bit operands */
+      mpz_rrandomb (b, rands, size);
+
+      /* Exclude trivial factors */
+      if (mpz_cmp_ui (a, 1) == 0)
+	mpz_set_ui (a, 2);
+      if (mpz_cmp_ui (b, 1) == 0)
+	mpz_set_ui (b, 2);
+
+      mpz_mul (n, a, b);
+
+      check_pn (n, 0);
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (n);
+  mpz_clear (bs);
+}
+
+static void
+check_primes (void)
+{
+  static const char * const primes[] = {
+    "2", "53", "1234567891",
+    "2055693949", "1125899906842597", "16412292043871650369",
+    "18446744075358702679",	/* Lucas' test with large D=107.	*/
+    /* diffie-hellman-group1-sha1, also "Well known group 2" in RFC
+       2412, 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 } */
+    "0xFFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
+    "29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
+    "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
+    "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
+    "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE65381"
+    "FFFFFFFFFFFFFFFF",
+    NULL
+  };
+
+  mpz_t n;
+  int i;
+
+  mpz_init (n);
+
+  for (i = 0; primes[i]; i++)
+    {
+      mpz_set_str_or_abort (n, primes[i], 0);
+      check_one (n, 1);
+    }
+  mpz_clear (n);
+}
+
+static void
+check_fermat_mersenne (int count)
+{
+  int fermat_exponents [] = {1, 2, 4, 8, 16};
+  int mersenne_exponents [] = {2, 3, 5, 7, 13, 17, 19, 31, 61, 89,
+			       107, 127, 521, 607, 1279, 2203, 2281,
+			       3217, 4253, 4423, 9689, 9941, 11213,
+			       19937, 21701, 23209, 44497, 86243};
+  mpz_t pp;
+  int i, j, want;
+
+  mpz_init (pp);
+  count = MIN (110000, count);
+
+  for (i=1; i<count; ++i)
+    {
+      mpz_set_ui (pp, 1);
+      mpz_setbit (pp, i); /* 2^i + 1 */
+      want = 0;
+      for (j = 0; j < numberof (fermat_exponents); j++)
+	if (fermat_exponents[j] == i)
+	  {
+	    /* Fermat's primes are small enough for a definite answer. */
+	    want = 2;
+	    break;
+	  }
+      check_one (pp, want);
+
+      mpz_sub_ui (pp, pp, 2); /* 2^i - 1 */
+      want = 0;
+      for (j = 0; j < numberof (mersenne_exponents); j++)
+	if (mersenne_exponents[j] == i)
+	  {
+	    want = 1 << (i < 50);
+	    break;
+	  }
+      check_one (pp, want);
+    }
+  mpz_clear (pp);
+}
+
+int
+main (int argc, char **argv)
+{
+  int count = 1000;
+
+  TESTS_REPS (count, argv, argc);
+
+  tests_start ();
+
+  check_small ();
+  check_fermat_mersenne (count >> 3);
+  check_composites (count);
+  check_primes ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-primorial_ui.c b/tests/mpz/t-primorial_ui.c
new file mode 100644
index 0000000..b4d2bfe
--- /dev/null
+++ b/tests/mpz/t-primorial_ui.c
@@ -0,0 +1,145 @@
+/* Exercise mpz_primorial_ui.
+
+Copyright 2000-2002, 2012, 2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-primorial_ui [x|num]
+
+   With no arguments testing goes up to the initial value of "limit" below.
+   With a number argument tests are carried that far, or with a literal "x"
+   tests are continued without limit (this being meant only for development
+   purposes).  */
+
+static int isprime (unsigned long int t);
+
+int
+main (int argc, char *argv[])
+{
+  unsigned long  n;
+  unsigned long  limit = 2222;
+  gmp_randstate_ptr rands;
+  mpz_t          f, r, bs;
+
+  tests_start ();
+  rands = RANDS;
+
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;
+  else
+    TESTS_REPS (limit, argv, argc);
+
+  /* for small limb testing: keep n representable in one limb (see MIN clamp below) */
+  limit = MIN (limit, MP_LIMB_T_MAX);
+
+  mpz_init_set_ui (f, 1);  /* 0# = 1 */
+  mpz_init (r);
+
+  n = 0;
+  do
+    {
+      mpz_primorial_ui (r, n);
+      MPZ_CHECK_FORMAT (r);
+
+      if (mpz_cmp (f, r) != 0)
+	{
+	  printf ("mpz_primorial_ui(%lu) wrong\n", n);
+	  printf ("  got  "); mpz_out_str (stdout, 10, r); printf("\n");
+	  printf ("  want "); mpz_out_str (stdout, 10, f); printf("\n");
+	  abort ();
+	}
+
+      if (isprime (++n))
+	mpz_mul_ui (f, f, n);  /* p# = (p-1)# * (p) */
+      if (n%16 == 0) { mpz_clear (r); mpz_init (r); }
+    } while (n < limit);
+
+  n = 0; limit =1;
+  mpz_init (bs);
+  do
+    {
+      unsigned long i, d;
+
+      mpz_urandomb (bs, rands, 21);
+      i = mpz_get_ui (bs);
+      mpz_urandomb (bs, rands, 9);
+      d = mpz_get_ui (bs) + 3*64;
+      mpz_primorial_ui (f, i);
+      MPZ_CHECK_FORMAT (f);
+      mpz_primorial_ui (r, i+d);
+      MPZ_CHECK_FORMAT (r);
+
+      do {
+	if (isprime (++i))
+	  mpz_mul_ui (f, f, i);
+      } while (--d != 0);
+
+      if (mpz_cmp (f, r) != 0)
+	{
+	  printf ("mpz_primorial_ui(%lu) wrong\n", i);
+	  printf ("  got  "); mpz_out_str (stdout, 10, r); printf("\n");
+	  printf ("  want "); mpz_out_str (stdout, 10, f); printf("\n");
+	  abort ();
+	}
+    } while (++n < limit);
+  /* Check a single "big" value, modulo a larger prime */
+  n = 2095637;
+  mpz_primorial_ui (r, n);
+  mpz_set_ui (f, 13);
+  mpz_setbit (f, 64); /* f = 2^64 + 13 */
+  mpz_tdiv_r (r, r, f);
+  mpz_set_str (f, "BAFCBF3C95B217D5", 16);
+
+  if (mpz_cmp (f, r) != 0)
+    {
+      printf ("mpz_primorial_ui(%lu) wrong\n", n);
+      printf ("  got  "); mpz_out_str (stdout, 10, r); printf("\n");
+      printf ("  want "); mpz_out_str (stdout, 10, f); printf("\n");
+      abort ();
+    }
+
+  mpz_clear (bs);
+  mpz_clear (f);
+  mpz_clear (r);
+
+  tests_end ();
+
+  exit (0);
+}
+
+static int
+isprime (unsigned long int t)
+{
+  unsigned long int q, r, d;
+
+  if (t < 3 || (t & 1) == 0)
+    return t == 2;
+
+  for (d = 3, r = 1; r != 0; d += 2)
+    {
+      q = t / d;
+      r = t - q * d;
+      if (q < d)
+	return 1;
+    }
+  return 0;
+}
diff --git a/tests/mpz/t-remove.c b/tests/mpz/t-remove.c
new file mode 100644
index 0000000..07ba918
--- /dev/null
+++ b/tests/mpz/t-remove.c
@@ -0,0 +1,146 @@
+/* Test mpz_remove.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2009, 2012, 2013 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp (mpz_t);
+unsigned long int mpz_refremove (mpz_t, const mpz_t, const mpz_t);
+
+/* Exercise mpz_remove: for each rep build a random dividend that is
+   guaranteed divisible by divisor^exp, then require mpz_remove and the
+   reference implementation mpz_refremove to agree on both the removed
+   multiplicity and the resulting cofactor.  */
+int
+main (int argc, char **argv)
+{
+  unsigned long int exp;
+  mpz_t t, dest, refdest, dividend, divisor;
+  mp_size_t dividend_size, divisor_size;
+  int i;
+  int reps = 1000;
+  unsigned long int pwr, refpwr;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_inits (bs, t, dest, refdest, dividend, divisor, NULL);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 18 + 1; /* 1..524288 bit operands */
+
+      /* Pick a non-zero divisor of random size.  */
+      do
+	{
+	  mpz_urandomb (bs, rands, size_range);
+	  divisor_size = mpz_get_ui (bs);
+	  mpz_rrandomb (divisor, rands, divisor_size);
+	}
+      while (mpz_sgn (divisor) == 0);
+
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs) + divisor_size;
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      /* Bound exp so divisor^exp stays a manageable size; negate the
+	 divisor half of the time.  */
+      mpz_urandomb (bs, rands, 32);
+      exp = mpz_get_ui (bs) % (5 + 10000 / mpz_sizeinbase (divisor, 2));
+      if (mpz_get_ui (bs) & 2)
+	mpz_neg (divisor, divisor);
+      mpz_pow_ui (t, divisor, exp);
+      mpz_mul (dividend, dividend, t);
+
+      refpwr = mpz_refremove (refdest, dividend, divisor);
+      pwr = mpz_remove (dest, dividend, divisor);
+
+      if (refpwr != pwr || mpz_cmp (refdest, dest) != 0)
+	{
+	  fprintf (stderr, "ERROR after %d tests\n", i);
+	  fprintf (stderr, "refpower = %lu\n", refpwr);
+	  fprintf (stderr, "   power = %lu\n", pwr);
+	  fprintf (stderr, "    op1 = "); debug_mp (dividend);
+	  fprintf (stderr, "    op2 = "); debug_mp (divisor);
+	  fprintf (stderr, "refdest = "); debug_mp (refdest);
+	  fprintf (stderr, "   dest = "); debug_mp (dest);
+	  abort ();
+	}
+    }
+
+  mpz_clears (bs, t, dest, refdest, dividend, divisor, NULL);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Reference implementation of mpz_remove: set dest to src with every
+   factor f divided out, and return how many times f divided src.
+   When |f| <= 1 nothing is removed and 0 is returned.  */
+unsigned long int
+mpz_refremove (mpz_t dest, const mpz_t src, const mpz_t f)
+{
+  unsigned long int pwr;
+
+  pwr = 0;
+
+  mpz_set (dest, src);
+  if (mpz_cmpabs_ui (f, 1) > 0)
+    {
+      mpz_t rem, x;
+
+      mpz_init (x);
+      mpz_init (rem);
+
+      /* Keep dividing by f until it no longer divides exactly,
+	 counting one removal per exact division.  */
+      for (;; pwr++)
+	{
+	  mpz_tdiv_qr (x, rem, dest, f);
+	  if (mpz_cmp_ui (rem, 0) != 0)
+	    break;
+	  mpz_swap (dest, x);
+	}
+
+      mpz_clear (x);
+      mpz_clear (rem);
+    }
+
+  return pwr;
+}
+
+/* Print x on stderr in hex.  Values wider than 65 hex digits are
+   abbreviated to the 25 leading and 25 trailing digits, followed by
+   the total digit count in brackets.  */
+void
+debug_mp (mpz_t x)
+{
+  size_t siz = mpz_sizeinbase (x, 16);
+
+  if (siz > 65)
+    {
+      mpz_t q;
+      mpz_init (q);
+      mpz_tdiv_q_2exp (q, x, 4 * (mpz_sizeinbase (x, 16) - 25));
+      gmp_fprintf (stderr, "%ZX...", q);
+      mpz_tdiv_r_2exp (q, x, 4 * 25);
+      gmp_fprintf (stderr, "%025ZX [%d]\n", q, (int) siz);
+      mpz_clear (q);
+    }
+  else
+    {
+      gmp_fprintf (stderr, "%ZX\n", x);
+    }
+}
diff --git a/tests/mpz/t-root.c b/tests/mpz/t-root.c
new file mode 100644
index 0000000..e1ce159
--- /dev/null
+++ b/tests/mpz/t-root.c
@@ -0,0 +1,174 @@
+/* Test mpz_root, mpz_rootrem, and mpz_perfect_power_p.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2009, 2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp (mpz_t, int);
+
+/* Validate one mpz_root result.  root1 and res are the output and
+   return value of mpz_root (root1, x2, nth) as computed by the caller;
+   i only tags error output.  Checks: mpz_rootrem agrees with mpz_root;
+   root1^nth + rem reconstructs x2; |root1^nth| <= |x2|; res is
+   consistent with the remainder; mpz_perfect_power_p accepts
+   root1^nth; and, for positive x2, (root1 + 1)^nth exceeds x2.  */
+void
+check_one (mpz_t root1, mpz_t x2, unsigned long nth, int res, int i)
+{
+  mpz_t temp, temp2;
+  mpz_t root2, rem2;
+
+  mpz_init (root2);
+  mpz_init (rem2);
+  mpz_init (temp);
+  mpz_init (temp2);
+
+  MPZ_CHECK_FORMAT (root1);
+
+  mpz_rootrem (root2, rem2, x2, nth);
+  MPZ_CHECK_FORMAT (root2);
+  MPZ_CHECK_FORMAT (rem2);
+
+  mpz_pow_ui (temp, root1, nth);
+  MPZ_CHECK_FORMAT (temp);
+
+  mpz_add (temp2, temp, rem2);
+
+  /* res (the exactness flag) should be non-zero exactly when rem2 is
+     zero; "res == mpz_cmp_ui (rem2, 0)" catches mismatches between
+     the flag and the remainder.  */
+  /* Is power of result > argument?  */
+  if (mpz_cmp (root1, root2) != 0 || mpz_cmp (x2, temp2) != 0 || mpz_cmpabs (temp, x2) > 0 || res == mpz_cmp_ui (rem2, 0))
+    {
+      fprintf (stderr, "ERROR after test %d\n", i);
+      debug_mp (x2, 10);
+      debug_mp (root1, 10);
+      debug_mp (root2, 10);
+      fprintf (stderr, "nth: %lu ,res: %i\n", nth, res);
+      abort ();
+    }
+
+  if (nth > 1 && mpz_cmp_ui (temp, 1L) > 0 && ! mpz_perfect_power_p (temp))
+    {
+      fprintf (stderr, "ERROR in mpz_perfect_power_p after test %d\n", i);
+      debug_mp (temp, 10);
+      debug_mp (root1, 10);
+      fprintf (stderr, "nth: %lu\n", nth);
+      abort ();
+    }
+
+  if (nth <= 10000 && mpz_sgn(x2) > 0)		/* skip too expensive test */
+    {
+      mpz_add_ui (temp2, root1, 1L);
+      mpz_pow_ui (temp2, temp2, nth);
+      MPZ_CHECK_FORMAT (temp2);
+
+      /* Is square of (result + 1) <= argument?  */
+      if (mpz_cmp (temp2, x2) <= 0)
+	{
+	  fprintf (stderr, "ERROR after test %d\n", i);
+	  debug_mp (x2, 10);
+	  debug_mp (root1, 10);
+	  fprintf (stderr, "nth: %lu\n", nth);
+	  abort ();
+	}
+    }
+
+  mpz_clear (root2);
+  mpz_clear (rem2);
+  mpz_clear (temp);
+  mpz_clear (temp2);
+}
+
+/* Driver: one fixed regression case (a value that triggered a gcc
+   4.3.2 bug) plus random (x2, nth) pairs; half of the iterations move
+   x2 next to an exact perfect power, and odd nth is also checked with
+   both operands negated.  Every result goes through check_one.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t x2;
+  mpz_t root1;
+  mp_size_t x2_size;
+  int i, res;
+  int reps = 500;
+  unsigned long nth;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (x2);
+  mpz_init (root1);
+
+  /* This triggers a gcc 4.3.2 bug */
+  mpz_set_str (x2, "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000000000000000000000000000000000000000000000000000000000002", 16);
+  res = mpz_root (root1, x2, 2);
+  check_one (root1, x2, 2, res, -1);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      x2_size = mpz_get_ui (bs) + 10;
+      mpz_rrandomb (x2, rands, x2_size);
+
+      /* nth ranges over 2 .. sizeinbase(x2,2)+1.  */
+      mpz_urandomb (bs, rands, 15);
+      nth = mpz_getlimbn (bs, 0) % mpz_sizeinbase (x2, 2) + 2;
+
+      res = mpz_root (root1, x2, nth);
+
+      mpz_urandomb (bs, rands, 4);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	{
+	  /* With 50% probability, set x2 near a perfect power.  */
+	  mpz_pow_ui (x2, root1, nth);
+	  if ((bsi & 2) != 0)
+	    {
+	      mpz_sub_ui (x2, x2, bsi >> 2);
+	      mpz_abs (x2, x2);
+	    }
+	  else
+	    mpz_add_ui (x2, x2, bsi >> 2);
+	  res = mpz_root (root1, x2, nth);
+	}
+
+      check_one (root1, x2, nth, res, i);
+
+      /* For odd nth the root of a negated argument is the negated
+	 root, so the same res still applies.  */
+      if (((nth & 1) != 0) && ((bsi & 2) != 0))
+	{
+	  mpz_neg (x2, x2);
+	  mpz_neg (root1, root1);
+	  check_one (root1, x2, nth, res, i);
+	}
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x2);
+  mpz_clear (root1);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print x and a newline on stderr in the given base.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-scan.c b/tests/mpz/t-scan.c
new file mode 100644
index 0000000..ababcfa
--- /dev/null
+++ b/tests/mpz/t-scan.c
@@ -0,0 +1,131 @@
+/* Tests of mpz_scan0 and mpz_scan1.
+
+Copyright 2000-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Reference for mpz_scan0/mpz_scan1: starting at bit i, return the
+   index of the first bit of z equal to sought (0 or 1), probing one
+   bit position past the top limb; ULONG_MAX if none found by then.
+   NOTE(review): relies on mpz_tstbit's behavior for bits above the
+   top limb (presumably 0 for z >= 0 and 1 for z < 0 under the
+   twos-complement convention) -- confirm against the manual.  */
+unsigned long
+refmpz_scan (mpz_srcptr z, unsigned long i, int sought)
+{
+  unsigned long  z_bits = (unsigned long) ABSIZ(z) * GMP_NUMB_BITS;
+
+  do
+    {
+      if (mpz_tstbit (z, i) == sought)
+        return i;
+      i++;
+    }
+  while (i <= z_bits);
+
+  return ULONG_MAX;
+}
+
+/* Reference counterpart of mpz_scan0.  */
+unsigned long
+refmpz_scan0 (mpz_srcptr z, unsigned long starting_bit)
+{
+  return refmpz_scan (z, starting_bit, 0);
+}
+
+/* Reference counterpart of mpz_scan1.  */
+unsigned long
+refmpz_scan1 (mpz_srcptr z, unsigned long starting_bit)
+{
+  return refmpz_scan (z, starting_bit, 1);
+}
+
+
+/* Compare mpz_scan0/mpz_scan1 against the reference implementation on
+   small random operands of both signs, with start positions placed a
+   few bits either side of every limb boundary of the operand.  */
+void
+check_ref (void)
+{
+  static const int offset[] = {
+    -2, -1, 0, 1, 2, 3
+  };
+
+  mpz_t          z;
+  int            test, neg, sought, oindex, o;
+  mp_size_t      size, isize;
+  unsigned long  start, got, want;
+
+  mpz_init (z);
+  for (test = 0; test < 5; test++)
+    {
+      for (size = 0; size < 5; size++)
+        {
+          mpz_random2 (z, size);
+
+          for (neg = 0; neg <= 1; neg++)
+            {
+              if (neg)
+                mpz_neg (z, z);
+
+              for (isize = 0; isize <= size; isize++)
+                {
+                  for (oindex = 0; oindex < numberof (offset); oindex++)
+                    {
+                      o = offset[oindex];
+                      if ((int) isize*GMP_NUMB_BITS < -o)
+                        continue;  /* start would be negative */
+
+                      /* start straddles the isize'th limb boundary.  */
+                      start = isize*GMP_NUMB_BITS + o;
+
+                      for (sought = 0; sought <= 1; sought++)
+                        {
+                          if (sought == 0)
+                            {
+                              got = mpz_scan0 (z, start);
+                              want = refmpz_scan0 (z, start);
+                            }
+                          else
+                            {
+                              got = mpz_scan1 (z, start);
+                              want = refmpz_scan1 (z, start);
+                            }
+
+                          if (got != want)
+                            {
+                              printf ("wrong at test=%d, size=%ld, neg=%d, start=%lu, sought=%d\n",
+                                      test, size, neg, start, sought);
+                              printf ("   z 0x");
+                              mpz_out_str (stdout, -16, z);
+                              printf ("\n");
+                              printf ("   got=%lu, want=%lu\n", got, want);
+                              exit (1);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+  mpz_clear (z);
+}
+
+
+/* Run the scan0/scan1 reference comparison.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_ref ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-set_d.c b/tests/mpz/t-set_d.c
new file mode 100644
index 0000000..1fdfc02
--- /dev/null
+++ b/tests/mpz/t-set_d.c
@@ -0,0 +1,139 @@
+/* Test mpz_set_d and mpz_init_set_d.
+
+Copyright 2000-2003, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check mpz_set_d and mpz_init_set_d against a table of doubles with
+   known limb representations.  Magnitudes below 1 must truncate to
+   zero (want_size 0).  */
+void
+check_data (void)
+{
+  static const struct {
+    double     d;
+    mp_size_t  want_size;
+    mp_limb_t  want_data[2];
+  } data[] = {
+
+    {  0.0,  0 },
+    {  1.0,  1, { 1 } },
+    { -1.0, -1, { 1 } },
+
+    {  123.0,  1, { 123 } },
+    { -123.0, -1, { 123 } },
+
+    { 1e-1, 0, { 0 } },
+    { -1e-1, 0, { 0 } },
+    { 2.328306436538696e-10, 0, { 0 } },
+    { -2.328306436538696e-10, 0, { 0 } },
+    { 5.421010862427522e-20, 0, { 0 } },
+    { -5.421010862427522e-20, 0, { 0 } },
+    { 2.938735877055719e-39, 0, { 0 } },
+    { -2.938735877055719e-39, 0, { 0 } },
+  };
+
+  mpz_t  z;
+  int    i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_init (z);
+      mpz_set_d (z, data[i].d);
+      MPZ_CHECK_FORMAT (z);
+      if (z->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (z->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0)
+        {
+          printf ("mpz_set_d wrong on data[%d]\n", i);
+          /* Failure reporting shared with the mpz_init_set_d check
+             below, which jumps here via goto.  */
+        bad:
+          d_trace   ("  d  ", data[i].d);
+          printf    ("  got  size %ld\n", (long) z->_mp_size);
+          printf    ("  want size %ld\n", (long) data[i].want_size);
+          mpn_trace ("  got  z", z->_mp_d, z->_mp_size);
+          mpn_trace ("  want z", data[i].want_data, data[i].want_size);
+          abort();
+        }
+      mpz_clear (z);
+
+      mpz_init_set_d (z, data[i].d);
+      MPZ_CHECK_FORMAT (z);
+      if (z->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (z->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0)
+        {
+          printf ("mpz_init_set_d wrong on data[%d]\n", i);
+          goto bad;
+        }
+      mpz_clear (z);
+    }
+}
+
+/* Try mpz_set_d on values 2^i+1, while such a value fits a double. */
+void
+check_2n_plus_1 (void)
+{
+  /* NOTE(review): volatile presumably forces the arithmetic below to
+     round to double precision instead of being kept in wider FP
+     registers (x87-style excess precision) -- confirm.  */
+  volatile double  p, d, diff;
+  mpz_t  want, got;
+  int    i;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  p = 1.0;
+  mpz_set_ui (want, 2L);  /* gives 3 on first step */
+
+  for (i = 1; i < 500; i++)
+    {
+      /* 2*want - 1 keeps the invariant want = 2^i + 1.  */
+      mpz_mul_2exp (want, want, 1L);
+      mpz_sub_ui (want, want, 1L);   /* want = 2^i+1 */
+
+      p *= 2.0;  /* p = 2^i */
+      d = p + 1.0;
+      diff = d - p;
+      if (diff != 1.0)
+        break;   /* rounding occurred, stop now */
+
+      mpz_set_d (got, d);
+      MPZ_CHECK_FORMAT (got);
+      if (mpz_cmp (got, want) != 0)
+        {
+          printf ("mpz_set_d wrong on 2^%d+1\n", i);
+          d_trace   ("  d ", d);
+          mpz_trace ("  got  ", got);
+          mpz_trace ("  want ", want);
+          abort ();
+        }
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Run the mpz_set_d / mpz_init_set_d checks.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+  check_2n_plus_1 ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-set_f.c b/tests/mpz/t-set_f.c
new file mode 100644
index 0000000..2671a86
--- /dev/null
+++ b/tests/mpz/t-set_f.c
@@ -0,0 +1,125 @@
+/* Test mpz_set_f.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* For the integer z: for every shift amount, shift direction, and
+   sign, build the same value both as an mpf (f has sizeinbase(z,2)
+   bits of precision, enough to hold z exactly) and as an mpz (using
+   truncating tdiv for right shifts), then require mpz_set_f on the
+   mpf to reproduce the mpz value.  */
+void
+check_one (mpz_srcptr z)
+{
+  static const int shift[] = {
+    0, 1, GMP_LIMB_BITS, 2*GMP_LIMB_BITS, 5*GMP_LIMB_BITS
+  };
+
+  int    sh, shneg, neg;
+  mpf_t  f;
+  mpz_t  got, want;
+
+  mpf_init2 (f, mpz_sizeinbase(z,2));
+  mpz_init (got);
+  mpz_init (want);
+
+  for (sh = 0; sh < numberof(shift); sh++)
+    {
+      for (shneg = 0; shneg <= 1; shneg++)
+	{
+	  for (neg = 0; neg <= 1; neg++)
+	    {
+	      mpf_set_z (f, z);
+	      mpz_set (want, z);
+
+	      if (neg)
+		{
+		  mpf_neg (f, f);
+		  mpz_neg (want, want);
+		}
+
+	      if (shneg)
+		{
+		  mpz_tdiv_q_2exp (want, want, shift[sh]);
+		  mpf_div_2exp (f, f, shift[sh]);
+		}
+	      else
+		{
+		  mpz_mul_2exp (want, want, shift[sh]);
+		  mpf_mul_2exp (f, f, shift[sh]);
+		}
+
+	      mpz_set_f (got, f);
+	      MPZ_CHECK_FORMAT (got);
+
+	      if (mpz_cmp (got, want) != 0)
+		{
+		  printf ("wrong result\n");
+		  printf ("  shift  %d\n", shneg ? -shift[sh] : shift[sh]);
+		  printf ("  neg    %d\n", neg);
+		  mpf_trace ("     f", f);
+		  mpz_trace ("   got", got);
+		  mpz_trace ("  want", want);
+		  abort ();
+		}
+	    }
+	}
+    }
+
+  mpf_clear (f);
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+
+/* Feed check_one a few representative operands: zero, a small
+   positive, and two random multi-limb values.  */
+void
+check_various (void)
+{
+  mpz_t  z;
+
+  mpz_init (z);
+
+  mpz_set_ui (z, 0L);
+  check_one (z);
+
+  mpz_set_si (z, 123L);
+  check_one (z);
+
+  mpz_rrandomb (z, RANDS, 2*GMP_LIMB_BITS);
+  check_one (z);
+
+  mpz_rrandomb (z, RANDS, 5*GMP_LIMB_BITS);
+  check_one (z);
+
+  mpz_clear (z);
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  /* The whole test is skipped for nail builds -- presumably because
+     mpf does not support nails; confirm against the GMP manual.  */
+#if GMP_NAIL_BITS == 0
+  tests_start ();
+  mp_trace_base = 16;
+
+  check_various ();
+
+  tests_end ();
+#endif
+  exit (0);
+}
diff --git a/tests/mpz/t-set_si.c b/tests/mpz/t-set_si.c
new file mode 100644
index 0000000..4e8ed7a
--- /dev/null
+++ b/tests/mpz/t-set_si.c
@@ -0,0 +1,96 @@
+/* Test mpz_set_si and mpz_init_set_si.
+
+Copyright 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_data (void)
+{
+#if GMP_NUMB_BITS <= BITS_PER_ULONG
+#define ENTRY(n)   { n, { n, 0 } }
+#else
+#define ENTRY(n)   { n, { (n) & GMP_NUMB_MASK, (n) >> GMP_NUMB_BITS } }
+#endif
+
+  static const struct {
+    long       n;
+    mp_size_t  want_size;
+    mp_limb_t  want_data[2];
+  } data[] = {
+
+    {  0L,  0 },
+    {  1L,  1, { 1 } },
+    { -1L, -1, { 1 } },
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    { LONG_MAX,  1, { LONG_MAX, 0 } },
+    { -LONG_MAX,  -1, { LONG_MAX, 0 } },
+    { LONG_HIGHBIT,  -1, { ULONG_HIGHBIT, 0 } },
+#else
+    { LONG_MAX,  2, { LONG_MAX & GMP_NUMB_MASK, LONG_MAX >> GMP_NUMB_BITS } },
+    { -LONG_MAX,  -2, { LONG_MAX & GMP_NUMB_MASK, LONG_MAX >> GMP_NUMB_BITS }},
+    { LONG_HIGHBIT,  -2, { 0, ULONG_HIGHBIT >> GMP_NUMB_BITS } },
+#endif
+  };
+
+  mpz_t  n;
+  int    i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_init (n);
+      mpz_set_si (n, data[i].n);
+      MPZ_CHECK_FORMAT (n);
+      if (n->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (n->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0)
+        {
+          printf ("mpz_set_si wrong on data[%d]\n", i);
+          abort();
+        }
+      mpz_clear (n);
+
+      mpz_init_set_si (n, data[i].n);
+      MPZ_CHECK_FORMAT (n);
+      if (n->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (n->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0)
+        {
+          printf ("mpz_init_set_si wrong on data[%d]\n", i);
+          abort();
+        }
+      mpz_clear (n);
+    }
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-set_str.c b/tests/mpz/t-set_str.c
new file mode 100644
index 0000000..22d36d3
--- /dev/null
+++ b/tests/mpz/t-set_str.c
@@ -0,0 +1,108 @@
+/* Test mpz_set_str.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Parse str in the given base and require mpz_set_str to return fail
+   (0 = accepted, -1 = rejected); when accepted, the parsed value must
+   equal want.  */
+void
+check_one (mpz_srcptr want, int fail, int base, const char *str)
+{
+  mpz_t   got;
+
+  MPZ_CHECK_FORMAT (want);
+  mp_trace_base = (base == 0 ? 16 : base);
+
+  mpz_init (got);
+
+  if (mpz_set_str (got, str, base) != fail)
+    {
+      printf ("mpz_set_str unexpectedly failed\n");
+      printf ("  base %d\n", base);
+      printf ("  str  \"%s\"\n", str);
+      abort ();
+    }
+  MPZ_CHECK_FORMAT (got);
+
+  if (fail == 0 && mpz_cmp (got, want) != 0)
+    {
+      printf ("mpz_set_str wrong\n");
+      printf ("  base %d\n", base);
+      printf ("  str  \"%s\"\n", str);
+      mpz_trace ("got ", got);
+      mpz_trace ("want", want);
+      abort ();
+    }
+
+  mpz_clear (got);
+}
+
+/* Fixed samples: embedded whitespace must be skipped, the 0b/0x/0
+   prefixes must be honoured when base is 0, and strings containing
+   digits invalid for the base (or invalid bases) must be rejected
+   with -1.  */
+void
+check_samples (void)
+{
+  mpz_t  z;
+
+  mpz_init (z);
+
+  mpz_set_ui (z, 0L);
+  check_one (z, 0, 0, "0 ");
+  check_one (z, 0, 0, " 0 0 0 ");
+  check_one (z, 0, 0, " -0B 0 ");
+  check_one (z, 0, 0, "  0X 0 ");
+  check_one (z, 0, 10, "0 ");
+  check_one (z, 0, 10, "-0   ");
+  check_one (z, 0, 10, " 0 000 000    ");
+
+  mpz_set_ui (z, 123L);
+  check_one (z, 0, 0, "123 ");
+  check_one (z, 0, 0, "123    ");
+  check_one (z, 0, 0, "0173   ");
+  check_one (z, 0, 0, " 0b 1 11 10 11  ");
+  check_one (z, 0, 0, " 0x 7b ");
+  check_one (z, 0, 0, "0x7B");
+  check_one (z, 0, 10, "123 ");
+  check_one (z, 0, 10, "123    ");
+  check_one (z, 0, 0, " 123 ");
+  check_one (z, 0, 0, "  123    ");
+  check_one (z, 0, 10, "  0000123 ");
+  check_one (z, 0, 10, "  123    ");
+  /* Invalid inputs: bad digits for the (possibly prefixed) base, or
+     an out-of-range base.  */
+  check_one (z,-1, 10, "1%");
+  check_one (z,-1, 0, "3!");
+  check_one (z,-1, 0, "0123456789");
+  check_one (z,-1, 0, "13579BDF");
+  check_one (z,-1, 0, "0b0102");
+  check_one (z,-1, 0, "0x010G");
+  check_one (z,-1, 37,"0x010G");
+  check_one (z,-1, 99,"0x010G");
+
+  mpz_clear (z);
+}
+
+/* Run the mpz_set_str sample checks.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_samples ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-sizeinbase.c b/tests/mpz/t-sizeinbase.c
new file mode 100644
index 0000000..936f2bd
--- /dev/null
+++ b/tests/mpz/t-sizeinbase.c
@@ -0,0 +1,89 @@
+/* Test mpz_sizeinbase.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#if 0
+  /* Disabled due to the bogosity of trying to fake an _mp_d pointer to
+     below an object.  Has been seen to fail on a hppa system and on ia64.  */
+
+
+/* Create a fake mpz consisting of just a single 1 bit, with totbits being
+   the total number of bits, inclusive of that 1 bit.  */
+void
+mpz_fake_bits (mpz_ptr z, unsigned long totbits)
+{
+  static mp_limb_t  n;
+  unsigned long     zero_bits, zero_limbs;
+
+  zero_bits = totbits - 1;
+  zero_limbs = zero_bits / GMP_NUMB_BITS;
+  zero_bits %= GMP_NUMB_BITS;
+
+  /* Point _mp_d below the static limb so only the top limb is real;
+     this is the bogus aliasing the note above warns about.  */
+  SIZ(z) = zero_limbs + 1;
+  PTR(z) = (&n) - (SIZ(z) - 1);
+  n = CNST_LIMB(1) << zero_bits;
+
+  ASSERT_ALWAYS (mpz_sizeinbase (z, 2) == totbits);
+}
+
+
+/* This was seen to fail on a GNU/Linux powerpc32 with gcc 2.95.2,
+   apparently due to a doubtful value of mp_bases[10].chars_per_bit_exactly
+   (0X1.34413509F79FDP-2 whereas 0X1.34413509F79FFP-2 is believed correct).
+   Presumably this is a glibc problem when gcc converts the decimal string
+   in mp_bases.c, or maybe it's only a function of the rounding mode during
+   compilation.  */
+void
+check_sample (void)
+{
+  unsigned long  totbits = 198096465;
+  int        base = 10;
+  size_t     want = 59632979;
+  size_t     got;
+  mpz_t      z;
+
+  mpz_fake_bits (z, totbits);
+  got = mpz_sizeinbase (z, base);
+  if (got != want)
+    {
+      printf ("mpz_sizeinbase\n");
+      printf ("  base    %d\n",  base);
+      printf ("  totbits %lu\n", totbits);
+      /* NOTE(review): "%u" with size_t arguments is a format mismatch
+         on LP64 targets (should be "%zu" or a cast to unsigned);
+         harmless while this code remains disabled.  */
+      printf ("  got     %u\n",  got);
+      printf ("  want    %u\n",  want);
+      abort ();
+    }
+}
+#endif
+
+/* The only substantive check (check_sample) is disabled above, so
+   this test currently just starts and ends the harness.  */
+int
+main (void)
+{
+  tests_start ();
+
+  /* check_sample (); */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-sqrtrem.c b/tests/mpz/t-sqrtrem.c
new file mode 100644
index 0000000..9db8a61
--- /dev/null
+++ b/tests/mpz/t-sqrtrem.c
@@ -0,0 +1,122 @@
+/* Test mpz_add, mpz_add_ui, mpz_cmp, mpz_mul, mpz_sqrtrem.
+
+Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort (mpz_t, mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
+
+/* For random operands x2: require mpz_sqrt and mpz_sqrtrem to agree,
+   and verify x^2 <= x2 < (x+1)^2 together with x2 == x^2 + rem.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t x2;
+  mpz_t x, rem;
+  mpz_t temp, temp2;
+  mp_size_t x2_size;
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (x2);
+  mpz_init (x);
+  mpz_init (rem);
+  mpz_init (temp);
+  mpz_init (temp2);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2; /* 0..262144 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      x2_size = mpz_get_ui (bs);
+      mpz_rrandomb (x2, rands, x2_size);
+
+      /* printf ("%ld\n", SIZ (x2)); */
+
+      mpz_sqrt (temp, x2);
+      MPZ_CHECK_FORMAT (temp);
+
+      mpz_sqrtrem (x, rem, x2);
+      MPZ_CHECK_FORMAT (x);
+      MPZ_CHECK_FORMAT (rem);
+
+      /* Are results different?  */
+      if (mpz_cmp (temp, x) != 0)
+	dump_abort (x2, x, rem);
+
+      mpz_mul (temp, x, x);
+
+      /* Is square of result > argument?  */
+      if (mpz_cmp (temp, x2) > 0)
+	dump_abort (x2, x, rem);
+
+      mpz_add_ui (temp2, x, 1);
+      mpz_mul (temp2, temp2, temp2);
+
+      /* Is square of (result + 1) <= argument?  */
+      if (mpz_cmp (temp2, x2) <= 0)
+	dump_abort (x2, x, rem);
+
+      mpz_add (temp2, temp, rem);
+
+      /* Is the remainder wrong?  */
+      if (mpz_cmp (x2, temp2) != 0)
+	dump_abort (x2, x, rem);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x2);
+  mpz_clear (x);
+  mpz_clear (rem);
+  mpz_clear (temp);
+  mpz_clear (temp2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Report the failing operands (in hex, uppercase) on stderr and abort.  */
+void
+dump_abort (mpz_t x2, mpz_t x, mpz_t rem)
+{
+  fprintf (stderr, "ERROR\n");
+  fprintf (stderr, "x2        = "); debug_mp (x2, -16);
+  fprintf (stderr, "x         = "); debug_mp (x, -16);
+  fprintf (stderr, "remainder = "); debug_mp (rem, -16);
+  abort();
+}
+
+/* Print x and a newline on stderr in the given base.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-tdiv.c b/tests/mpz/t-tdiv.c
new file mode 100644
index 0000000..3d2eb36
--- /dev/null
+++ b/tests/mpz/t-tdiv.c
@@ -0,0 +1,145 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_tdiv_qr, mpz_tdiv_q,
+   mpz_tdiv_r, mpz_mul.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort (mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
+
+/* Driver: for REPS random (dividend, divisor) pairs, cross-check
+   mpz_tdiv_qr against mpz_tdiv_q/mpz_tdiv_r, then verify the sign
+   rules and the defining identities of truncating division.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend, divisor;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size, divisor_size;
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (dividend);
+  mpz_init (divisor);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* Pick an exponentially distributed size so both tiny and very
+	 large operands get exercised.  */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 18 + 2; /* 0..524288 bit operands */
+
+      /* Retry until the divisor is nonzero.  */
+      do
+	{
+	  mpz_urandomb (bs, rands, size_range);
+	  divisor_size = mpz_get_ui (bs);
+	  mpz_rrandomb (divisor, rands, divisor_size);
+	}
+      while (mpz_sgn (divisor) == 0);
+
+      /* Make the dividend at least as large as the divisor so the
+	 quotient is usually nontrivial.  */
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs) + divisor_size;
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      /* Randomize the signs of both operands.  */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (dividend, dividend);
+      if ((bsi & 2) != 0)
+	mpz_neg (divisor, divisor);
+
+      /* printf ("%ld %ld\n", SIZ (dividend), SIZ (divisor)); */
+
+      mpz_tdiv_qr (quotient, remainder, dividend, divisor);
+      mpz_tdiv_q (quotient2, dividend, divisor);
+      mpz_tdiv_r (remainder2, dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+	 with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+	dump_abort (dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+	dump_abort (dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  The XOR of the
+	 two mpz_cmp_ui results has its sign bit set exactly when the
+	 operands' signs differ.  (The dump_abort below belongs to the
+	 inner if despite its outer-level indentation.)  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+	if ((mpz_cmp_ui (quotient, 0) < 0)
+	    != ((mpz_cmp_ui (dividend, 0) ^ mpz_cmp_ui (divisor, 0)) < 0))
+	dump_abort (dividend, divisor);
+
+      /* Check if the remainder has the same sign as the dividend
+	 (quotient rounded towards 0).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+	if ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (dividend, 0) < 0))
+	  dump_abort (dividend, divisor);
+
+      /* Verify the division identity n == q*d + r.  */
+      mpz_mul (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+	dump_abort (dividend, divisor);
+
+      /* Verify |r| < |d|.  (remainder is clobbered here, which is fine
+	 since this is the last use in the iteration.)  */
+      mpz_abs (temp, divisor);
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp (remainder, temp) >= 0)
+	dump_abort (dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (divisor);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print the failing division operands in base 16 and terminate.  */
+void
+dump_abort (mpz_t dividend, mpz_t divisor)
+{
+  fprintf (stderr, "ERROR\n");
+  fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
+  fprintf (stderr, "divisor  = "); debug_mp (divisor, -16);
+  abort();
+}
+
+/* Print X on stderr in the given BASE, followed by a newline.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-tdiv_ui.c b/tests/mpz/t-tdiv_ui.c
new file mode 100644
index 0000000..6bbb947
--- /dev/null
+++ b/tests/mpz/t-tdiv_ui.c
@@ -0,0 +1,158 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_tdiv_qr_ui, mpz_tdiv_q_ui,
+   mpz_tdiv_r_ui, mpz_tdiv_ui, mpz_mul_ui.
+
+Copyright 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
+
+/* Driver: cross-check the _ui flavours of truncating division
+   (mpz_tdiv_qr_ui, mpz_tdiv_q_ui, mpz_tdiv_r_ui, mpz_tdiv_ui) against
+   each other, and verify sign rules, the division identity, and that
+   every returned unsigned remainder matches |remainder|.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size;
+  unsigned long divisor;
+  int i;
+  int reps = 200000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  unsigned long r_rq, r_q, r_r, r;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (dividend);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */
+
+      /* Retry until the single-limb divisor is nonzero.  */
+      do
+	{
+	  mpz_rrandomb (bs, rands, 64);
+	  divisor = mpz_get_ui (bs);
+	}
+      while (divisor == 0);
+
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs);
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      /* Bit 0 randomizes the dividend's sign; bit 1 is drawn but
+	 unused since the divisor is unsigned here.  */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+	mpz_neg (dividend, dividend);
+
+      /* printf ("%ld\n", SIZ (dividend)); */
+
+      r_rq = mpz_tdiv_qr_ui (quotient, remainder, dividend, divisor);
+      r_q = mpz_tdiv_q_ui (quotient2, dividend, divisor);
+      r_r = mpz_tdiv_r_ui (remainder2, dividend, divisor);
+      r = mpz_tdiv_ui (dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+	 with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+	dump_abort ("quotients from mpz_tdiv_qr_ui and mpz_tdiv_q_ui differ",
+		    dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+	dump_abort ("remainders from mpz_tdiv_qr_ui and mpz_tdiv_r_ui differ",
+		    dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  (The dump_abort
+	 below belongs to the inner if despite its indentation.)  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+	if ((mpz_cmp_ui (quotient, 0) < 0)
+	    != (mpz_cmp_ui (dividend, 0) < 0))
+	dump_abort ("quotient sign wrong", dividend, divisor);
+
+      /* Check if the remainder has the same sign as the dividend
+	 (quotient rounded towards 0).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+	if ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (dividend, 0) < 0))
+	  dump_abort ("remainder sign wrong", dividend, divisor);
+
+      /* Verify the division identity n == q*d + r.  */
+      mpz_mul_ui (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+	dump_abort ("n mod d != n - [n/d]*d", dividend, divisor);
+
+      /* From here on, remainder holds |remainder|; all four returned
+	 unsigned remainders must equal it and be < divisor.  */
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp_ui (remainder, divisor) >= 0)
+	dump_abort ("remainder greater than divisor", dividend, divisor);
+
+      if (mpz_cmp_ui (remainder, r_rq) != 0)
+	dump_abort ("remainder returned from mpz_tdiv_qr_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_q) != 0)
+	dump_abort ("remainder returned from mpz_tdiv_q_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_r) != 0)
+	dump_abort ("remainder returned from mpz_tdiv_r_ui is wrong",
+		    dividend, divisor);
+      if (mpz_cmp_ui (remainder, r) != 0)
+	dump_abort ("remainder returned from mpz_tdiv_ui is wrong",
+		    dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print an explanatory message plus the failing operands (dividend in
+   base 16, divisor in hex) and terminate.  */
+void
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
+{
+  fprintf (stderr, "ERROR: %s\n", str);
+  fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
+  fprintf (stderr, "divisor  = %lX\n", divisor);
+  abort();
+}
+
+/* Print X on stderr in the given BASE, followed by a newline.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/rand/findlc.c b/tests/rand/findlc.c
new file mode 100644
index 0000000..72fb12c
--- /dev/null
+++ b/tests/rand/findlc.c
@@ -0,0 +1,251 @@
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <math.h>
+#include "gmpstat.h"
+
+#define RCSID(msg) \
+static /**/const char *const rcsid[] = { (char *)rcsid, "\100(#)" msg }
+
+RCSID("$Id$");
+
+int g_debug = 0;
+
+static mpz_t a;
+
+/* Signal handler: dump the current candidate multiplier A so progress
+   isn't lost when the long-running search is interrupted or crashes.
+   NOTE(review): printf/mpz_out_str are not async-signal-safe; this is
+   acceptable only as a best-effort debugging aid.  */
+static void
+sh_status (int sig)
+{
+  printf ("sh_status: signal %d caught. dumping status.\n", sig);
+
+  printf ("  a = ");
+  mpz_out_str (stdout, 10, a);
+  printf ("\n");
+  fflush (stdout);
+
+  if (SIGSEGV == sig)		/* remove SEGV handler */
+    signal (SIGSEGV, SIG_DFL);
+}
+
+/* Input is a modulus (m).  We shall find multiplier (a) and adder (c)
+   conforming to the rules found in the first comment block in file
+   mpz/urandom.c.
+
+   Then run a spectral test on the generator and discard any
+   multipliers not passing.  */
+
+/* TODO:
+
+   . find a better algorithm than a+=8; bigger jumps perhaps?
+
+*/
+
+/* Fill S with NBITS of entropy read a byte at a time from /dev/random.
+   The body is compiled only under __FreeBSD__; on other systems S is
+   left untouched (the caller's initial value — 0 from mpz_init — is
+   used as the starting point).
+   NOTE(review): the fopen return value is not checked, so a missing
+   /dev/random would crash in fread — confirm acceptable for a tool.  */
+void
+mpz_true_random (mpz_t s, unsigned long int nbits)
+{
+#if __FreeBSD__
+  FILE *fs;
+  char c[1];
+  int i;
+
+  mpz_set_ui (s, 0);
+  for (i = 0; i < nbits; i += 8)
+    {
+      /* Retry (with a pause) until /dev/random yields a byte.  */
+      for (;;)
+	{
+	  int nread;
+	  fs = fopen ("/dev/random", "r");
+	  nread = fread (c, 1, 1, fs);
+	  fclose (fs);
+	  if (nread != 0)
+	    break;
+	  sleep (1);
+	}
+      mpz_mul_2exp (s, s, 8);
+      mpz_add_ui (s, s, ((unsigned long int) c[0]) & 0xff);
+      printf ("%d random bits\n", i + 8);
+    }
+  /* Trim to exactly NBITS when not a byte multiple.  */
+  if (nbits % 8 != 0)
+    mpz_mod_2exp (s, s, nbits);
+#endif
+}
+
+/* Search for LC multipliers A (mod 2^m2exp, with adder 5 implied by
+   the a = 8k+5 form) that pass the spectral test in dimensions
+   2..DIMS, printing candidates and the best-so-far as it goes.  */
+int
+main (int argc, char *argv[])
+{
+  const char usage[] = "usage: findlc [-dv] m2exp [low_merit [high_merit]]\n";
+  int f;
+  int v_lose, m_lose, v_best, m_best;
+  int c;
+  int debug = 1;
+  int cnt_high_merit;
+  mpz_t m;
+  unsigned long int m2exp;
+#define DIMS 6			/* dimensions run in spectral test */
+  mpf_t v[DIMS-1];		/* spectral test result (there's no v
+				   for the 1st dimension) */
+  mpf_t f_merit, low_merit, high_merit;
+  mpz_t acc, minus8;
+  mpz_t min, max;
+  mpz_t s;
+
+
+  mpz_init (m);
+  mpz_init (a);
+  for (f = 0; f < DIMS-1; f++)
+    mpf_init (v[f]);
+  mpf_init (f_merit);
+  mpf_init_set_d (low_merit, .1);
+  mpf_init_set_d (high_merit, .1);
+
+  /* NOTE(review): the option string declares "a:" and "i:" but no case
+     handles them, so -a/-i fall through to the default and exit —
+     presumably leftovers; confirm against intended usage.  */
+  while ((c = getopt (argc, argv, "a:di:hv")) != -1)
+    switch (c)
+      {
+      case 'd':			/* debug */
+	g_debug++;
+	break;
+
+      case 'v':			/* print version */
+	puts (rcsid[1]);
+	exit (0);
+
+      case 'h':
+      case '?':
+      default:
+	fputs (usage, stderr);
+	exit (1);
+      }
+
+  argc -= optind;
+  argv += optind;
+
+  if (argc < 1)
+    {
+      fputs (usage, stderr);
+      exit (1);
+    }
+
+  /* Install signal handler. */
+  if (SIG_ERR == signal (SIGSEGV, sh_status))
+    {
+      perror ("signal (SIGSEGV)");
+      exit (1);
+    }
+  if (SIG_ERR == signal (SIGHUP, sh_status))
+    {
+      perror ("signal (SIGHUP)");
+      exit (1);
+    }
+
+  printf ("findlc: version: %s\n", rcsid[1]);
+  m2exp = atol (argv[0]);
+  /* NOTE(review): m was already mpz_init'd above; this re-init leaks
+     the first allocation — harmless for a one-shot tool, but confirm.  */
+  mpz_init_set_ui (m, 1);
+  mpz_mul_2exp (m, m, m2exp);
+  printf ("m = 0x");
+  mpz_out_str (stdout, 16, m);
+  puts ("");
+
+  if (argc > 1)			/* have low_merit */
+    mpf_set_str (low_merit, argv[1], 0);
+  if (argc > 2)			/* have high_merit */
+    mpf_set_str (high_merit, argv[2], 0);
+
+  if (debug)
+    {
+      fprintf (stderr, "low_merit = ");
+      mpf_out_str (stderr, 10, 2, low_merit);
+      fprintf (stderr, "; high_merit = ");
+      mpf_out_str (stderr, 10, 2, high_merit);
+      fputs ("\n", stderr);
+    }
+
+  mpz_init (minus8);
+  mpz_set_si (minus8, -8L);
+  mpz_init_set_ui (acc, 0);
+  mpz_init (s);
+  /* Restrict candidates to the middle 98% of the modulus range.  */
+  mpz_init_set_d (min, 0.01 * pow (2.0, (double) m2exp));
+  mpz_init_set_d (max, 0.99 * pow (2.0, (double) m2exp));
+
+  mpz_true_random (s, m2exp);	/* Start.  */
+  mpz_setbit (s, 0);		/* Make it odd.  */
+
+  v_best = m_best = 2*(DIMS-1);
+  for (;;)
+    {
+      /* Step the candidate, then force it to the form 8k+5.  */
+      mpz_add (acc, acc, s);
+      mpz_mod_2exp (acc, acc, m2exp);
+#if later
+      mpz_and_si (a, acc, -8L);
+#else
+      mpz_and (a, acc, minus8);
+#endif
+      mpz_add_ui (a, a, 5);
+      if (mpz_cmp (a, min) <= 0 || mpz_cmp (a, max) >= 0)
+	continue;
+
+      spectral_test (v, DIMS, a, m);
+      for (f = 0, v_lose = m_lose = 0, cnt_high_merit = DIMS-1;
+	   f < DIMS-1; f++)
+	{
+	  merit (f_merit, f + 2, v[f], m);
+
+	  /* Knuth-style per-dimension minimum for v.  */
+	  if (mpf_cmp_ui (v[f], 1 << (30 / (f + 2) + (f == 2))) < 0)
+	    v_lose++;
+
+	  if (mpf_cmp (f_merit, low_merit) < 0)
+	    m_lose++;
+
+	  if (mpf_cmp (f_merit, high_merit) >= 0)
+	    cnt_high_merit--;
+	}
+
+      if (0 == v_lose && 0 == m_lose)
+	{
+	  mpz_out_str (stdout, 10, a); puts (""); fflush (stdout);
+	  if (0 == cnt_high_merit)
+	    break;		/* leave loop */
+	}
+      if (v_lose < v_best)
+	{
+	  v_best = v_lose;
+	  printf ("best (v_lose=%d; m_lose=%d): ", v_lose, m_lose);
+	  mpz_out_str (stdout, 10, a); puts (""); fflush (stdout);
+	}
+      if (m_lose < m_best)
+	{
+	  m_best = m_lose;
+	  printf ("best (v_lose=%d; m_lose=%d): ", v_lose, m_lose);
+	  mpz_out_str (stdout, 10, a); puts (""); fflush (stdout);
+	}
+    }
+
+  /* NOTE(review): acc, minus8, min, max and s are never cleared —
+     harmless since the process exits here.  */
+  mpz_clear (m);
+  mpz_clear (a);
+  for (f = 0; f < DIMS-1; f++)
+    mpf_clear (v[f]);
+  mpf_clear (f_merit);
+  mpf_clear (low_merit);
+  mpf_clear (high_merit);
+
+  printf ("done.\n");
+  return 0;
+}
diff --git a/tests/rand/gen.c b/tests/rand/gen.c
new file mode 100644
index 0000000..c8e6c67
--- /dev/null
+++ b/tests/rand/gen.c
@@ -0,0 +1,480 @@
+/* gen.c -- Generate pseudorandom numbers.
+
+Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+/* Examples:
+
+  $ gen 10
+10 integers 0 <= X < 2^32 generated by mpz_urandomb()
+
+  $ gen -f mpf_urandomb 10
+10 real numbers 0 <= X < 1
+
+  $ gen -z 127 10
+10 integers 0 <= X < 2^127
+
+  $ gen -f mpf_urandomb -x .9,1 10
+10 real numbers 0 <= X < .9
+
+  $ gen -s 1 10
+10 integers, sequence seeded with 1
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+#include "gmp-impl.h"
+
+/* Command-line driver: parse options, seed the chosen generator, emit
+   N pseudorandom numbers (ASCII or binary) from the selected random
+   function, then tear down.  Uses a pre-ANSI (K&R) definition.  */
+int main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  /* NOTE(review): usage advertises -C a,c,m but the getopt string
+     below has no 'C' — presumably a dropped feature; confirm.  */
+  const char usage[] =
+    "usage: gen [-bhpq] [-a n] [-c a,c,m2exp] [-C a,c,m] [-f func] [-g alg] [-m n] [-s n] " \
+    "[-x f,t] [-z n] [n]\n" \
+    "  n        number of random numbers to generate\n" \
+    "  -a n     ASCII output in radix n (default, with n=10)\n" \
+    "  -b       binary output\n" \
+    "  -c a,c,m2exp use supplied LC scheme\n" \
+    "  -f func  random function, one of\n" \
+    "           mpz_urandomb (default), mpz_urandomm, mpf_urandomb, rand, random\n" \
+    "  -g alg   algorithm, one of mt (default), lc\n" \
+    "  -h       print this text and exit\n" \
+    "  -m n     maximum size of generated number plus 1 (0<= X < n) for mpz_urandomm\n" \
+    "  -p       print used seed on stderr\n" \
+    "  -q       quiet, no output\n" \
+    "  -s n     initial seed (default: output from time(3))\n" \
+    "  -x f,t   exclude all numbers f <= x <= t\n" \
+    "  -z n     size in bits of generated numbers (0<= X <2^n) (default 32)\n" \
+    "";
+
+  unsigned long int f;
+  unsigned long int n = 0;
+  unsigned long int seed;
+  unsigned long int m2exp = 0;
+  unsigned int size = 32;
+  int seed_from_user = 0;
+  int ascout = 1, binout = 0, printseed = 0;
+  int output_radix = 10;
+  int lc_scheme_from_user = 0;
+  int quiet_flag = 0;
+  mpz_t z_seed;
+  mpz_t z1;
+  mpf_t f1;
+  gmp_randstate_t rstate;
+  int c, i;
+  double drand;
+  long lrand;
+  int do_exclude = 0;
+  mpf_t f_xf, f_xt;		/* numbers to exclude from sequence */
+  char *str_xf, *str_xt;	/* numbers to exclude from sequence */
+  char *str_a, *str_adder, *str_m;
+  mpz_t z_a, z_m, z_mmax;
+  unsigned long int ul_adder;
+
+  enum
+  {
+    RFUNC_mpz_urandomb = 0,
+    RFUNC_mpz_urandomm,
+    RFUNC_mpf_urandomb,
+    RFUNC_rand,
+    RFUNC_random,
+  } rfunc = RFUNC_mpz_urandomb;
+  char *rfunc_str[] =  { "mpz_urandomb", "mpz_urandomm", "mpf_urandomb",
+			 "rand", "random" };
+  enum
+  {
+    RNG_MT = 0,
+    RNG_LC
+  };
+  gmp_randalg_t ralg = RNG_MT;
+  /* Texts for the algorithms.  The index of each must match the
+     corresponding algorithm in the enum above.  */
+  char *ralg_str[] = { "mt", "lc" };
+
+  mpf_init (f_xf);
+  mpf_init (f_xt);
+  mpf_init (f1);
+  mpz_init (z1);
+  mpz_init (z_seed);
+  mpz_init_set_ui (z_mmax, 0);
+
+
+  while ((c = getopt (argc, argv, "a:bc:f:g:hm:n:pqs:z:x:")) != -1)
+    switch (c)
+      {
+      case 'a':
+	ascout = 1;
+	binout = 0;
+	output_radix = atoi (optarg);
+	break;
+
+      case 'b':
+	ascout = 0;
+	binout = 1;
+	break;
+
+      case 'c':			/* User supplied LC scheme: a,c,m2exp */
+	if (NULL == (str_a = strtok (optarg, ","))
+	    || NULL == (str_adder = strtok (NULL, ","))
+	    || NULL == (str_m = strtok (NULL, ",")))
+	  {
+	    fprintf (stderr, "gen: bad LC scheme parameters: %s\n", optarg);
+	    exit (1);
+	  }
+#ifdef HAVE_STRTOUL
+	ul_adder = strtoul (str_adder, NULL, 0);
+#elif HAVE_STRTOL
+	ul_adder = (unsigned long int) strtol (str_adder, NULL, 0);
+#else
+	ul_adder = (unsigned long int) atoi (str_adder);
+#endif
+
+	if (mpz_init_set_str (z_a, str_a, 0))
+	  {
+	    fprintf (stderr, "gen: bad LC scheme parameter `a': %s\n", str_a);
+	    exit (1);
+	  }
+	if (ULONG_MAX == ul_adder)
+	  {
+	    fprintf (stderr, "gen: bad LC scheme parameter `c': %s\n",
+		     str_adder);
+	    exit (1);
+	  }
+	m2exp = atol (str_m);
+
+	lc_scheme_from_user = 1;
+	break;
+
+
+      case 'f':
+	/* -1 is out of the enum's range; used as a "not found" mark.  */
+	rfunc = -1;
+	for (f = 0; f < sizeof (rfunc_str) / sizeof (*rfunc_str); f++)
+	    if (!strcmp (optarg, rfunc_str[f]))
+	      {
+		rfunc = f;
+		break;
+	      }
+	if (rfunc == -1)
+	  {
+	    fputs (usage, stderr);
+	    exit (1);
+	  }
+	break;
+
+      case 'g':			/* algorithm */
+	ralg = -1;
+	for (f = 0; f < sizeof (ralg_str) / sizeof (*ralg_str); f++)
+	    if (!strcmp (optarg, ralg_str[f]))
+	      {
+		ralg = f;
+		break;
+	      }
+	if (ralg == -1)
+	  {
+	    fputs (usage, stderr);
+	    exit (1);
+	  }
+	break;
+
+      case 'm':			/* max for mpz_urandomm() */
+	if (mpz_set_str (z_mmax, optarg, 0))
+	  {
+	    fprintf (stderr, "gen: bad max value: %s\n", optarg);
+	    exit (1);
+	  }
+	break;
+
+      case 'p':			/* print seed on stderr */
+	printseed = 1;
+	break;
+
+      case 'q':			/* quiet */
+	quiet_flag = 1;
+	break;
+
+      case 's':			/* user provided seed */
+	if (mpz_set_str (z_seed, optarg, 0))
+	  {
+	    fprintf (stderr, "gen: bad seed argument %s\n", optarg);
+	    exit (1);
+	  }
+	seed_from_user = 1;
+	break;
+
+      case 'z':
+	size = atoi (optarg);
+	if (size < 1)
+	  {
+	    fprintf (stderr, "gen: bad size argument (-z %u)\n", size);
+	    exit (1);
+	  }
+	break;
+
+      case 'x':			/* Exclude. from,to */
+	str_xf = optarg;
+	str_xt = strchr (optarg, ',');
+	if (NULL == str_xt)
+	  {
+	    fprintf (stderr, "gen: bad exclusion parameters: %s\n", optarg);
+	    exit (1);
+	  }
+	*str_xt++ = '\0';
+	do_exclude = 1;
+	break;
+
+      case 'h':
+      case '?':
+      default:
+	fputs (usage, stderr);
+	exit (1);
+      }
+  argc -= optind;
+  argv += optind;
+
+  if (! seed_from_user)
+    mpz_set_ui (z_seed, (unsigned long int) time (NULL));
+  seed = mpz_get_ui (z_seed);
+  if (printseed)
+    {
+      fprintf (stderr, "gen: seed used: ");
+      mpz_out_str (stderr, output_radix, z_seed);
+      fprintf (stderr, "\n");
+    }
+
+  mpf_set_prec (f1, size);
+
+  /* init random state and plant seed */
+  switch (rfunc)
+    {
+    case RFUNC_mpf_urandomb:
+#if 0
+      /* Don't init a too small generator.  */
+      size = PREC (f1) * GMP_LIMB_BITS;
+      /* Fall through.  */
+#endif
+    case RFUNC_mpz_urandomb:
+    case RFUNC_mpz_urandomm:
+      switch (ralg)
+	{
+	case RNG_MT:
+	  gmp_randinit_mt (rstate);
+	  break;
+
+	case RNG_LC:
+	  if (! lc_scheme_from_user)
+	    gmp_randinit_lc_2exp_size (rstate, MIN (128, size));
+	  else
+	    gmp_randinit_lc_2exp (rstate, z_a, ul_adder, m2exp);
+	  break;
+
+	default:
+	  fprintf (stderr, "gen: unsupported algorithm\n");
+	  exit (1);
+	}
+
+      gmp_randseed (rstate, z_seed);
+      break;
+
+    case RFUNC_rand:
+      srand (seed);
+      break;
+
+    case RFUNC_random:
+#ifdef __FreeBSD__		/* FIXME */
+      if (seed_from_user)
+	srandom (seed);
+      else
+	srandomdev ();
+#else
+      fprintf (stderr, "gen: unsupported algorithm\n");
+#endif
+      break;
+
+    default:
+      fprintf (stderr, "gen: random function not implemented\n");
+      exit (1);
+    }
+
+  /* set up excludes */
+  if (do_exclude)
+    switch (rfunc)
+      {
+      case RFUNC_mpf_urandomb:
+
+	if (mpf_set_str (f_xf, str_xf, 10) ||
+	    mpf_set_str (f_xt, str_xt, 10))
+	  {
+	    fprintf (stderr, "gen: bad exclusion-from (\"%s\") " \
+		     "or exclusion-to (\"%s\") string.  no exclusion done.\n",
+		     str_xf, str_xt);
+	    do_exclude = 0;
+	  }
+	break;
+
+      default:
+	fprintf (stderr, "gen: exclusion not implemented for chosen " \
+		 "randomization function.  all numbers included in sequence.\n");
+      }
+
+  /* generate and print */
+  if (argc > 0)
+    {
+#if HAVE_STRTOUL
+      n = strtoul (argv[0], (char **) NULL, 10);
+#elif HAVE_STRTOL
+      n = (unsigned long int) strtol (argv[0], (char **) NULL, 10);
+#else
+      n = (unsigned long int) atoi (argv[0]);
+#endif
+    }
+
+  /* n == 0 (no count argument) means generate forever.  */
+  for (f = 0; n == 0 || f < n; f++)
+    {
+      switch (rfunc)
+	{
+	case RFUNC_mpz_urandomb:
+	  mpz_urandomb (z1, rstate, size);
+	  if (quiet_flag)
+	    break;
+	  if (binout)
+	    {
+	      /*fwrite ((unsigned int *) z1->_mp_d, 4, 1, stdout);*/
+	      fprintf (stderr, "gen: binary output for mpz_urandom* is broken\n");
+	      exit (1);
+	    }
+	  else
+	    {
+	      mpz_out_str (stdout, output_radix, z1);
+	      puts ("");
+	    }
+	  break;
+
+	case RFUNC_mpz_urandomm:
+	  mpz_urandomm (z1, rstate, z_mmax);
+	  if (quiet_flag)
+	    break;
+	  if (binout)
+	    {
+	      /*fwrite ((unsigned int *) z1->_mp_d, 4, 1, stdout);*/
+	      fprintf (stderr, "gen: binary output for mpz_urandom* is broken\n");
+	      exit (1);
+	    }
+	  else
+	    {
+	      mpz_out_str (stdout, output_radix, z1);
+	      puts ("");
+	    }
+	  break;
+
+	case RFUNC_mpf_urandomb:
+	  mpf_urandomb (f1, rstate, size);
+	  if (do_exclude)
+	    if (mpf_cmp (f1, f_xf) >= 0 && mpf_cmp (f1, f_xt) <= 0)
+		break;
+	  if (quiet_flag)
+	    break;
+	  if (binout)
+	    {
+	      fprintf (stderr, "gen: binary output for floating point numbers "\
+		       "not implemented\n");
+	      exit (1);
+	    }
+	  else
+	    {
+	      mpf_out_str (stdout, output_radix, 0, f1);
+	      puts ("");
+	    }
+	  break;
+
+	case RFUNC_rand:
+	  i = rand ();
+#ifdef FLOAT_OUTPUT
+	  if (i)
+	    drand = (double) i / (double) RAND_MAX;
+	  else
+	    drand = 0.0;
+	  if (quiet_flag)
+	    break;
+	  if (binout)
+	    fwrite (&drand, sizeof (drand), 1, stdout);
+	  else
+	    printf ("%e\n", drand);
+#else
+	  if (quiet_flag)
+	    break;
+	  if (binout)
+	    fwrite (&i, sizeof (i), 1, stdout);
+	  else
+	    printf ("%d\n", i);
+#endif
+	  break;
+
+	case RFUNC_random:
+	  lrand = random ();
+	  if (lrand)
+	    drand = (double) lrand / (double) 0x7fffffff;
+	  else
+	    drand = 0;
+	  if (quiet_flag)
+	    break;
+	  if (binout)
+	    fwrite (&drand, sizeof (drand), 1, stdout);
+	  else
+	    printf ("%e\n", drand);
+	  break;
+
+	default:
+	  fprintf (stderr, "gen: random function not implemented\n");
+	  exit (1);
+	}
+
+    }
+
+  /* clean up */
+  /* NOTE(review): RFUNC_mpz_urandomm also initializes rstate above but
+     is missing from this switch, so its state is never cleared —
+     confirm whether intentional (process exits right after).  */
+  switch (rfunc)
+    {
+    case RFUNC_mpz_urandomb:
+    case RFUNC_mpf_urandomb:
+      gmp_randclear (rstate);
+      break;
+    default:
+      break;
+    }
+  mpf_clear (f1);
+  mpf_clear (f_xf);
+  mpf_clear (f_xt);
+  mpz_clear (z1);
+  mpz_clear (z_seed);
+
+  return 0;
+}
+
+/* Take the addresses of the dump helpers — presumably to keep them
+   linked into the binary for use from a debugger (confirm).  Storing a
+   function pointer in a void * is not strictly portable C.  */
+static void *debug_dummyz = mpz_dump;
+static void *debug_dummyf = mpf_dump;
diff --git a/tests/rand/gmpstat.h b/tests/rand/gmpstat.h
new file mode 100644
index 0000000..99c5cca
--- /dev/null
+++ b/tests/rand/gmpstat.h
@@ -0,0 +1,75 @@
+/* gmpstat.h */
+
+/*
+Copyright 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+/* This file requires the following header files: gmp.h */
+
+#ifndef	__GMPSTAT_H__
+#define	__GMPSTAT_H__
+
+/* Global debug flag.  FIXME: Remove. */
+extern int g_debug;
+#define DEBUG_1 0
+#define DEBUG_2 1
+
+/* Max number of dimensions in spectral test.  FIXME: Make dynamic. */
+#define GMP_SPECT_MAXT 10
+
+void
+mpf_freqt (mpf_t Kp,
+	   mpf_t Km,
+	   mpf_t X[],
+	   const unsigned long int n);
+unsigned long int
+mpz_freqt (mpf_t V,
+	   mpz_t X[],
+	   unsigned int imax,
+	   const unsigned long int n);
+
+/* Low level functions. */
+void
+ks (mpf_t Kp,
+    mpf_t Km,
+    mpf_t X[],
+    void (P) (mpf_t, mpf_t),
+    const unsigned long int n);
+
+void
+ks_table (mpf_t p, mpf_t val, const unsigned int n);
+
+void
+x2_table (double t[],
+	  unsigned int v);
+
+void
+spectral_test (mpf_t rop[], unsigned int T, mpz_t a, mpz_t m);
+void
+vz_dot (mpz_t rop, mpz_t V1[], mpz_t V2[], unsigned int n);
+void
+f_floor (mpf_t rop, mpf_t op);
+
+void
+merit (mpf_t rop, unsigned int t, mpf_t v, mpz_t m);
+double
+merit_u (unsigned int t, mpf_t v, mpz_t m);
+
+/* From separate source files: */
+void zdiv_round (mpz_t rop, mpz_t n, mpz_t d);
+
+#endif /* !__GMPSTAT_H__ */
diff --git a/tests/rand/spect.c b/tests/rand/spect.c
new file mode 100644
index 0000000..64de5a0
--- /dev/null
+++ b/tests/rand/spect.c
@@ -0,0 +1,136 @@
+/* spect.c -- the spectral test */
+
+/*
+Copyright 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+/* T is upper dimension.  Z_A is the LC multiplier, which is
+   relatively prime to Z_M, the LC modulus.  The result is put in
+   rop[] with v[t] in rop[t-2]. */
+
+/* BUGS: Due to lazy allocation scheme, maximum T is hard coded to MAXT. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "gmpstat.h"
+
+int g_debug = 0;
+
+/* Run the spectral test on LC multiplier A modulo M for dimensions
+   2..N, printing each v value and its merit, flagging values below
+   the hard-coded minima.  */
+int
+main (int argc, char *argv[])
+{
+  const char usage[] = "usage: spect [-d] a m n\n";
+  int c;
+  unsigned int n;
+  mpz_t a, m;
+  mpf_t res[GMP_SPECT_MAXT], res_min[GMP_SPECT_MAXT], f_tmp;
+  int f;
+
+
+  mpz_init (a);
+  mpz_init (m);
+  for (f = 0; f < GMP_SPECT_MAXT; f++)
+    {
+      mpf_init (res[f]);
+      mpf_init (res_min[f]);
+    }
+  mpf_init (f_tmp);
+  /* Minimum acceptable v per dimension (entries past index 4 stay 0,
+     i.e. effectively unchecked).  */
+  mpf_set_ui (res_min[0], 32768); /* 2^15 */
+  mpf_set_ui (res_min[1], 1024); /* 2^10 */
+  mpf_set_ui (res_min[2], 256); /* 2^8 */
+  mpf_set_ui (res_min[3], 64); /* 2^6 */
+  mpf_set_ui (res_min[4], 32); /* 2^5 */
+
+  while ((c = getopt (argc, argv, "dh")) != -1)
+    switch (c)
+      {
+      case 'd':			/* debug */
+	g_debug++;
+	break;
+      case 'h':
+      default:
+	fputs (usage, stderr);
+	exit (1);
+      }
+  argc -= optind;
+  argv += optind;
+
+  if (argc < 3)
+    {
+      fputs (usage, stderr);
+      exit (1);
+    }
+
+  mpz_set_str (a, argv[0], 0);
+  mpz_set_str (m, argv[1], 0);
+  n = (unsigned int) atoi (argv[2]);
+  /* NOTE(review): this "clamp" sets n ABOVE the array bound when n is
+     too large — looks like it should be GMP_SPECT_MAXT - 1; confirm
+     against spectral_test's expectations before relying on it.  */
+  if (n + 1 > GMP_SPECT_MAXT)
+    n = GMP_SPECT_MAXT + 1;
+
+  spectral_test (res, n, a, m);
+
+  for (f = 0; f < n - 1; f++)
+    {
+      /* print v */
+      printf ("%d: v = ", f + 2);
+      mpf_out_str (stdout, 10, 4, res[f]);
+
+#ifdef PRINT_RAISED_BY_TWO_AS_WELL
+      printf (" (^2 = ");
+      mpf_mul (f_tmp, res[f], res[f]);
+      mpf_out_str (stdout, 10, 4, f_tmp);
+      printf (")");
+#endif /* PRINT_RAISED_BY_TWO_AS_WELL */
+
+      /* print merit */
+      printf (" m = ");
+      merit (f_tmp, f + 2, res[f], m);
+      mpf_out_str (stdout, 10, 4, f_tmp);
+
+      if (mpf_cmp (res[f], res_min[f]) < 0)
+	printf ("\t*** v too low ***");
+      if (mpf_get_d (f_tmp) < .1)
+	printf ("\t*** merit too low ***");
+
+      puts ("");
+    }
+
+  mpz_clear (a);
+  mpz_clear (m);
+  for (f = 0; f < GMP_SPECT_MAXT; f++)
+    {
+      mpf_clear (res[f]);
+      mpf_clear (res_min[f]);
+    }
+  mpf_clear (f_tmp);
+
+  return 0;
+}
+
+
+/* Dead references to the dump helpers — presumably to force them to
+   be linked in for debugger use (confirm); the branch is never taken.  */
+void
+debug_foo()
+{
+  if (0)
+    {
+      mpz_dump (0);
+      mpf_dump (0);
+    }
+}
diff --git a/tests/rand/stat.c b/tests/rand/stat.c
new file mode 100644
index 0000000..fa06f4c
--- /dev/null
+++ b/tests/rand/stat.c
@@ -0,0 +1,406 @@
+/* stat.c -- statistical tests of random number sequences. */
+
+/*
+Copyright 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+/* Examples:
+
+  $ gen 1000 | stat
+Test 1000 real numbers.
+
+  $ gen 30000 | stat -2 1000
+Test 1000 real numbers 30 times and then test the 30 results in a
+``second level''.
+
+  $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
+Test 1000 integers 0 <= X <= 2^32-1.
+
+  $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
+Test 1000 integers 0 <= X <= 2^34-1.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+#include "gmpstat.h"
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+#define FVECSIZ (100000L)
+
+int g_debug = 0;
+
+static void
+print_ks_results (mpf_t f_p, mpf_t f_p_prob,
+		  mpf_t f_m, mpf_t f_m_prob,
+		  FILE *fp)
+{
+  double p, pp, m, mp;
+
+  p = mpf_get_d (f_p);
+  m = mpf_get_d (f_m);
+  pp = mpf_get_d (f_p_prob);
+  mp = mpf_get_d (f_m_prob);
+
+  fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
+  fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
+}
+
/* Print a chi-square table row for V degrees of freedom on FP,
   preceded by a header naming the percentage points.  */
static void
print_x2_table (unsigned int v, FILE *fp)
{
  double row[7];
  int i;

  x2_table (row, v);

  fprintf (fp, "Chi-square table for v=%u\n", v);
  fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
  for (i = 0; i < 7; i++)
    fprintf (fp, "%.2f\t", row[i]);
  fputs ("\n", fp);
}
+
+
+
+/* Pks () -- Distribution function for KS results with a big n (like 1000
+   or so):  F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
+/* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */
+
+static void
+Pks (mpf_t p, mpf_t x)
+{
+  double dt;			/* temp double */
+
+  mpf_set (p, x);
+  mpf_mul (p, p, p);		/* p = x^2 */
+  mpf_mul_ui (p, p, 2);		/* p = 2*x^2 */
+  mpf_neg (p, p);		/* p = -2*x^2 */
+  /* No pow() in gmp.  Use doubles. */
+  /* FIXME: Use exp()? */
+  dt = pow (M_E, mpf_get_d (p));
+  mpf_set_d (p, dt);
+  mpf_ui_sub (p, 1, p);
+}
+
+/* f_freq() -- frequency test on real numbers 0<=f<1*/
+static void
+f_freq (const unsigned l1runs, const unsigned l2runs,
+	mpf_t fvec[], const unsigned long n)
+{
+  unsigned f;
+  mpf_t f_p, f_p_prob;
+  mpf_t f_m, f_m_prob;
+  mpf_t *l1res;			/* level 1 result array */
+
+  mpf_init (f_p);  mpf_init (f_m);
+  mpf_init (f_p_prob);  mpf_init (f_m_prob);
+
+
+  /* Allocate space for 1st level results. */
+  l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
+  if (NULL == l1res)
+    {
+      fprintf (stderr, "stat: malloc failure\n");
+      exit (1);
+    }
+
+  printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
+  printf ("\tKp\t\tKm\n");
+
+  for (f = 0; f < l2runs; f++)
+    {
+      /*  f_printvec (fvec, n); */
+      mpf_freqt (f_p, f_m, fvec + f * n, n);
+
+      /* what's the probability of getting these results? */
+      ks_table (f_p_prob, f_p, n);
+      ks_table (f_m_prob, f_m, n);
+
+      if (l1runs == 0)
+	{
+	  /*printf ("%u:\t", f + 1);*/
+	  print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
+	}
+      else
+	{
+	  /* save result */
+	  mpf_init_set (l1res[f], f_p);
+	  mpf_init_set (l1res[f + l2runs], f_m);
+	}
+    }
+
+  /* Now, apply the KS test on the results from the 1st level rounds
+     with the distribution
+     F(x) = 1 - pow(e, -2*x^2)	[Knuth, vol 2, p.51] */
+
+  if (l1runs != 0)
+    {
+      /*printf ("-------------------------------------\n");*/
+
+      /* The Kp's. */
+      ks (f_p, f_m, l1res, Pks, l2runs);
+      ks_table (f_p_prob, f_p, l2runs);
+      ks_table (f_m_prob, f_m, l2runs);
+      printf ("Kp:\t");
+      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
+
+      /* The Km's. */
+      ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
+      ks_table (f_p_prob, f_p, l2runs);
+      ks_table (f_m_prob, f_m, l2runs);
+      printf ("Km:\t");
+      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
+    }
+
+  mpf_clear (f_p);  mpf_clear (f_m);
+  mpf_clear (f_p_prob);  mpf_clear (f_m_prob);
+  free (l1res);
+}
+
+/* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
+   0<=z<=MAX */
+static void
+z_freq (const unsigned l1runs,
+	const unsigned l2runs,
+	mpz_t zvec[],
+	const unsigned long n,
+	unsigned int max)
+{
+  mpf_t V;			/* result */
+  double d_V;			/* result as a double */
+
+  mpf_init (V);
+
+
+  printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
+  print_x2_table (max, stdout);
+
+  mpz_freqt (V, zvec, max, n);
+
+  d_V = mpf_get_d (V);
+  printf ("V = %.2f (n = %lu)\n", d_V, n);
+
+  mpf_clear (V);
+}
+
+unsigned int stat_debug = 0;
+
+int
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  const char usage[] =
+    "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
+    "       file     filename\n" \
+    "       -2 runs  perform 2-level test with RUNS runs on 1st level\n" \
+    "       -d       increase debugging level\n" \
+    "       -i max   input is integers 0 <= Z <= MAX\n" \
+    "       -r max   input is real numbers 0 <= R < 1 and use MAX as\n" \
+    "                maximum value when converting real numbers to integers\n" \
+    "";
+
+  mpf_t fvec[FVECSIZ];
+  mpz_t zvec[FVECSIZ];
+  unsigned long int f, n, vecentries;
+  char *filen;
+  FILE *fp;
+  int c;
+  int omitoutput = 0;
+  int realinput = -1;		/* 1: input is real numbers 0<=R<1;
+				   0: input is integers 0 <= Z <= MAX. */
+  long l1runs = 0,		/* 1st level runs */
+    l2runs = 1;			/* 2nd level runs */
+  mpf_t f_temp;
+  mpz_t z_imax;			/* max value when converting between
+				   real number and integer. */
+  mpf_t f_imax_plus1;		/* f_imax + 1 stored in an mpf_t for
+				   convenience */
+  mpf_t f_imax_minus1;		/* f_imax - 1 stored in an mpf_t for
+				   convenience */
+
+
+  mpf_init (f_temp);
+  mpz_init_set_ui (z_imax, 0x7fffffff);
+  mpf_init (f_imax_plus1);
+  mpf_init (f_imax_minus1);
+
+  while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
+    switch (c)
+      {
+      case '2':
+	l1runs = atol (optarg);
+	l2runs = -1;		/* set later on */
+	break;
+      case 'd':			/* increase debug level */
+	stat_debug++;
+	break;
+      case 'i':
+	if (1 == realinput)
+	  {
+	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
+	    exit (1);
+	  }
+	if (mpz_set_str (z_imax, optarg, 0))
+	  {
+	    fprintf (stderr, "stat: bad max value %s\n", optarg);
+	    exit (1);
+	  }
+	realinput = 0;
+	break;
+      case 'r':
+	if (0 == realinput)
+	  {
+	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
+	    exit (1);
+	  }
+	if (mpz_set_str (z_imax, optarg, 0))
+	  {
+	    fprintf (stderr, "stat: bad max value %s\n", optarg);
+	    exit (1);
+	  }
+	realinput = 1;
+	break;
+      case 'o':
+	omitoutput = atoi (optarg);
+	break;
+      case '?':
+      default:
+	fputs (usage, stderr);
+	exit (1);
+      }
+  argc -= optind;
+  argv += optind;
+
+  if (argc < 1)
+    fp = stdin;
+  else
+    filen = argv[0];
+
+  if (fp != stdin)
+    if (NULL == (fp = fopen (filen, "r")))
+      {
+	perror (filen);
+	exit (1);
+      }
+
+  if (-1 == realinput)
+    realinput = 1;		/* default is real numbers */
+
+  /* read file and fill appropriate vec */
+  if (1 == realinput)		/* real input */
+    {
+      for (f = 0; f < FVECSIZ ; f++)
+	{
+	  mpf_init (fvec[f]);
+	  if (!mpf_inp_str (fvec[f], fp, 10))
+	    break;
+	}
+    }
+  else				/* integer input */
+    {
+      for (f = 0; f < FVECSIZ ; f++)
+	{
+	  mpz_init (zvec[f]);
+	  if (!mpz_inp_str (zvec[f], fp, 10))
+	    break;
+	}
+    }
+  vecentries = n = f;		/* number of entries read */
+  fclose (fp);
+
+  if (FVECSIZ == f)
+    fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
+	     "of only %ld entries.  sorry.\n", FVECSIZ);
+
+  printf ("Got %lu numbers.\n", n);
+
+  /* convert and fill the other vec */
+  /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
+     0<=z<=imax and we are truncating all fractions when
+     converting float to int, we have to add 1 to imax.*/
+  mpf_set_z (f_imax_plus1, z_imax);
+  mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
+  if (1 == realinput)		/* fill zvec[] */
+    {
+      for (f = 0; f < n; f++)
+	{
+	  mpf_mul (f_temp, fvec[f], f_imax_plus1);
+	  mpz_init (zvec[f]);
+	  mpz_set_f (zvec[f], f_temp); /* truncating fraction */
+	  if (stat_debug > 1)
+	    {
+	      mpz_out_str (stderr, 10, zvec[f]);
+	      fputs ("\n", stderr);
+	    }
+	}
+    }
+  else				/* integer input; fill fvec[] */
+    {
+      /*    mpf_set_z (f_imax_minus1, z_imax);
+	    mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
+      for (f = 0; f < n; f++)
+	{
+	  mpf_init (fvec[f]);
+	  mpf_set_z (fvec[f], zvec[f]);
+	  mpf_div (fvec[f], fvec[f], f_imax_plus1);
+	  if (stat_debug > 1)
+	    {
+	      mpf_out_str (stderr, 10, 0, fvec[f]);
+	      fputs ("\n", stderr);
+	    }
+	}
+    }
+
+  /* 2 levels? */
+  if (1 != l2runs)
+    {
+      l2runs = n / l1runs;
+      printf ("Doing %ld second level rounds "\
+	      "with %ld entries in each round", l2runs, l1runs);
+      if (n % l1runs)
+	printf (" (discarding %ld entr%s)", n % l1runs,
+		n % l1runs == 1 ? "y" : "ies");
+      puts (".");
+      n = l1runs;
+    }
+
+#ifndef DONT_FFREQ
+  f_freq (l1runs, l2runs, fvec, n);
+#endif
+#ifdef DO_ZFREQ
+  z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
+#endif
+
+  mpf_clear (f_temp); mpz_clear (z_imax);
+  mpf_clear (f_imax_plus1);
+  mpf_clear (f_imax_minus1);
+  for (f = 0; f < vecentries; f++)
+    {
+      mpf_clear (fvec[f]);
+      mpz_clear (zvec[f]);
+    }
+
+  return 0;
+}
diff --git a/tests/rand/statlib.c b/tests/rand/statlib.c
new file mode 100644
index 0000000..db05380
--- /dev/null
+++ b/tests/rand/statlib.c
@@ -0,0 +1,836 @@
+/* statlib.c -- Statistical functions for testing the randomness of
+   number sequences. */
+
+/*
+Copyright 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+/* The theories for these functions are taken from D. Knuth's "The Art
+of Computer Programming: Volume 2, Seminumerical Algorithms", Third
+Edition, Addison Wesley, 1998. */
+
+/* Implementation notes.
+
+The Kolmogorov-Smirnov test.
+
+Eq. (13) in Knuth, p. 50, says that if X1, X2, ..., Xn are independent
+observations arranged into ascending order
+
	Kp = sqrt(n) * max(j/n - F(Xj))		for all 1<=j<=n
	Km = sqrt(n) * max(F(Xj) - (j-1)/n)	for all 1<=j<=n
+
+where F(x) = Pr(X <= x) = probability that (X <= x), which for a
+uniformly distributed random real number between zero and one is
+exactly the number itself (x).
+
+
+The answer to exercise 23 gives the following implementation, which
+doesn't need the observations to be sorted in ascending order:
+
+for (k = 0; k < m; k++)
+	a[k] = 1.0
+	b[k] = 0.0
+	c[k] = 0
+
+for (each observation Xj)
+	Y = F(Xj)
+	k = floor (m * Y)
+	a[k] = min (a[k], Y)
+	b[k] = max (b[k], Y)
+	c[k] += 1
+
+	j = 0
+	rp = rm = 0
+	for (k = 0; k < m; k++)
+		if (c[k] > 0)
+			rm = max (rm, a[k] - j/n)
+			j += c[k]
+			rp = max (rp, j/n - b[k])
+
Kp = sqrt (n) * rp
Km = sqrt (n) * rm
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "gmpstat.h"
+
+/* ks (Kp, Km, X, P, n) -- Perform a Kolmogorov-Smirnov test on the N
+   real numbers between zero and one in vector X.  P is the
+   distribution function, called for each entry in X, which should
+   calculate the probability of X being greater than or equal to any
+   number in the sequence.  (For a uniformly distributed sequence of
+   real numbers between zero and one, this is simply equal to X.)  The
+   result is put in Kp and Km.  */
+
+void
+ks (mpf_t Kp,
+    mpf_t Km,
+    mpf_t X[],
+    void (P) (mpf_t, mpf_t),
+    unsigned long int n)
+{
+  mpf_t Kt;			/* temp */
+  mpf_t f_x;
+  mpf_t f_j;			/* j */
+  mpf_t f_jnq;			/* j/n or (j-1)/n */
+  unsigned long int j;
+
+  /* Sort the vector in ascending order. */
+  qsort (X, n, sizeof (__mpf_struct), mpf_cmp);
+
+  /* K-S test. */
+  /*	Kp = sqr(n) * max(j/n - F(Xj))		for all 1<=j<=n
+	Km = sqr(n) * max(F(Xj) - (j-1)/n))	for all 1<=j<=n
+  */
+
+  mpf_init (Kt); mpf_init (f_x); mpf_init (f_j); mpf_init (f_jnq);
+  mpf_set_ui (Kp, 0);  mpf_set_ui (Km, 0);
+  for (j = 1; j <= n; j++)
+    {
+      P (f_x, X[j-1]);
+      mpf_set_ui (f_j, j);
+
+      mpf_div_ui (f_jnq, f_j, n);
+      mpf_sub (Kt, f_jnq, f_x);
+      if (mpf_cmp (Kt, Kp) > 0)
+	mpf_set (Kp, Kt);
+      if (g_debug > DEBUG_2)
+	{
+	  printf ("j=%lu ", j);
+	  printf ("P()="); mpf_out_str (stdout, 10, 2, f_x); printf ("\t");
+
+	  printf ("jnq="); mpf_out_str (stdout, 10, 2, f_jnq); printf (" ");
+	  printf ("diff="); mpf_out_str (stdout, 10, 2, Kt); printf (" ");
+	  printf ("Kp="); mpf_out_str (stdout, 10, 2, Kp); printf ("\t");
+	}
+      mpf_sub_ui (f_j, f_j, 1);
+      mpf_div_ui (f_jnq, f_j, n);
+      mpf_sub (Kt, f_x, f_jnq);
+      if (mpf_cmp (Kt, Km) > 0)
+	mpf_set (Km, Kt);
+
+      if (g_debug > DEBUG_2)
+	{
+	  printf ("jnq="); mpf_out_str (stdout, 10, 2, f_jnq); printf (" ");
+	  printf ("diff="); mpf_out_str (stdout, 10, 2, Kt); printf (" ");
+	  printf ("Km="); mpf_out_str (stdout, 10, 2, Km); printf (" ");
+	  printf ("\n");
+	}
+    }
+  mpf_sqrt_ui (Kt, n);
+  mpf_mul (Kp, Kp, Kt);
+  mpf_mul (Km, Km, Kt);
+
+  mpf_clear (Kt); mpf_clear (f_x); mpf_clear (f_j); mpf_clear (f_jnq);
+}
+
+/* ks_table(val, n) -- calculate probability for Kp/Km less than or
+   equal to VAL with N observations.  See [Knuth section 3.3.1] */
+
+void
+ks_table (mpf_t p, mpf_t val, const unsigned int n)
+{
+  /* We use Eq. (27), Knuth p.58, skipping O(1/n) for simplicity.
+     This shortcut will result in too high probabilities, especially
+     when n is small.
+
+     Pr(Kp(n) <= s) = 1 - pow(e, -2*s^2) * (1 - 2/3*s/sqrt(n) + O(1/n)) */
+
+  /* We have 's' in variable VAL and store the result in P. */
+
+  mpf_t t1, t2;
+
+  mpf_init (t1); mpf_init (t2);
+
+  /* t1 = 1 - 2/3 * s/sqrt(n) */
+  mpf_sqrt_ui (t1, n);
+  mpf_div (t1, val, t1);
+  mpf_mul_ui (t1, t1, 2);
+  mpf_div_ui (t1, t1, 3);
+  mpf_ui_sub (t1, 1, t1);
+
+  /* t2 = pow(e, -2*s^2) */
+#ifndef OLDGMP
+  mpf_pow_ui (t2, val, 2);	/* t2 = s^2 */
+  mpf_set_d (t2, exp (-(2.0 * mpf_get_d (t2))));
+#else
+  /* hmmm, gmp doesn't have pow() for floats.  use doubles. */
+  mpf_set_d (t2, pow (M_E, -(2 * pow (mpf_get_d (val), 2))));
+#endif
+
+  /* p = 1 - t1 * t2 */
+  mpf_mul (t1, t1, t2);
+  mpf_ui_sub (p, 1, t1);
+
+  mpf_clear (t1); mpf_clear (t2);
+}
+
+static double x2_table_X[][7] = {
+  { -2.33, -1.64, -.674, 0.0, 0.674, 1.64, 2.33 }, /* x */
+  { 5.4289, 2.6896, .454276, 0.0, .454276, 2.6896, 5.4289} /* x^2 */
+};
+
+#define _2D3 ((double) .6666666666)
+
+/* x2_table (t, v, n) -- return chi-square table row for V in T[]. */
+void
+x2_table (double t[],
+	  unsigned int v)
+{
+  int f;
+
+
+  /* FIXME: Do a table lookup for v <= 30 since the following formula
+     [Knuth, vol 2, 3.3.1] is only good for v > 30. */
+
+  /* value = v + sqrt(2*v) * X[p] + (2/3) * X[p]^2 - 2/3 + O(1/sqrt(t) */
+  /* NOTE: The O() term is ignored for simplicity. */
+
+  for (f = 0; f < 7; f++)
+      t[f] =
+	v +
+	sqrt (2 * v) * x2_table_X[0][f] +
+	_2D3 * x2_table_X[1][f] - _2D3;
+}
+
+
/* P(p, x) -- Distribution function.  Calculate the probability of X
being greater than or equal to any number in the sequence.  For a
random real number between zero and one given by a uniformly
distributed random number generator, this is simply equal to X. */

static void
P (mpf_t p, mpf_t x)
{
  /* Identity distribution: F(x) = x for uniform reals in [0,1).  */
  mpf_set (p, x);
}
+
/* mpf_freqt() -- Frequency test using KS on N real numbers between zero
   and one.  See [Knuth vol 2, p.61].
   Kp/Km receive the Kolmogorov-Smirnov statistics for X[] measured
   against the uniform distribution P.  NOTE: X[] is sorted in place
   by ks().  */
void
mpf_freqt (mpf_t Kp,
	   mpf_t Km,
	   mpf_t X[],
	   const unsigned long int n)
{
  ks (Kp, Km, X, P, n);
}
+
+
+/* The Chi-square test.  Eq. (8) in Knuth vol. 2 says that if Y[]
+   holds the observations and p[] is the probability for.. (to be
+   continued!)
+
+   V = 1/n * sum((s=1 to k) Y[s]^2 / p[s]) - n */
+
+void
+x2 (mpf_t V,			/* result */
+    unsigned long int X[],	/* data */
+    unsigned int k,		/* #of categories */
+    void (P) (mpf_t, unsigned long int, void *), /* probability func */
+    void *x,			/* extra user data passed to P() */
+    unsigned long int n)	/* #of samples */
+{
+  unsigned int f;
+  mpf_t f_t, f_t2;		/* temp floats */
+
+  mpf_init (f_t); mpf_init (f_t2);
+
+
+  mpf_set_ui (V, 0);
+  for (f = 0; f < k; f++)
+    {
+      if (g_debug > DEBUG_2)
+	fprintf (stderr, "%u: P()=", f);
+      mpf_set_ui (f_t, X[f]);
+      mpf_mul (f_t, f_t, f_t);	/* f_t = X[f]^2 */
+      P (f_t2, f, x);		/* f_t2 = Pr(f) */
+      if (g_debug > DEBUG_2)
+	mpf_out_str (stderr, 10, 2, f_t2);
+      mpf_div (f_t, f_t, f_t2);
+      mpf_add (V, V, f_t);
+      if (g_debug > DEBUG_2)
+	{
+	  fprintf (stderr, "\tV=");
+	  mpf_out_str (stderr, 10, 2, V);
+	  fprintf (stderr, "\t");
+	}
+    }
+  if (g_debug > DEBUG_2)
+    fprintf (stderr, "\n");
+  mpf_div_ui (V, V, n);
+  mpf_sub_ui (V, V, n);
+
+  mpf_clear (f_t); mpf_clear (f_t2);
+}
+
+/* Pzf(p, s, x) -- Probability for category S in mpz_freqt().  It's
+   1/d for all S.  X is a pointer to an unsigned int holding 'd'. */
+static void
+Pzf (mpf_t p, unsigned long int s, void *x)
+{
+  mpf_set_ui (p, 1);
+  mpf_div_ui (p, p, *((unsigned int *) x));
+}
+
+/* mpz_freqt(V, X, imax, n) -- Frequency test on integers.  [Knuth,
+   vol 2, 3.3.2].  Keep IMAX low on this one, since we loop from 0 to
+   IMAX.  128 or 256 could be nice.
+
+   X[] must not contain numbers outside the range 0 <= X <= IMAX.
+
+   Return value is number of observations actually used, after
+   discarding entries out of range.
+
+   Since X[] contains integers between zero and IMAX, inclusive, we
+   have IMAX+1 categories.
+
+   Note that N should be at least 5*IMAX.  Result is put in V and can
+   be compared to output from x2_table (v=IMAX). */
+
+unsigned long int
+mpz_freqt (mpf_t V,
+	   mpz_t X[],
+	   unsigned int imax,
+	   const unsigned long int n)
+{
+  unsigned long int *v;		/* result */
+  unsigned int f;
+  unsigned int d;		/* number of categories = imax+1 */
+  unsigned int uitemp;
+  unsigned long int usedn;
+
+
+  d = imax + 1;
+
+  v = (unsigned long int *) calloc (imax + 1, sizeof (unsigned long int));
+  if (NULL == v)
+    {
+      fprintf (stderr, "mpz_freqt(): out of memory\n");
+      exit (1);
+    }
+
+  /* count */
+  usedn = n;			/* actual number of observations */
+  for (f = 0; f < n; f++)
+    {
+      uitemp = mpz_get_ui(X[f]);
+      if (uitemp > imax)	/* sanity check */
+	{
+	  if (g_debug)
+	    fprintf (stderr, "mpz_freqt(): warning: input insanity: %u, "\
+		     "ignored.\n", uitemp);
+	  usedn--;
+	  continue;
+	}
+      v[uitemp]++;
+    }
+
+  if (g_debug > DEBUG_2)
+    {
+      fprintf (stderr, "counts:\n");
+      for (f = 0; f <= imax; f++)
+	fprintf (stderr, "%u:\t%lu\n", f, v[f]);
+    }
+
+  /* chi-square with k=imax+1 and P(x)=1/(imax+1) for all x.*/
+  x2 (V, v, d, Pzf, (void *) &d, usedn);
+
+  free (v);
+  return (usedn);
+}
+
/* Debug dummy: never-executed calls that drag the dump functions
   into the link so they can be used from a debugger.  */
void
foo_debug ()
{
  if (0)
    {
#ifndef OLDGMP
      mpz_dump (0);
#endif
      mpf_dump (0);
    }
}
+
+/* merit (rop, t, v, m) -- calculate merit for spectral test result in
+   dimension T, see Knuth p. 105.  BUGS: Only valid for 2 <= T <=
+   6. */
+void
+merit (mpf_t rop, unsigned int t, mpf_t v, mpz_t m)
+{
+  int f;
+  mpf_t f_m, f_const, f_pi;
+
+  mpf_init (f_m);
+  mpf_set_z (f_m, m);
+  mpf_init_set_d (f_const, M_PI);
+  mpf_init_set_d (f_pi, M_PI);
+
+  switch (t)
+    {
+    case 2:			/* PI */
+      break;
+    case 3:			/* PI * 4/3 */
+      mpf_mul_ui (f_const, f_const, 4);
+      mpf_div_ui (f_const, f_const, 3);
+      break;
+    case 4:			/* PI^2 * 1/2 */
+      mpf_mul (f_const, f_const, f_pi);
+      mpf_div_ui (f_const, f_const, 2);
+      break;
+    case 5:			/* PI^2 * 8/15 */
+      mpf_mul (f_const, f_const, f_pi);
+      mpf_mul_ui (f_const, f_const, 8);
+      mpf_div_ui (f_const, f_const, 15);
+      break;
+    case 6:			/* PI^3 * 1/6 */
+      mpf_mul (f_const, f_const, f_pi);
+      mpf_mul (f_const, f_const, f_pi);
+      mpf_div_ui (f_const, f_const, 6);
+      break;
+    default:
+      fprintf (stderr,
+	       "spect (merit): can't calculate merit for dimensions > 6\n");
+      mpf_set_ui (f_const, 0);
+      break;
+    }
+
+  /* rop = v^t */
+  mpf_set (rop, v);
+  for (f = 1; f < t; f++)
+    mpf_mul (rop, rop, v);
+  mpf_mul (rop, rop, f_const);
+  mpf_div (rop, rop, f_m);
+
+  mpf_clear (f_m);
+  mpf_clear (f_const);
+  mpf_clear (f_pi);
+}
+
+double
+merit_u (unsigned int t, mpf_t v, mpz_t m)
+{
+  mpf_t rop;
+  double res;
+
+  mpf_init (rop);
+  merit (rop, t, v, m);
+  res = mpf_get_d (rop);
+  mpf_clear (rop);
+  return res;
+}
+
+/* f_floor (rop, op) -- Set rop = floor (op). */
+void
+f_floor (mpf_t rop, mpf_t op)
+{
+  mpz_t z;
+
+  mpz_init (z);
+
+  /* No mpf_floor().  Convert to mpz and back. */
+  mpz_set_f (z, op);
+  mpf_set_z (rop, z);
+
+  mpz_clear (z);
+}
+
+
+/* vz_dot (rop, v1, v2, nelem) -- compute dot product of z-vectors V1,
+   V2.  N is number of elements in vectors V1 and V2. */
+
+void
+vz_dot (mpz_t rop, mpz_t V1[], mpz_t V2[], unsigned int n)
+{
+  mpz_t t;
+
+  mpz_init (t);
+  mpz_set_ui (rop, 0);
+  while (n--)
+    {
+      mpz_mul (t, V1[n], V2[n]);
+      mpz_add (rop, rop, t);
+    }
+
+  mpz_clear (t);
+}
+
/* spectral_test (rop, T, a, m) -- Knuth's spectral test for the
   linear congruential generator with multiplier A and modulus M.
   For each dimension t, 2 <= t <= T, the figure v[t] is stored in
   rop[t-2] (so rop[] must have room for T-1 results).  T is silently
   clamped to GMP_SPECT_MAXT.  */
void
spectral_test (mpf_t rop[], unsigned int T, mpz_t a, mpz_t m)
{
  /* Knuth "Seminumerical Algorithms, Third Edition", section 3.3.4
     (pp. 101-103). */

  /* v[t] = min { sqrt (x[1]^2 + ... + x[t]^2) |
     x[1] + a*x[2] + ... + pow (a, t-1) * x[t] is congruent to 0 (mod m) } */


  /* Variables. */
  unsigned int ui_t;
  unsigned int ui_i, ui_j, ui_k, ui_l;
  mpf_t f_tmp1, f_tmp2;
  mpz_t tmp1, tmp2, tmp3;
  mpz_t U[GMP_SPECT_MAXT][GMP_SPECT_MAXT],
    V[GMP_SPECT_MAXT][GMP_SPECT_MAXT],
    X[GMP_SPECT_MAXT],
    Y[GMP_SPECT_MAXT],
    Z[GMP_SPECT_MAXT];
  mpz_t h, hp, r, s, p, pp, q, u, v;

  /* GMP inits. */
  mpf_init (f_tmp1);
  mpf_init (f_tmp2);
  for (ui_i = 0; ui_i < GMP_SPECT_MAXT; ui_i++)
    {
      for (ui_j = 0; ui_j < GMP_SPECT_MAXT; ui_j++)
	{
	  mpz_init_set_ui (U[ui_i][ui_j], 0);
	  mpz_init_set_ui (V[ui_i][ui_j], 0);
	}
      mpz_init_set_ui (X[ui_i], 0);
      mpz_init_set_ui (Y[ui_i], 0);
      mpz_init (Z[ui_i]);
    }
  mpz_init (tmp1);
  mpz_init (tmp2);
  mpz_init (tmp3);
  mpz_init (h);
  mpz_init (hp);
  mpz_init (r);
  mpz_init (s);
  mpz_init (p);
  mpz_init (pp);
  mpz_init (q);
  mpz_init (u);
  mpz_init (v);

  /* Implementation inits. */
  if (T > GMP_SPECT_MAXT)
    T = GMP_SPECT_MAXT;			/* FIXME: Lazy. */

  /* S1 [Initialize.] */
  ui_t = 2 - 1;			/* NOTE: `t' in description == ui_t + 1
				   for easy indexing */
  mpz_set (h, a);
  mpz_set (hp, m);
  mpz_set_ui (p, 1);
  mpz_set_ui (pp, 0);
  mpz_set (r, a);
  mpz_pow_ui (s, a, 2);
  mpz_add_ui (s, s, 1);		/* s = 1 + a^2 */

  /* S2 [Euclidean step.] */
  while (1)
    {
      if (g_debug > DEBUG_1)
	{
	  /* Debug check: |h*pp - hp*p| must equal m at every
	     iteration, else the reduction went wrong.  */
	  mpz_mul (tmp1, h, pp);
	  mpz_mul (tmp2, hp, p);
	  mpz_sub (tmp1, tmp1, tmp2);
	  if (mpz_cmpabs (m, tmp1))
	    {
	      printf ("***BUG***: h*pp - hp*p = ");
	      mpz_out_str (stdout, 10, tmp1);
	      printf ("\n");
	    }
	}
      if (g_debug > DEBUG_2)
	{
	  printf ("hp = ");
	  mpz_out_str (stdout, 10, hp);
	  printf ("\nh = ");
	  mpz_out_str (stdout, 10, h);
	  printf ("\n");
	  fflush (stdout);
	}

      if (mpz_sgn (h))
	mpz_tdiv_q (q, hp, h);	/* q = floor(hp/h) */
      else
	mpz_set_ui (q, 1);

      if (g_debug > DEBUG_2)
	{
	  printf ("q = ");
	  mpz_out_str (stdout, 10, q);
	  printf ("\n");
	  fflush (stdout);
	}

      mpz_mul (tmp1, q, h);
      mpz_sub (u, hp, tmp1);	/* u = hp - q*h */

      mpz_mul (tmp1, q, p);
      mpz_sub (v, pp, tmp1);	/* v = pp - q*p */

      /* Continue reducing only while u^2 + v^2 keeps shrinking.  */
      mpz_pow_ui (tmp1, u, 2);
      mpz_pow_ui (tmp2, v, 2);
      mpz_add (tmp1, tmp1, tmp2);
      if (mpz_cmp (tmp1, s) < 0)
	{
	  mpz_set (s, tmp1);	/* s = u^2 + v^2 */
	  mpz_set (hp, h);	/* hp = h */
	  mpz_set (h, u);	/* h = u */
	  mpz_set (pp, p);	/* pp = p */
	  mpz_set (p, v);	/* p = v */
	}
      else
	break;
    }

  /* S3 [Compute v2.] */
  mpz_sub (u, u, h);
  mpz_sub (v, v, p);

  mpz_pow_ui (tmp1, u, 2);
  mpz_pow_ui (tmp2, v, 2);
  mpz_add (tmp1, tmp1, tmp2);
  if (mpz_cmp (tmp1, s) < 0)
    {
      mpz_set (s, tmp1);	/* s = u^2 + v^2 */
      mpz_set (hp, u);
      mpz_set (pp, v);
    }
  /* v[2] = sqrt(s); rop[] is indexed by t-2.  */
  mpf_set_z (f_tmp1, s);
  mpf_sqrt (rop[ui_t - 1], f_tmp1);

  /* S4 [Advance t.] */
  mpz_neg (U[0][0], h);
  mpz_set (U[0][1], p);
  mpz_neg (U[1][0], hp);
  mpz_set (U[1][1], pp);

  mpz_set (V[0][0], pp);
  mpz_set (V[0][1], hp);
  mpz_neg (V[1][0], p);
  mpz_neg (V[1][1], h);
  if (mpz_cmp_ui (pp, 0) > 0)
    {
      mpz_neg (V[0][0], V[0][0]);
      mpz_neg (V[0][1], V[0][1]);
      mpz_neg (V[1][0], V[1][0]);
      mpz_neg (V[1][1], V[1][1]);
    }

  while (ui_t + 1 != T)		/* S4 loop */
    {
      ui_t++;
      mpz_mul (r, a, r);
      mpz_mod (r, r, m);	/* r = a^(t-1) mod m */

      /* Add new row and column to U and V.  They are initialized with
	 all elements set to zero, so clearing is not necessary. */

      mpz_neg (U[ui_t][0], r); /* U: First col in new row. */
      mpz_set_ui (U[ui_t][ui_t], 1); /* U: Last col in new row. */

      mpz_set (V[ui_t][ui_t], m); /* V: Last col in new row. */

      /* "Finally, for 1 <= i < t,
	   set q = round (vi1 * r / m),
	   vit = vi1*r - q*m,
	   and Ut=Ut+q*Ui */

      for (ui_i = 0; ui_i < ui_t; ui_i++)
	{
	  mpz_mul (tmp1, V[ui_i][0], r); /* tmp1=vi1*r */
	  zdiv_round (q, tmp1, m); /* q=round(vi1*r/m) */
	  mpz_mul (tmp2, q, m);	/* tmp2=q*m */
	  mpz_sub (V[ui_i][ui_t], tmp1, tmp2);

	  for (ui_j = 0; ui_j <= ui_t; ui_j++) /* U[t] = U[t] + q*U[i] */
	    {
	      mpz_mul (tmp1, q, U[ui_i][ui_j]);	/* tmp=q*uij */
	      mpz_add (U[ui_t][ui_j], U[ui_t][ui_j], tmp1); /* utj = utj + q*uij */
	    }
	}

      /* s = min (s, vz_dot (U[t], U[t])) */
      vz_dot (tmp1, U[ui_t], U[ui_t], ui_t + 1);
      if (mpz_cmp (tmp1, s) < 0)
	mpz_set (s, tmp1);

      ui_k = ui_t;
      ui_j = 0;			/* WARNING: ui_j no longer a temp. */

      /* S5 [Transform.] */
      if (g_debug > DEBUG_2)
	printf ("(t, k, j, q1, q2, ...)\n");
      do
	{
	  if (g_debug > DEBUG_2)
	    printf ("(%u, %u, %u", ui_t + 1, ui_k + 1, ui_j + 1);

	  for (ui_i = 0; ui_i <= ui_t; ui_i++)
	    {
	      if (ui_i != ui_j)
		{
		  vz_dot (tmp1, V[ui_i], V[ui_j], ui_t + 1); /* tmp1=dot(Vi,Vj). */
		  mpz_abs (tmp2, tmp1);
		  mpz_mul_ui (tmp2, tmp2, 2); /* tmp2 = 2*abs(dot(Vi,Vj) */
		  vz_dot (tmp3, V[ui_j], V[ui_j], ui_t + 1); /* tmp3=dot(Vj,Vj). */

		  if (mpz_cmp (tmp2, tmp3) > 0)
		    {
		      zdiv_round (q, tmp1, tmp3); /* q=round(Vi.Vj/Vj.Vj) */
		      if (g_debug > DEBUG_2)
			{
			  printf (", ");
			  mpz_out_str (stdout, 10, q);
			}

		      for (ui_l = 0; ui_l <= ui_t; ui_l++)
			{
			  mpz_mul (tmp1, q, V[ui_j][ui_l]);
			  mpz_sub (V[ui_i][ui_l], V[ui_i][ui_l], tmp1); /* Vi=Vi-q*Vj */
			  mpz_mul (tmp1, q, U[ui_i][ui_l]);
			  mpz_add (U[ui_j][ui_l], U[ui_j][ui_l], tmp1); /* Uj=Uj+q*Ui */
			}

		      vz_dot (tmp1, U[ui_j], U[ui_j], ui_t + 1); /* tmp1=dot(Uj,Uj) */
		      if (mpz_cmp (tmp1, s) < 0) /* s = min(s,dot(Uj,Uj)) */
			mpz_set (s, tmp1);
		      ui_k = ui_j;
		    }
		  else if (g_debug > DEBUG_2)
		    printf (", #"); /* 2|Vi.Vj| <= Vj.Vj */
		}
	      else if (g_debug > DEBUG_2)
		printf (", *");	/* i == j */
	    }

	  if (g_debug > DEBUG_2)
	    printf (")\n");

	  /* S6 [Advance j.] */
	  if (ui_j == ui_t)
	    ui_j = 0;
	  else
	    ui_j++;
	}
      while (ui_j != ui_k);	/* S5 */

      /* From Knuth p. 104: "The exhaustive search in steps S8-S10
	 reduces the value of s only rarely." */
#ifdef DO_SEARCH
      /* S7 [Prepare for search.] */
      /* Find minimum in (x[1], ..., x[t]) satisfying condition
	 x[k]^2 <= f(y[1], ...,y[t]) * dot(V[k],V[k]) */

      ui_k = ui_t;
      if (g_debug > DEBUG_2)
	{
	  printf ("searching...");
	  /*for (f = 0; f < ui_t*/
	  fflush (stdout);
	}

      /* Z[i] = floor (sqrt (floor (dot(V[i],V[i]) * s / m^2))); */
      mpz_pow_ui (tmp1, m, 2);
      mpf_set_z (f_tmp1, tmp1);
      mpf_set_z (f_tmp2, s);
      mpf_div (f_tmp1, f_tmp2, f_tmp1);	/* f_tmp1 = s/m^2 */
      for (ui_i = 0; ui_i <= ui_t; ui_i++)
	{
	  vz_dot (tmp1, V[ui_i], V[ui_i], ui_t + 1);
	  mpf_set_z (f_tmp2, tmp1);
	  mpf_mul (f_tmp2, f_tmp2, f_tmp1);
	  f_floor (f_tmp2, f_tmp2);
	  mpf_sqrt (f_tmp2, f_tmp2);
	  mpz_set_f (Z[ui_i], f_tmp2);
	}

      /* S8 [Advance X[k].] */
      do
	{
	  if (g_debug > DEBUG_2)
	    {
	      printf ("X[%u] = ", ui_k);
	      mpz_out_str (stdout, 10, X[ui_k]);
	      printf ("\tZ[%u] = ", ui_k);
	      mpz_out_str (stdout, 10, Z[ui_k]);
	      printf ("\n");
	      fflush (stdout);
	    }

	  if (mpz_cmp (X[ui_k], Z[ui_k]))
	    {
	      mpz_add_ui (X[ui_k], X[ui_k], 1);
	      for (ui_i = 0; ui_i <= ui_t; ui_i++)
		mpz_add (Y[ui_i], Y[ui_i], U[ui_k][ui_i]);

	      /* S9 [Advance k.] */
	      while (++ui_k <= ui_t)
		{
		  mpz_neg (X[ui_k], Z[ui_k]);
		  mpz_mul_ui (tmp1, Z[ui_k], 2);
		  for (ui_i = 0; ui_i <= ui_t; ui_i++)
		    {
		      mpz_mul (tmp2, tmp1, U[ui_k][ui_i]);
		      mpz_sub (Y[ui_i], Y[ui_i], tmp2);
		    }
		}
	      vz_dot (tmp1, Y, Y, ui_t + 1);
	      if (mpz_cmp (tmp1, s) < 0)
		mpz_set (s, tmp1);
	    }
	}
      while (--ui_k);
#endif /* DO_SEARCH */
      /* v[t] = sqrt(s); rop[] is indexed by t-2.  */
      mpf_set_z (f_tmp1, s);
      mpf_sqrt (rop[ui_t - 1], f_tmp1);
#ifdef DO_SEARCH
      if (g_debug > DEBUG_2)
	printf ("done.\n");
#endif /* DO_SEARCH */
    } /* S4 loop */

  /* Clear GMP variables. */

  mpf_clear (f_tmp1);
  mpf_clear (f_tmp2);
  for (ui_i = 0; ui_i < GMP_SPECT_MAXT; ui_i++)
    {
      for (ui_j = 0; ui_j < GMP_SPECT_MAXT; ui_j++)
	{
	  mpz_clear (U[ui_i][ui_j]);
	  mpz_clear (V[ui_i][ui_j]);
	}
      mpz_clear (X[ui_i]);
      mpz_clear (Y[ui_i]);
      mpz_clear (Z[ui_i]);
    }
  mpz_clear (tmp1);
  mpz_clear (tmp2);
  mpz_clear (tmp3);
  mpz_clear (h);
  mpz_clear (hp);
  mpz_clear (r);
  mpz_clear (s);
  mpz_clear (p);
  mpz_clear (pp);
  mpz_clear (q);
  mpz_clear (u);
  mpz_clear (v);

  return;
}
diff --git a/tests/rand/t-iset.c b/tests/rand/t-iset.c
new file mode 100644
index 0000000..884220f
--- /dev/null
+++ b/tests/rand/t-iset.c
@@ -0,0 +1,67 @@
+/* Test gmp_randinit_set.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* expect after a gmp_randinit_set that the new and old generators will
+   produce the same sequence of numbers */
+void
+check_one (const char *name, gmp_randstate_ptr src)
+{
+  gmp_randstate_t dst;
+  mpz_t  sz, dz;
+  int    i;
+
+  gmp_randinit_set (dst, src);
+  mpz_init (sz);
+  mpz_init (dz);
+
+  for (i = 0; i < 20; i++)
+    {
+      mpz_urandomb (sz, src, 123);
+      mpz_urandomb (dz, dst, 123);
+
+      if (mpz_cmp (sz, dz) != 0)
+        {
+          printf     ("gmp_randinit_set didn't duplicate randstate\n");
+          printf     ("  algorithm: %s\n", name);
+          gmp_printf ("  from src:  %#Zx\n", sz);
+          gmp_printf ("  from dst:  %#Zx\n", dz);
+          abort ();
+        }
+    }
+
+  mpz_clear (sz);
+  mpz_clear (dz);
+  gmp_randclear (dst);
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  call_rand_algs (check_one);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/rand/t-lc2exp.c b/tests/rand/t-lc2exp.c
new file mode 100644
index 0000000..c66691d
--- /dev/null
+++ b/tests/rand/t-lc2exp.c
@@ -0,0 +1,216 @@
+/* Exercise the lc2exp random functions.
+
+Copyright 2002, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* a=0 and c=0 produces zero results always. */
+void
+check_zero (unsigned long m2exp)
+{
+  gmp_randstate_t  r;
+  mpz_t            a;
+  unsigned long    c;
+  int              i;
+
+  mpz_init_set_ui (a, 0L);
+  c = 0L;
+
+  gmp_randinit_lc_2exp (r, a, c, m2exp);
+  gmp_randseed_ui (r, 0L);
+
+  for (i = 0; i < 5; i++)
+    {
+      mpz_urandomb (a, r, 123L);
+      if (mpz_sgn (a) != 0)
+        {
+          printf ("check_zero m2exp=%lu: didn't get zero\n", m2exp);
+          gmp_printf ("  rand=%#Zx\n", a);
+          abort ();
+        }
+    }
+
+  mpz_clear (a);
+  gmp_randclear (r);
+}
+
+/* negative a */
+void
+check_nega (void)
+{
+  gmp_randstate_t  r;
+  mpz_t            a;
+  unsigned long    c, m2exp;
+  int              i;
+
+  mpz_init (a);
+  mpz_setbit (a, 1000L);
+  mpz_neg (a, a);
+  c = 0L;
+  m2exp = 45L;
+
+  gmp_randinit_lc_2exp (r, a, c, m2exp);
+  gmp_randseed_ui (r, 0L);
+
+  for (i = 0; i < 5; i++)
+    {
+      mpz_urandomb (a, r, 123L);
+      if (mpz_sgn (a) != 0)
+        printf ("check_nega m2exp=%lu: didn't get zero\n", m2exp);
+    }
+
+  mpz_clear (a);
+  gmp_randclear (r);
+}
+
+void
+check_bigc (void)
+{
+  gmp_randstate_t  r;
+  mpz_t            a;
+  unsigned long    c, m2exp, bits;
+  int              i;
+
+  mpz_init_set_ui (a, 0L);
+  c = ULONG_MAX;
+  m2exp = 8;
+
+  gmp_randinit_lc_2exp (r, a, c, m2exp);
+  gmp_randseed_ui (r, 0L);
+
+  for (i = 0; i < 20; i++)
+    {
+      bits = 123L;
+      mpz_urandomb (a, r, bits);
+      if (mpz_sgn (a) < 0 || mpz_sizeinbase (a, 2) > bits)
+        {
+          printf     ("check_bigc: mpz_urandomb out of range\n");
+          printf     ("   m2exp=%lu\n", m2exp);
+          gmp_printf ("   rand=%#ZX\n", a);
+          gmp_printf ("   sizeinbase2=%u\n", mpz_sizeinbase (a, 2));
+	  abort ();
+        }
+    }
+
+  mpz_clear (a);
+  gmp_randclear (r);
+}
+
+void
+check_bigc1 (void)
+{
+  gmp_randstate_t  r;
+  mpz_t            a;
+  unsigned long    c, m2exp;
+  int              i;
+
+  mpz_init_set_ui (a, 0L);
+  c = ULONG_MAX;
+  m2exp = 2;
+
+  gmp_randinit_lc_2exp (r, a, c, m2exp);
+  gmp_randseed_ui (r, 0L);
+
+  for (i = 0; i < 20; i++)
+    {
+      mpz_urandomb (a, r, 1L);
+      if (mpz_cmp_ui (a, 1L) != 0)
+        {
+          printf     ("check_bigc1: mpz_urandomb didn't give 1\n");
+          printf     ("   m2exp=%lu\n", m2exp);
+          gmp_printf ("   got rand=%#ZX\n", a);
+          abort ();
+        }
+    }
+
+  mpz_clear (a);
+  gmp_randclear (r);
+}
+
+/* Checks parameters which triggered an assertion failure in the past.
+   Happened when limbs(a)+limbs(c) < bits_to_limbs(m2exp).  */
+void
+check_bigm (void)
+{
+  gmp_randstate_t rstate;
+  mpz_t a;
+
+  mpz_init_set_ui (a, 5L);
+  gmp_randinit_lc_2exp (rstate, a, 1L, 384L);
+
+  mpz_urandomb (a, rstate, 20L);
+
+  gmp_randclear (rstate);
+  mpz_clear (a);
+}
+
+/* Checks for seeds bigger than the modulus.  */
+void
+check_bigs (void)
+{
+  gmp_randstate_t rstate;
+  mpz_t sd, a;
+  int i;
+
+  mpz_init (sd);
+  mpz_setbit (sd, 300L);
+  mpz_sub_ui (sd, sd, 1L);
+  mpz_clrbit (sd, 13L);
+  mpz_init_set_ui (a, 123456789L);
+
+  gmp_randinit_lc_2exp (rstate, a, 5L, 64L);
+
+  for (i = 0; i < 20; i++)
+    {
+      mpz_neg (sd, sd);
+      gmp_randseed (rstate, sd);
+      mpz_mul_ui (sd, sd, 7L);
+
+      mpz_urandomb (a, rstate, 80L);
+    }
+
+  gmp_randclear (rstate);
+  mpz_clear (a);
+  mpz_clear (sd);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_zero (2L);
+  check_zero (7L);
+  check_zero (32L);
+  check_zero (64L);
+  check_zero (1000L);
+
+  check_nega ();
+  check_bigc ();
+  check_bigc1 ();
+
+  check_bigm ();
+  check_bigs ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/rand/t-mt.c b/tests/rand/t-mt.c
new file mode 100644
index 0000000..08ba7fc
--- /dev/null
+++ b/tests/rand/t-mt.c
@@ -0,0 +1,82 @@
+/* Test the Mersenne Twister random number generator.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+/* Test that the sequence without seeding equals the sequence with the
+   default seed.  */
+int
+chk_default_seed (void)
+{
+  gmp_randstate_t r1, r2;
+  mpz_t a, b;
+  int i;
+  int ok = TRUE;
+
+  mpz_init2 (a, 19936L);
+  mpz_init2 (b, 19936L);
+
+  gmp_randinit_mt (r1);
+  gmp_randinit_mt (r2);
+  gmp_randseed_ui (r2, 5489L); /* Must match DEFAULT_SEED in randmt.c */
+  for (i = 0; i < 3; i++)
+    {
+      /* Extract one whole buffer per iteration.  */
+      mpz_urandomb (a, r1, 19936L);
+      mpz_urandomb (b, r2, 19936L);
+      if (mpz_cmp (a, b) != 0)
+	{
+	  ok = FALSE;
+	  printf ("Default seed fails in iteration %d\n", i);
+	  break;
+	}
+    }
+  gmp_randclear (r1);
+  gmp_randclear (r2);
+
+  mpz_clear (a);
+  mpz_clear (b);
+  return ok;
+}
+
+int
+main (int argc, char *argv[])
+{
+  int ok;
+
+  tests_start ();
+
+  ok = chk_default_seed ();
+
+  tests_end ();
+
+  if (ok)
+    return 0; /* pass */
+  else
+    return 1; /* fail */
+}
diff --git a/tests/rand/t-rand.c b/tests/rand/t-rand.c
new file mode 100644
index 0000000..1265a0d
--- /dev/null
+++ b/tests/rand/t-rand.c
@@ -0,0 +1,290 @@
+/* t-rand -- Test random number generators.  */
+
+/*
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+
+#define SEED 1
+#define BASE 16
+#define ENTS 10			/* Number of entries in array when
+				   printing.  */
+
+/* These were generated by this very program.  Do not edit!  */
+/* Integers.  */
+const char *z1[ENTS] = {"0", "1", "1", "1", "1", "0", "1", "1", "1", "1"};
+const char *z2[ENTS] = {"0", "3", "1", "3", "3", "0", "3", "3", "3", "1"};
+const char *z3[ENTS] = {"4", "3", "1", "7", "3", "0", "3", "3", "3", "1"};
+const char *z4[ENTS] = {"c", "3", "1", "f", "b", "8", "3", "3", "3", "1"};
+const char *z5[ENTS] = {"1c", "13", "11", "1f", "b", "18", "3", "13", "3", "1"};
+
+const char *z10[ENTS] = {"29c", "213", "f1", "17f", "12b", "178", "383", "d3", "3a3", "281"};
+
+const char *z15[ENTS] = {"29c", "1a13", "74f1", "257f", "592b", "4978", "4783", "7cd3", "5ba3", "4681"};
+const char *z16[ENTS] = {"29c", "9a13", "74f1", "a57f", "d92b", "4978", "c783", "fcd3", "5ba3", "c681"};
+const char *z17[ENTS] = {"51e", "f17a", "54ff", "1a335", "cf65", "5d6f", "583f", "618f", "1bc6", "98ff"};
+
+const char *z31[ENTS] = {"3aecd515", "13ae8ec6", "518c8090", "81ca077", "70b7134", "7ee78d71", "323a7636", "2122cb1a", "19811941", "41fd605"};
+const char *z32[ENTS] = {"baecd515", "13ae8ec6", "518c8090", "881ca077", "870b7134", "7ee78d71", "323a7636", "a122cb1a", "99811941", "841fd605"};
+const char *z33[ENTS] = {"1faf4cca", "15d6ef83b", "9095fe72", "1b6a3dff6", "b17cbddd", "16e5209d4", "6f65b12c", "493bbbc6", "abf2a5d5", "6d491a3c"};
+
+const char *z63[ENTS] = {"48a74f367fa7b5c8", "3ba9e9dc1b263076", "1e0ac84e7678e0fb", "11416581728b3e35", "36ab610523f0f1f7", "3e540e8e95c0eb4b", "439ae16057dbc9d3", "734fb260db243950", "7d3a317effc289bf", "1d80301fb3d1a0d1"};
+const char *z64[ENTS] = {"48a74f367fa7b5c8", "bba9e9dc1b263076", "9e0ac84e7678e0fb", "11416581728b3e35", "b6ab610523f0f1f7", "be540e8e95c0eb4b", "439ae16057dbc9d3", "f34fb260db243950", "fd3a317effc289bf", "1d80301fb3d1a0d1"};
+const char *z65[ENTS] = {"1ff77710d846d49f0", "1b1411701d709ee10", "31ffa81a208b6af4", "446638d431d3c681", "df5c569d5baa8b55", "197d99ea9bf28e5a0", "191ade09edd94cfae", "194acefa6dde5e18d", "1afc1167c56272d92", "d092994da72f206f"};
+
+const char *z127[ENTS] = {"2f66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "2ab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "77848bb991fd0be331adcf1457fbc672"};
+const char *z128[ENTS] = {"af66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "8b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "aab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "f7848bb991fd0be331adcf1457fbc672"};
+
+/* Floats.  */
+const char *f1[ENTS] = {"0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0", "0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0"};
+const char *f2[ENTS] = {"0.@0", "0.c@0", "0.4@0", "0.c@0", "0.c@0", "0.@0", "0.c@0", "0.c@0", "0.c@0", "0.4@0"};
+const char *f3[ENTS] = {"0.8@0", "0.6@0", "0.2@0", "0.e@0", "0.6@0", "0.@0", "0.6@0", "0.6@0", "0.6@0", "0.2@0"};
+const char *f4[ENTS] = {"0.c@0", "0.3@0", "0.1@0", "0.f@0", "0.b@0", "0.8@0", "0.3@0", "0.3@0", "0.3@0", "0.1@0"};
+const char *f5[ENTS] = {"0.e@0", "0.98@0", "0.88@0", "0.f8@0", "0.58@0", "0.c@0", "0.18@0", "0.98@0", "0.18@0", "0.8@-1"};
+
+const char *f10[ENTS] = {"0.a7@0", "0.84c@0", "0.3c4@0", "0.5fc@0", "0.4ac@0", "0.5e@0", "0.e0c@0", "0.34c@0", "0.e8c@0", "0.a04@0"};
+
+const char *f15[ENTS] = {"0.538@-1", "0.3426@0", "0.e9e2@0", "0.4afe@0", "0.b256@0", "0.92f@0", "0.8f06@0", "0.f9a6@0", "0.b746@0", "0.8d02@0"};
+const char *f16[ENTS] = {"0.29c@-1", "0.9a13@0", "0.74f1@0", "0.a57f@0", "0.d92b@0", "0.4978@0", "0.c783@0", "0.fcd3@0", "0.5ba3@0", "0.c681@0"};
+const char *f17[ENTS] = {"0.28f@-1", "0.78bd@0", "0.2a7f8@0", "0.d19a8@0", "0.67b28@0", "0.2eb78@0", "0.2c1f8@0", "0.30c78@0", "0.de3@-1", "0.4c7f8@0"};
+
+const char *f31[ENTS] = {"0.75d9aa2a@0", "0.275d1d8c@0", "0.a319012@0", "0.103940ee@0", "0.e16e268@-1", "0.fdcf1ae2@0", "0.6474ec6c@0", "0.42459634@0", "0.33023282@0", "0.83fac0a@-1"};
+const char *f32[ENTS] = {"0.baecd515@0", "0.13ae8ec6@0", "0.518c809@0", "0.881ca077@0", "0.870b7134@0", "0.7ee78d71@0", "0.323a7636@0", "0.a122cb1a@0", "0.99811941@0", "0.841fd605@0"};
+const char *f33[ENTS] = {"0.fd7a665@-1", "0.aeb77c1d8@0", "0.484aff39@0", "0.db51effb@0", "0.58be5eee8@0", "0.b72904ea@0", "0.37b2d896@0", "0.249ddde3@0", "0.55f952ea8@0", "0.36a48d1e@0"};
+
+const char *f63[ENTS] = {"0.914e9e6cff4f6b9@0", "0.7753d3b8364c60ec@0", "0.3c15909cecf1c1f6@0", "0.2282cb02e5167c6a@0", "0.6d56c20a47e1e3ee@0", "0.7ca81d1d2b81d696@0", "0.8735c2c0afb793a6@0", "0.e69f64c1b64872a@0", "0.fa7462fdff85137e@0", "0.3b00603f67a341a2@0"};
+const char *f64[ENTS] = {"0.48a74f367fa7b5c8@0", "0.bba9e9dc1b263076@0", "0.9e0ac84e7678e0fb@0", "0.11416581728b3e35@0", "0.b6ab610523f0f1f7@0", "0.be540e8e95c0eb4b@0", "0.439ae16057dbc9d3@0", "0.f34fb260db24395@0", "0.fd3a317effc289bf@0", "0.1d80301fb3d1a0d1@0"};
+const char *f65[ENTS] = {"0.ffbbb886c236a4f8@0", "0.d8a08b80eb84f708@0", "0.18ffd40d1045b57a@0", "0.22331c6a18e9e3408@0", "0.6fae2b4eadd545aa8@0", "0.cbeccf54df9472d@0", "0.c8d6f04f6eca67d7@0", "0.ca5677d36ef2f0c68@0", "0.d7e08b3e2b1396c9@0", "0.68494ca6d39790378@0"};
+
+const char *f127[ENTS] = {"0.5ecd7526555eb140e3fb1e0e85533418@0", "0.e79f478cc99382ea6a0f94c1d8d7084a@0", "0.a7d40e994263bd8259ad171551c404f@0", "0.79eb5918686a65f14a7981d68eb03ee6@0", "0.a1823ab0d3c411543735462f7185a152@0", "0.16462c791250ec8e563de32c85d59c12@0", "0.913e9807a83f0ea1391ad9219cce9f2a@0", "0.5570e9192d54cec5d4326568993ae2c8@0", "0.13196ab23f80b5a635f7783acf721dba@0", "0.ef09177323fa17c6635b9e28aff78ce4@0"};
+const char *f128[ENTS] = {"0.af66ba932aaf58a071fd8f0742a99a0c@0", "0.73cfa3c664c9c1753507ca60ec6b8425@0", "0.53ea074ca131dec12cd68b8aa8e20278@0", "0.3cf5ac8c343532f8a53cc0eb47581f73@0", "0.50c11d5869e208aa1b9aa317b8c2d0a9@0", "0.8b23163c892876472b1ef19642eace09@0", "0.489f4c03d41f87509c8d6c90ce674f95@0", "0.aab8748c96aa6762ea1932b44c9d7164@0", "0.98cb5591fc05ad31afbbc1d67b90edd@-1", "0.f7848bb991fd0be331adcf1457fbc672@0"};
+
+
+struct rt
+{
+  const char **s;
+  int nbits;
+};
+
+static struct rt zarr[] =
+{
+  {z1, 1},
+  {z2, 2},
+  {z3, 3},
+  {z4, 4},
+  {z5, 5},
+  {z10, 10},
+  {z15, 15},
+  {z16, 16},
+  {z17, 17},
+  {z31, 31},
+  {z32, 32},
+  {z33, 33},
+  {z63, 63},
+  {z64, 64},
+  {z65, 65},
+  {z127, 127},
+  {z128, 128},
+  {NULL, 0}
+};
+
+static struct rt farr[] =
+{
+  {f1, 1},
+  {f2, 2},
+  {f3, 3},
+  {f4, 4},
+  {f5, 5},
+  {f10, 10},
+  {f15, 15},
+  {f16, 16},
+  {f17, 17},
+  {f31, 31},
+  {f32, 32},
+  {f33, 33},
+  {f63, 63},
+  {f64, 64},
+  {f65, 65},
+  {f127, 127},
+  {f128, 128},
+  {NULL, 0}
+};
+
+
+int
+main (int argc, char *argv[])
+{
+  static char usage[] = "\
+usage: t-rand [function nbits]\n\
+  function is one of z, f\n\
+  nbits is number of bits\n\
+";
+  gmp_randstate_t rstate;
+  mpz_t z, rz;
+  mpf_t f, rf;
+  enum { Z, F } func = Z;
+  int nbits = 1;
+  int verify_mode_flag = 1;
+  int i;
+  struct rt *a;
+
+
+  if (argc > 1)
+    {
+      if (argc < 3)
+	{
+	  fputs (usage, stderr);
+	  exit (1);
+	}
+      verify_mode_flag = 0;
+      if (*argv[1] == 'z')
+	func = Z;
+      if (*argv[1] == 'f')
+	func = F;
+      nbits = atoi (argv[2]);
+    }
+
+  mpz_init (rz);
+
+  if (verify_mode_flag)
+    {
+#ifdef VERBOSE
+      printf ("%s: verifying random numbers: ", argv[0]);
+#endif
+
+      /* Test z.  */
+      mpz_init (z);
+      for (a = zarr; a->s != NULL; a++)
+	{
+	  gmp_randinit (rstate, GMP_RAND_ALG_LC, a->nbits);
+	  if (gmp_errno != GMP_ERROR_NONE)
+	    exit (1);
+	  gmp_randseed_ui (rstate, SEED);
+
+	  for (i = 0; i < ENTS; i++)
+	    {
+	      mpz_urandomb (rz, rstate, a->nbits);
+	      mpz_set_str (z, a->s[i], BASE);
+	      if (mpz_cmp (z, rz) != 0)
+		{
+		  printf ("z%d: ", a->nbits);
+		  mpz_out_str (stdout, BASE, rz);
+		  printf (" should be ");
+		  mpz_out_str (stdout, BASE, z);
+		  puts ("");
+		  exit (1);
+		}
+	    }
+#ifdef VERBOSE
+	  printf ("z%d ", a->nbits);
+#endif
+	  gmp_randclear (rstate);
+	}
+      mpz_clear (z);
+
+
+      /* Test f.  */
+      for (a = farr; a->s != NULL; a++)
+	{
+	  gmp_randinit (rstate, GMP_RAND_ALG_LC, a->nbits);
+	  if (gmp_errno != GMP_ERROR_NONE)
+	    exit (1);
+	  gmp_randseed_ui (rstate, SEED);
+
+	  mpf_init2 (f, a->nbits);
+	  mpf_init2 (rf, a->nbits);
+	  for (i = 0; i < ENTS; i++)
+	    {
+	      mpf_urandomb (rf, rstate, a->nbits);
+	      mpf_set_str (f, a->s[i], BASE);
+	      if (mpf_cmp (f, rf) != 0)
+		{
+		  printf ("f%d: ", a->nbits);
+		  mpf_out_str (stdout, BASE, a->nbits, rf);
+		  printf (" should be ");
+		  mpf_out_str (stdout, BASE, a->nbits, f);
+		  puts ("");
+		  exit (1);
+		}
+	    }
+#ifdef VERBOSE
+	  printf ("f%d ", a->nbits);
+#endif
+	  gmp_randclear (rstate);
+	  mpf_clear (f);
+	  mpf_clear (rf);
+	}
+
+#ifdef VERBOSE
+      puts ("");
+#endif
+    }
+  else				/* Print mode.  */
+    {
+      gmp_randinit (rstate, GMP_RAND_ALG_LC, nbits);
+      if (gmp_errno != GMP_ERROR_NONE)
+	exit (1);
+      gmp_randseed_ui (rstate, SEED);
+
+      switch (func)
+	{
+	case Z:
+	  printf ("char *z%d[ENTS] = {", nbits);
+	  for (i = 0; i < ENTS; i++)
+	    {
+	      mpz_urandomb (rz, rstate, nbits);
+	      printf ("\"");
+	      mpz_out_str (stdout, BASE, rz);
+	      printf ("\"");
+	      if (i != ENTS - 1)
+		printf (", ");
+	    }
+	  printf ("};\n");
+	  printf ("  {z%d, %d},\n", nbits, nbits);
+	  break;
+
+	case F:
+	  printf ("char *f%d[ENTS] = {", nbits);
+	  mpf_init2 (rf, nbits);
+	  for (i = 0; i < ENTS; i++)
+	    {
+	      mpf_urandomb (rf, rstate, nbits);
+	      printf ("\"");
+	      mpf_out_str (stdout, BASE, nbits, rf);
+	      printf ("\"");
+	      if (i != ENTS - 1)
+		printf (", ");
+	    }
+	  printf ("};\n");
+	  printf ("  {f%d, %d},\n", nbits, nbits);
+	  mpf_clear (rf);
+	  break;
+
+	default:
+	  exit (1);
+	}
+
+      gmp_randclear (rstate);
+    }
+
+  mpz_clear (rz);
+
+  return 0;
+}
diff --git a/tests/rand/t-urbui.c b/tests/rand/t-urbui.c
new file mode 100644
index 0000000..f56f538
--- /dev/null
+++ b/tests/rand/t-urbui.c
@@ -0,0 +1,64 @@
+/* Test gmp_urandomb_ui.
+
+Copyright 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Expect numbers generated by rstate to obey the number of bits requested.
+   No point testing bits==BITS_PER_ULONG, since any return is acceptable in
+   that case.  */
+void
+check_one (const char *name, gmp_randstate_ptr rstate)
+{
+  unsigned long  bits, limit, got;
+  int    i;
+
+  for (bits = 0; bits < BITS_PER_ULONG; bits++)
+    {
+      /* will demand got < limit */
+      limit = (1UL << bits);
+
+      for (i = 0; i < 5; i++)
+        {
+          got = gmp_urandomb_ui (rstate, bits);
+          if (got >= limit)
+            {
+              printf ("Return value out of range:\n");
+              printf ("  algorithm: %s\n", name);
+              printf ("  bits:  %lu\n", bits);
+              printf ("  limit: %#lx\n", limit);
+              printf ("  got:   %#lx\n", got);
+              abort ();
+            }
+        }
+    }
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  call_rand_algs (check_one);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/rand/t-urmui.c b/tests/rand/t-urmui.c
new file mode 100644
index 0000000..cc8fedb
--- /dev/null
+++ b/tests/rand/t-urmui.c
@@ -0,0 +1,74 @@
+/* Test gmp_urandomm_ui.
+
+Copyright 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Expect numbers generated by rstate to obey the limit requested. */
+void
+check_one (const char *name, gmp_randstate_ptr rstate)
+{
+  static const unsigned long  n_table[] = {
+    1, 2, 3, 4, 5, 6, 7, 8,
+    123, 456, 789,
+
+    255, 256, 257,
+    1023, 1024, 1025,
+    32767, 32768, 32769,
+
+    ULONG_MAX/2-2, ULONG_MAX/2-1, ULONG_MAX/2, ULONG_MAX/2+1, ULONG_MAX/2+2,
+
+    ULONG_MAX-2, ULONG_MAX-1, ULONG_MAX,
+  };
+
+  unsigned long  got, n;
+  int    i, j;
+
+  for (i = 0; i < numberof (n_table); i++)
+    {
+      n = n_table[i];
+
+      for (j = 0; j < 5; j++)
+        {
+          got = gmp_urandomm_ui (rstate, n);
+          if (got >= n)
+            {
+              printf ("Return value out of range:\n");
+              printf ("  algorithm: %s\n", name);
+              printf ("  n:     %#lx\n", n);
+              printf ("  got:   %#lx\n", got);
+              abort ();
+            }
+        }
+    }
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  call_rand_algs (check_one);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/rand/t-urndmm.c b/tests/rand/t-urndmm.c
new file mode 100644
index 0000000..ae700dd
--- /dev/null
+++ b/tests/rand/t-urndmm.c
@@ -0,0 +1,158 @@
+/* Test mpz_urandomm.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+int
+check_params (void)
+{
+  gmp_randstate_t r1, r2;
+  mpz_t a, b, m;
+  int i;
+  int result;
+
+  result = TRUE;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (m);
+
+  if (result)
+    {
+      /* Test the consistency between urandomm and urandomb. */
+      gmp_randinit_default (r1);
+      gmp_randinit_default (r2);
+      gmp_randseed_ui (r1, 85L);
+      gmp_randseed_ui (r2, 85L);
+      mpz_set_ui (m, 0L);
+      mpz_setbit (m, 80L);
+      for (i = 0; i < 100; i++)
+	{
+	  mpz_urandomm (a, r1, m);
+	  mpz_urandomb (b, r2, 80L);
+	  if (mpz_cmp (a, b) != 0)
+	    {
+	      result = FALSE;
+	      printf ("mpz_urandomm != mpz_urandomb\n");
+	      break;
+	    }
+	}
+      gmp_randclear (r1);
+      gmp_randclear (r2);
+    }
+
+  if (result)
+    {
+      /* Test that mpz_urandomm returns the correct result with a
+	 broken LC.  */
+      mpz_set_ui (a, 0L);
+      gmp_randinit_lc_2exp (r1, a, 0xffL, 8L);
+      mpz_set_ui (m, 5L);
+      /* Warning: This code hangs in gmp 4.1 and below */
+      for (i = 0; i < 100; i++)
+	{
+	  mpz_urandomm (a, r1, m);
+	  if (mpz_cmp_ui (a, 2L) != 0)
+	    {
+	      result = FALSE;
+	      gmp_printf ("mpz_urandomm returns %Zd instead of 2\n", a);
+	      break;
+	    }
+	}
+      gmp_randclear (r1);
+    }
+
+  if (result)
+    {
+      /* Test that the results are always in range for either
+         positive or negative values of m.  */
+      gmp_randinit_default (r1);
+      mpz_set_ui (m, 5L);
+      mpz_set_si (b, -5L);
+      for (i = 0; i < 100; i++)
+	{
+	  mpz_urandomm (a, r1, m);
+	  if (mpz_cmp_ui (a, 5L) >= 0 || mpz_sgn (a) < 0)
+	    {
+	      result = FALSE;
+	      gmp_printf ("Out-of-range or non-positive value: %Zd\n", a);
+	      break;
+	    }
+	  mpz_urandomm (a, r1, b);
+	  if (mpz_cmp_ui (a, 5L) >= 0 || mpz_sgn (a) < 0)
+	    {
+	      result = FALSE;
+	      gmp_printf ("Out-of-range or non-positive value (from negative modulus): %Zd\n", a);
+	      break;
+	    }
+	}
+      gmp_randclear (r1);
+    }
+
+  if (result)
+    {
+      /* Test that m=1 forces always result=0.  */
+      gmp_randinit_default (r1);
+      mpz_set_ui (m, 1L);
+      for (i = 0; i < 100; i++)
+	{
+	  mpz_urandomm (a, r1, m);
+	  if (mpz_sgn (a) != 0)
+	    {
+	      result = FALSE;
+	      gmp_printf ("mpz_urandomm fails with m=1 (result=%Zd)\n", a);
+	      break;
+	    }
+	}
+      gmp_randclear (r1);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (m);
+  return result;
+}
+
+int
+main (int argc, char *argv[])
+{
+  int result = TRUE;
+
+  tests_start ();
+
+  if (result)
+    if (!check_params ())
+      result = FALSE;
+
+  tests_end ();
+
+  if (result)
+    return 0; /* pass */
+  else
+    return 1; /* fail */
+}
diff --git a/tests/rand/zdiv_round.c b/tests/rand/zdiv_round.c
new file mode 100644
index 0000000..e42e694
--- /dev/null
+++ b/tests/rand/zdiv_round.c
@@ -0,0 +1,43 @@
+/* zdiv_round() -- divide integers, round to nearest */
+
+/*
+Copyright 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+void
+zdiv_round (mpz_t rop, mpz_t n, mpz_t d)
+{
+  mpf_t f_n, f_d;
+
+  mpf_init (f_n);
+  mpf_init (f_d);
+
+  mpf_set_z (f_d, d);
+  mpf_set_z (f_n, n);
+
+  mpf_div (f_n, f_n, f_d);
+  mpf_set_d (f_d, .5);
+  if (mpf_sgn (f_n) < 0)
+    mpf_neg (f_d, f_d);
+  mpf_add (f_n, f_n, f_d);
+  mpz_set_f (rop, f_n);
+
+  mpf_clear (f_n);
+  mpf_clear (f_d);
+  return;
+}
diff --git a/tests/refmpf.c b/tests/refmpf.c
new file mode 100644
index 0000000..bc955a5
--- /dev/null
+++ b/tests/refmpf.c
@@ -0,0 +1,427 @@
+/* Reference floating point routines.
+
+Copyright 1996, 2001, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* w = u + v.  Reference implementation: both operands are expanded into
+   temporary buffers spanning the union of their limb ranges (so no
+   precision is lost during the add), then the exact sum is truncated to
+   PREC(w) limbs.  Mixed signs are delegated to refmpf_sub.  */
+void
+refmpf_add (mpf_ptr w, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_size_t hi, lo, size;
+  mp_ptr ut, vt, wt;
+  int neg;
+  mp_exp_t exp;
+  mp_limb_t cy;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* u == 0: result is just v (copied, possibly truncated below).  */
+  if (SIZ (u) == 0)
+    {
+      size = ABSIZ (v);
+      wt = TMP_ALLOC_LIMBS (size + 1);
+      MPN_COPY (wt, PTR (v), size);
+      exp = EXP (v);
+      neg = SIZ (v) < 0;
+      goto done;
+    }
+  /* v == 0: result is just u.  */
+  if (SIZ (v) == 0)
+    {
+      size = ABSIZ (u);
+      wt = TMP_ALLOC_LIMBS (size + 1);
+      MPN_COPY (wt, PTR (u), size);
+      exp = EXP (u);
+      neg = SIZ (u) < 0;
+      goto done;
+    }
+  /* Opposite signs: u + v == u - (-v), handled by refmpf_sub.  tmp is a
+     shallow alias of v with the sign flipped (shares v's limb data).  */
+  if ((SIZ (u) ^ SIZ (v)) < 0)
+    {
+      mpf_t tmp;
+      SIZ (tmp) = -SIZ (v);
+      EXP (tmp) = EXP (v);
+      PTR (tmp) = PTR (v);
+      refmpf_sub (w, u, tmp);
+      return;
+    }
+  neg = SIZ (u) < 0;
+
+  /* Compute the significance of the hi and lo end of the result.  */
+  hi = MAX (EXP (u), EXP (v));
+  lo = MIN (EXP (u) - ABSIZ (u), EXP (v) - ABSIZ (v));
+  size = hi - lo;
+  ut = TMP_ALLOC_LIMBS (size + 1);
+  vt = TMP_ALLOC_LIMBS (size + 1);
+  wt = TMP_ALLOC_LIMBS (size + 1);
+  MPN_ZERO (ut, size);
+  MPN_ZERO (vt, size);
+  /* Copy each operand into its aligned position within [lo,hi).  */
+  {int off;
+  off = size + (EXP (u) - hi) - ABSIZ (u);
+  MPN_COPY (ut + off, PTR (u), ABSIZ (u));
+  off = size + (EXP (v) - hi) - ABSIZ (v);
+  MPN_COPY (vt + off, PTR (v), ABSIZ (v));
+  }
+
+  cy = mpn_add_n (wt, ut, vt, size);
+  wt[size] = cy;
+  size += cy;
+  exp = hi + cy;
+
+done:
+  /* Truncate (from the low end) to the destination's precision.  */
+  if (size > PREC (w))
+    {
+      wt += size - PREC (w);
+      size = PREC (w);
+    }
+  MPN_COPY (PTR (w), wt, size);
+  SIZ (w) = neg == 0 ? size : -size;
+  EXP (w) = exp;
+  TMP_FREE;
+}
+
+
+/* Add 1 "unit in last place" (ie. in the least significant limb) to f.
+   f cannot be zero, since that has no well-defined "last place".
+
+   This routine is designed for use in cases where we pay close attention to
+   the size of the data value and are using that (and the exponent) to
+   indicate the accurate part of a result, or similar.  For this reason, if
+   there's a carry out we don't store 1 and adjust the exponent, we just
+   leave 100..00.  We don't even adjust if there's a carry out of prec+1
+   limbs, but instead give up in that case (which we intend shouldn't arise
+   in normal circumstances).  */
+
+void
+refmpf_add_ulp (mpf_ptr f)
+{
+  mp_ptr     fp = PTR(f);
+  mp_size_t  fsize = SIZ(f);
+  mp_size_t  abs_fsize = ABSIZ(f);
+  mp_limb_t  c;
+
+  if (fsize == 0)
+    {
+      printf ("Oops, refmpf_add_ulp called with f==0\n");
+      abort ();
+    }
+
+  c = refmpn_add_1 (fp, fp, abs_fsize, CNST_LIMB(1));
+  if (c != 0)
+    {
+      /* Carry out of the top limb: extend by one limb (keeping the
+         sign of f) and bump the exponent, provided we stay within
+         prec+1 limbs as described above.  */
+      if (abs_fsize >= PREC(f) + 1)
+        {
+          printf ("Oops, refmpf_add_ulp carried out of prec+1 limbs\n");
+          abort ();
+        }
+
+      fp[abs_fsize] = c;
+      abs_fsize++;
+      SIZ(f) = (fsize > 0 ? abs_fsize : - abs_fsize);
+      EXP(f)++;
+    }
+}
+
+/* Fill f with size limbs of the given value, setup as an integer.
+   size is clamped to PREC(f)+1 so the data fits f's allocation.  */
+void
+refmpf_fill (mpf_ptr f, mp_size_t size, mp_limb_t value)
+{
+  ASSERT (size >= 0);
+  size = MIN (PREC(f) + 1, size);
+  SIZ(f) = size;
+  EXP(f) = size;
+  refmpn_fill (PTR(f), size, value);
+}
+
+/* Strip high zero limbs from the f data, adjusting exponent accordingly.
+   A value that becomes entirely zero gets the canonical EXP of 0.  */
+void
+refmpf_normalize (mpf_ptr f)
+{
+  while (SIZ(f) != 0 && PTR(f)[ABSIZ(f)-1] == 0)
+    {
+      SIZ(f) = (SIZ(f) >= 0 ? SIZ(f)-1 : SIZ(f)+1);
+      EXP(f) --;
+    }
+  if (SIZ(f) == 0)
+    EXP(f) = 0;
+}
+
+/* refmpf_set_overlap sets up dst as a copy of src, but with PREC(dst)
+   unchanged, in preparation for an overlap test.
+
+   The full value of src is copied, and the space at PTR(dst) is extended as
+   necessary.  The way PREC(dst) is unchanged is as per an mpf_set_prec_raw.
+   The return value is the new PTR(dst) space precision, in bits, ready for
+   a restoring mpf_set_prec_raw before mpf_clear.  */
+
+unsigned long
+refmpf_set_overlap (mpf_ptr dst, mpf_srcptr src)
+{
+  mp_size_t  dprec = PREC(dst);
+  mp_size_t  ssize = ABSIZ(src);
+  unsigned long  ret;
+
+  /* Grow dst's allocation to hold all of src, then copy.  */
+  refmpf_set_prec_limbs (dst, (unsigned long) MAX (dprec, ssize));
+  mpf_set (dst, src);
+
+  /* Remember the enlarged precision for the caller, then restore the
+     original PREC field (raw, without shrinking the allocation).  */
+  ret = mpf_get_prec (dst);
+  PREC(dst) = dprec;
+  return ret;
+}
+
+/* Like mpf_set_prec, but taking a precision in limbs.
+   PREC(f) ends up as the given "prec" value.  */
+void
+refmpf_set_prec_limbs (mpf_ptr f, unsigned long prec)
+{
+  mpf_set_prec (f, __GMPF_PREC_TO_BITS (prec));
+}
+
+
+/* w = u - v.  Reference implementation, mirror of refmpf_add: operands
+   are expanded over the union of their limb ranges, the smaller magnitude
+   is subtracted from the larger (flipping the result sign if needed),
+   high zero limbs are stripped, and the result is truncated to PREC(w).
+   Mixed signs are delegated to refmpf_add.  */
+void
+refmpf_sub (mpf_ptr w, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_size_t hi, lo, size;
+  mp_ptr ut, vt, wt;
+  int neg;
+  mp_exp_t exp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* u == 0: result is -v.  */
+  if (SIZ (u) == 0)
+    {
+      size = ABSIZ (v);
+      wt = TMP_ALLOC_LIMBS (size + 1);
+      MPN_COPY (wt, PTR (v), size);
+      exp = EXP (v);
+      neg = SIZ (v) > 0;
+      goto done;
+    }
+  /* v == 0: result is u.  */
+  if (SIZ (v) == 0)
+    {
+      size = ABSIZ (u);
+      wt = TMP_ALLOC_LIMBS (size + 1);
+      MPN_COPY (wt, PTR (u), size);
+      exp = EXP (u);
+      neg = SIZ (u) < 0;
+      goto done;
+    }
+  /* Opposite signs: u - v == u + (-v); tmp shallow-aliases v's data.  */
+  if ((SIZ (u) ^ SIZ (v)) < 0)
+    {
+      mpf_t tmp;
+      SIZ (tmp) = -SIZ (v);
+      EXP (tmp) = EXP (v);
+      PTR (tmp) = PTR (v);
+      refmpf_add (w, u, tmp);
+      if (SIZ (u) < 0)
+	mpf_neg (w, w);
+      return;
+    }
+  neg = SIZ (u) < 0;
+
+  /* Compute the significance of the hi and lo end of the result.  */
+  hi = MAX (EXP (u), EXP (v));
+  lo = MIN (EXP (u) - ABSIZ (u), EXP (v) - ABSIZ (v));
+  size = hi - lo;
+  ut = TMP_ALLOC_LIMBS (size + 1);
+  vt = TMP_ALLOC_LIMBS (size + 1);
+  wt = TMP_ALLOC_LIMBS (size + 1);
+  MPN_ZERO (ut, size);
+  MPN_ZERO (vt, size);
+  /* Copy each operand into its aligned position within [lo,hi).  */
+  {int off;
+  off = size + (EXP (u) - hi) - ABSIZ (u);
+  MPN_COPY (ut + off, PTR (u), ABSIZ (u));
+  off = size + (EXP (v) - hi) - ABSIZ (v);
+  MPN_COPY (vt + off, PTR (v), ABSIZ (v));
+  }
+
+  /* Subtract smaller magnitude from larger; flip sign when |u| < |v|.  */
+  if (mpn_cmp (ut, vt, size) >= 0)
+    mpn_sub_n (wt, ut, vt, size);
+  else
+    {
+      mpn_sub_n (wt, vt, ut, size);
+      neg ^= 1;
+    }
+  exp = hi;
+  /* Strip high zero limbs produced by cancellation.  */
+  while (size != 0 && wt[size - 1] == 0)
+    {
+      size--;
+      exp--;
+    }
+
+done:
+  /* Truncate (from the low end) to the destination's precision.  */
+  if (size > PREC (w))
+    {
+      wt += size - PREC (w);
+      size = PREC (w);
+    }
+  MPN_COPY (PTR (w), wt, size);
+  SIZ (w) = neg == 0 ? size : -size;
+  EXP (w) = exp;
+  TMP_FREE;
+}
+
+
+/* Validate got by comparing to want.  Return 1 if good, 0 if bad.
+
+   The data in got is compared to that in want, up to either PREC(got) limbs
+   or the size of got, whichever is bigger.  Clearly we always demand
+   PREC(got) of accuracy, but we go further and say that if got is bigger
+   then any extra must be correct too.
+
+   want needs to have enough data to allow this comparison.  The size in
+   want doesn't have to be that big though, if it's smaller then further low
+   limbs are taken to be zero.
+
+   This validation approach is designed to allow some flexibility in exactly
+   how much data is generated by an mpf function, ie. either prec or prec+1
+   limbs.  We don't try to make a reference function that emulates that same
+   size decision, instead the idea is for a validation function to generate
+   at least as much data as the real function, then compare.  */
+
+int
+refmpf_validate (const char *name, mpf_srcptr got, mpf_srcptr want)
+{
+  int  bad = 0;
+  mp_size_t  gsize, wsize, cmpsize, i;
+  mp_srcptr  gp, wp;
+  mp_limb_t  glimb, wlimb;
+
+  MPF_CHECK_FORMAT (got);
+
+  if (EXP (got) != EXP (want))
+    {
+      printf ("%s: wrong exponent\n", name);
+      bad = 1;
+    }
+
+  /* Signs must agree (zero is compatible with either sign).  */
+  gsize = SIZ (got);
+  wsize = SIZ (want);
+  if ((gsize < 0 && wsize > 0) || (gsize > 0 && wsize < 0))
+    {
+      printf ("%s: wrong sign\n", name);
+      bad = 1;
+    }
+
+  gsize = ABS (gsize);
+  wsize = ABS (wsize);
+
+  /* most significant limb of respective data */
+  gp = PTR (got) + gsize - 1;
+  wp = PTR (want) + wsize - 1;
+
+  /* compare limb data, high to low, padding the shorter with zeros */
+  cmpsize = MAX (PREC (got), gsize);
+  for (i = 0; i < cmpsize; i++)
+    {
+      glimb = (i < gsize ? gp[-i] : 0);
+      wlimb = (i < wsize ? wp[-i] : 0);
+
+      if (glimb != wlimb)
+        {
+          printf ("%s: wrong data starting at index %ld from top\n",
+                  name, (long) i);
+          bad = 1;
+          break;
+        }
+    }
+
+  /* On failure, dump both operands for diagnosis.  */
+  if (bad)
+    {
+      printf ("  prec       %d\n", PREC(got));
+      printf ("  exp got    %ld\n", (long) EXP(got));
+      printf ("  exp want   %ld\n", (long) EXP(want));
+      printf ("  size got   %d\n", SIZ(got));
+      printf ("  size want  %d\n", SIZ(want));
+      printf ("  limbs (high to low)\n");
+      printf ("   got  ");
+      for (i = ABSIZ(got)-1; i >= 0; i--)
+        {
+          gmp_printf ("%MX", PTR(got)[i]);
+          if (i != 0)
+            printf (",");
+        }
+      printf ("\n");
+      printf ("   want ");
+      for (i = ABSIZ(want)-1; i >= 0; i--)
+        {
+          gmp_printf ("%MX", PTR(want)[i]);
+          if (i != 0)
+            printf (",");
+        }
+      printf ("\n");
+      return 0;
+    }
+
+  return 1;
+}
+
+
+/* Validate got == n/d by computing a reference quotient with
+   refmpn_tdiv_qr to prec+1 limbs and handing it to refmpf_validate.
+   Returns 1 if good, 0 if bad.  d must be nonzero.  */
+int
+refmpf_validate_division (const char *name, mpf_srcptr got,
+                          mpf_srcptr n, mpf_srcptr d)
+{
+  mp_size_t  nsize, dsize, sign, prec, qsize, tsize;
+  mp_srcptr  np, dp;
+  mp_ptr     tp, qp, rp;
+  mpf_t      want;   /* assembled by hand below; never mpf_init'd */
+  int        ret;
+
+  nsize = SIZ (n);
+  dsize = SIZ (d);
+  ASSERT_ALWAYS (dsize != 0);
+
+  /* sign < 0 iff exactly one operand is negative.  */
+  sign = nsize ^ dsize;
+  nsize = ABS (nsize);
+  dsize = ABS (dsize);
+
+  np = PTR (n);
+  dp = PTR (d);
+  prec = PREC (got);
+
+  EXP (want) = EXP (n) - EXP (d) + 1;
+
+  qsize = prec + 2;            /* at least prec+1 limbs, after high zero */
+  tsize = qsize + dsize - 1;   /* dividend size to give desired qsize */
+
+  /* dividend n, extended or truncated */
+  tp = refmpn_malloc_limbs (tsize);
+  refmpn_copy_extend (tp, tsize, np, nsize);
+
+  qp = refmpn_malloc_limbs (qsize);
+  rp = refmpn_malloc_limbs (dsize);  /* remainder, unused */
+
+  ASSERT_ALWAYS (qsize == tsize - dsize + 1);
+  refmpn_tdiv_qr (qp, rp, (mp_size_t) 0, tp, tsize, dp, dsize);
+
+  /* Point want's data at the raw quotient and canonicalize.  */
+  PTR (want) = qp;
+  SIZ (want) = (sign >= 0 ? qsize : -qsize);
+  refmpf_normalize (want);
+
+  ret = refmpf_validate (name, got, want);
+
+  free (tp);
+  free (qp);
+  free (rp);
+
+  return ret;
+}
diff --git a/tests/refmpn.c b/tests/refmpn.c
new file mode 100644
index 0000000..42bb411
--- /dev/null
+++ b/tests/refmpn.c
@@ -0,0 +1,2630 @@
+/* Reference mpn functions, designed to be simple, portable and independent
+   of the normal gmp code.  Speed isn't a consideration.
+
+Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+/* Most routines have assertions representing what the mpn routines are
+   supposed to accept.  Many of these reference routines do sensible things
+   outside these ranges (eg. for size==0), but the assertions are present to
+   pick up bad parameters passed here that are about to be passed the same
+   to a real mpn routine being compared.  */
+
+/* always do assertion checking */
+#define WANT_ASSERT  1
+
+#include <stdio.h>  /* for NULL */
+#include <stdlib.h> /* for malloc */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "tests.h"
+
+
+
+/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
+   in bytes. */
+int
+byte_overlap_p (const void *v_xp, mp_size_t xsize,
+		const void *v_yp, mp_size_t ysize)
+{
+  const char *xp = (const char *) v_xp;
+  const char *yp = (const char *) v_yp;
+
+  ASSERT (xsize >= 0);
+  ASSERT (ysize >= 0);
+
+  /* no wraparounds */
+  ASSERT (xp+xsize >= xp);
+  ASSERT (yp+ysize >= yp);
+
+  /* x entirely below y */
+  if (xp + xsize <= yp)
+    return 0;
+
+  /* y entirely below x */
+  if (yp + ysize <= xp)
+    return 0;
+
+  return 1;
+}
+
+/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
+int
+refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
+{
+  return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
+			 yp, ysize * GMP_LIMB_BYTES);
+}
+
+/* Check overlap for a routine defined to work low to high. */
+int
+refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
+}
+
+/* Check overlap for a routine defined to work high to low. */
+int
+refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
+}
+
+/* Check overlap for a standard routine requiring equal or separate. */
+int
+refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
+}
+/* As refmpn_overlap_fullonly_p, but checking dst against both sources. */
+int
+refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
+			       mp_size_t size)
+{
+  return (refmpn_overlap_fullonly_p (dst, src1, size)
+	  && refmpn_overlap_fullonly_p (dst, src2, size));
+}
+
+
+/* malloc space for "size" limbs; size==0 is rounded up to 1 so a valid
+   pointer is always returned.  Aborts (via ASSERT) if malloc fails. */
+mp_ptr
+refmpn_malloc_limbs (mp_size_t size)
+{
+  mp_ptr  p;
+  ASSERT (size >= 0);
+  if (size == 0)
+    size = 1;
+  p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
+  ASSERT (p != NULL);
+  return p;
+}
+
+/* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free
+ * memory allocated by refmpn_malloc_limbs_aligned. */
+void
+refmpn_free_limbs (mp_ptr p)
+{
+  free (p);
+}
+
+/* Return a fresh malloc'd copy of the "size" limbs at ptr. */
+mp_ptr
+refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
+{
+  mp_ptr  p;
+  p = refmpn_malloc_limbs (size);
+  refmpn_copyi (p, ptr, size);
+  return p;
+}
+
+/* malloc n limbs on a multiple of m bytes boundary */
+mp_ptr
+refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
+{
+  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
+}
+
+
+/* Set all "size" limbs at ptr to the given value. */
+void
+refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
+{
+  mp_size_t  i;
+  ASSERT (size >= 0);
+  for (i = 0; i < size; i++)
+    ptr[i] = value;
+}
+
+/* Zero the "size" limbs at ptr. */
+void
+refmpn_zero (mp_ptr ptr, mp_size_t size)
+{
+  refmpn_fill (ptr, size, CNST_LIMB(0));
+}
+
+/* Zero the limbs from index oldsize up to (but excluding) newsize. */
+void
+refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
+{
+  ASSERT (newsize >= oldsize);
+  refmpn_zero (ptr+oldsize, newsize-oldsize);
+}
+
+/* Return 1 if all "size" limbs are zero, else 0. */
+int
+refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
+{
+  mp_size_t  i;
+  for (i = 0; i < size; i++)
+    if (ptr[i] != 0)
+      return 0;
+  return 1;
+}
+
+/* Return "size" with any high zero limbs stripped off. */
+mp_size_t
+refmpn_normalize (mp_srcptr ptr, mp_size_t size)
+{
+  ASSERT (size >= 0);
+  while (size > 0 && ptr[size-1] == 0)
+    size--;
+  return size;
+}
+
+/* the highest one bit in x, as a limb with just that bit set (0 if x==0) */
+mp_limb_t
+refmpn_msbone (mp_limb_t x)
+{
+  mp_limb_t  n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
+
+  /* scan from the top bit down to the first set bit */
+  while (n != 0)
+    {
+      if (x & n)
+	break;
+      n >>= 1;
+    }
+  return n;
+}
+
+/* a mask of the highest one bit plus and all bits below */
+mp_limb_t
+refmpn_msbone_mask (mp_limb_t x)
+{
+  if (x == 0)
+    return 0;
+
+  return (refmpn_msbone (x) << 1) - 1;
+}
+
+/* How many digits in the given base will fit in a limb.
+   Notice that the product b is allowed to be equal to the limit
+   2^GMP_NUMB_BITS, this ensures the result for base==2 will be
+   GMP_NUMB_BITS (and similarly other powers of 2).  */
+int
+refmpn_chars_per_limb (int base)
+{
+  mp_limb_t  limit[2], b[2];
+  int        chars_per_limb;
+
+  ASSERT (base >= 2);
+
+  limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
+  limit[1] = 1;
+  b[0] = 1;      /* b = 1 */
+  b[1] = 0;
+
+  /* repeatedly multiply b by base (as a 2-limb number) until it
+     exceeds the limit or overflows */
+  chars_per_limb = 0;
+  for (;;)
+    {
+      if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
+	break;
+      if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
+	break;
+      chars_per_limb++;
+    }
+  return chars_per_limb;
+}
+
+/* The biggest value base**n which fits in GMP_NUMB_BITS. */
+mp_limb_t
+refmpn_big_base (int base)
+{
+  int        chars_per_limb = refmpn_chars_per_limb (base);
+  int        i;
+  mp_limb_t  bb;
+
+  ASSERT (base >= 2);
+  bb = 1;
+  for (i = 0; i < chars_per_limb; i++)
+    bb *= base;
+  return bb;
+}
+
+
+/* Set the given bit (counting from bit 0 of the first limb, in numb units). */
+void
+refmpn_setbit (mp_ptr ptr, unsigned long bit)
+{
+  ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
+}
+
+/* Clear the given bit. */
+void
+refmpn_clrbit (mp_ptr ptr, unsigned long bit)
+{
+  ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
+}
+
+/* Test the given bit; non-zero if set. */
+#define REFMPN_TSTBIT(ptr,bit) \
+  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
+
+int
+refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
+{
+  return REFMPN_TSTBIT (ptr, bit);
+}
+
+/* Index of the first 0 bit at or above "bit".  The caller must ensure a
+   0 bit exists, since the scan has no upper bound. */
+unsigned long
+refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
+{
+  while (REFMPN_TSTBIT (ptr, bit) != 0)
+    bit++;
+  return bit;
+}
+
+/* Index of the first 1 bit at or above "bit".  The caller must ensure a
+   1 bit exists, since the scan has no upper bound. */
+unsigned long
+refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
+{
+  while (REFMPN_TSTBIT (ptr, bit) == 0)
+    bit++;
+  return bit;
+}
+
+/* Copy sp to rp; operands must be equal or entirely separate. */
+void
+refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
+  refmpn_copyi (rp, sp, size);
+}
+
+/* Copy sp to rp, working low limb to high (incrementing). */
+void
+refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mp_size_t i;
+
+  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
+  ASSERT (size >= 0);
+
+  for (i = 0; i < size; i++)
+    rp[i] = sp[i];
+}
+
+/* Copy sp to rp, working high limb to low (decrementing). */
+void
+refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mp_size_t i;
+
+  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
+  ASSERT (size >= 0);
+
+  for (i = size-1; i >= 0; i--)
+    rp[i] = sp[i];
+}
+
+/* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
+   zeros to wsize.  If x is longer, then copy just the high wsize limbs.  */
+void
+refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
+{
+  ASSERT (wsize >= 0);
+  ASSERT (xsize >= 0);
+
+  /* high part of x if x bigger than w */
+  if (xsize > wsize)
+    {
+      xp += xsize - wsize;
+      xsize = wsize;
+    }
+
+  refmpn_copy (wp + wsize-xsize, xp, xsize);
+  refmpn_zero (wp, wsize-xsize);
+}
+
+/* Compare {xp,size} and {yp,size}; return 1, 0 or -1 as x >, ==, < y. */
+int
+refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+  ASSERT_MPN (xp, size);
+  ASSERT_MPN (yp, size);
+
+  /* compare from the most significant limb down */
+  for (i = size-1; i >= 0; i--)
+    {
+      if (xp[i] > yp[i])  return 1;
+      if (xp[i] < yp[i])  return -1;
+    }
+  return 0;
+}
+
+/* As refmpn_cmp, but size==0 is allowed and compares equal. */
+int
+refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  if (size == 0)
+    return 0;
+  else
+    return refmpn_cmp (xp, yp, size);
+}
+
+/* Compare operands of possibly different sizes; the shorter is treated
+   as zero-extended.  Return 1, 0 or -1 as x >, ==, < y. */
+int
+refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
+		     mp_srcptr yp, mp_size_t ysize)
+{
+  int  opp, cmp;
+
+  ASSERT_MPN (xp, xsize);
+  ASSERT_MPN (yp, ysize);
+
+  /* arrange x to be the longer, remembering whether we swapped */
+  opp = (xsize < ysize);
+  if (opp)
+    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
+
+  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
+    cmp = 1;
+  else
+    cmp = refmpn_cmp (xp, yp, ysize);
+
+  return (opp ? -cmp : cmp);
+}
+
+/* Return 1 if the two operands are limb-for-limb identical (full limbs,
+   including any nail bits), else 0. */
+int
+refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_size_t  i;
+  ASSERT (size >= 0);
+
+  for (i = 0; i < size; i++)
+      if (xp[i] != yp[i])
+	return 0;
+  return 1;
+}
+
+
+/* Common body for the limb-wise logical operations below: apply
+   "operation" (an expression in s1p[i], s2p[i]) to each limb.  Results
+   are masked to GMP_NUMB_MASK in the complementing variants so nail
+   bits stay zero.  */
+#define LOGOPS(operation)                                               \
+  {                                                                     \
+    mp_size_t  i;                                                       \
+									\
+    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
+    ASSERT (size >= 1);                                                 \
+    ASSERT_MPN (s1p, size);                                             \
+    ASSERT_MPN (s2p, size);                                             \
+									\
+    for (i = 0; i < size; i++)                                          \
+      rp[i] = operation;                                                \
+  }
+
+void
+refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (s1p[i] & s2p[i]);
+}
+void
+refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (s1p[i] & ~s2p[i]);
+}
+void
+refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
+}
+void
+refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (s1p[i] | s2p[i]);
+}
+void
+refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
+}
+void
+refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
+}
+void
+refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (s1p[i] ^ s2p[i]);
+}
+void
+refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
+}
+
+
+/* set *dh,*dl to mh:ml - sh:sl, in full limbs */
+void
+refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
+		   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
+{
+  *dl = ml - sl;
+  *dh = mh - sh - (ml < sl);  /* propagate the low-limb borrow */
+}
+
+
+/* set *w to x+y, return 0 or 1 carry */
+mp_limb_t
+ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
+{
+  mp_limb_t  sum, cy;
+
+  ASSERT_LIMB (x);
+  ASSERT_LIMB (y);
+
+  sum = x + y;
+#if GMP_NAIL_BITS == 0
+  /* full-limb numbers: carry shows up as wraparound */
+  *w = sum;
+  cy = (sum < x);
+#else
+  /* nails: carry is the bits above GMP_NUMB_BITS */
+  *w = sum & GMP_NUMB_MASK;
+  cy = (sum >> GMP_NUMB_BITS);
+#endif
+  return cy;
+}
+
+/* set *w to x-y, return 0 or 1 borrow */
+mp_limb_t
+ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
+{
+  mp_limb_t  diff, cy;
+
+  ASSERT_LIMB (x);
+  ASSERT_LIMB (y);
+
+  diff = x - y;
+#if GMP_NAIL_BITS == 0
+  /* full-limb numbers: borrow shows up as wraparound */
+  *w = diff;
+  cy = (diff > x);
+#else
+  /* nails: borrow is bit GMP_NUMB_BITS of the wrapped difference */
+  *w = diff & GMP_NUMB_MASK;
+  cy = (diff >> GMP_NUMB_BITS) & 1;
+#endif
+  return cy;
+}
+
+/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
+mp_limb_t
+adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
+{
+  mp_limb_t  r;
+
+  ASSERT_LIMB (x);
+  ASSERT_LIMB (y);
+  ASSERT (c == 0 || c == 1);
+
+  /* at most one of the two adds can carry, so r + carry <= 1 */
+  r = ref_addc_limb (w, x, y);
+  return r + ref_addc_limb (w, *w, c);
+}
+
+/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
+mp_limb_t
+sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
+{
+  mp_limb_t  r;
+
+  ASSERT_LIMB (x);
+  ASSERT_LIMB (y);
+  ASSERT (c == 0 || c == 1);
+
+  /* at most one of the two subtracts can borrow */
+  r = ref_subc_limb (w, x, y);
+  return r + ref_subc_limb (w, *w, c);
+}
+
+
+/* Common body for single-limb add/subtract: propagate n through the
+   vector with "operation" (ref_addc_limb or ref_subc_limb), returning
+   the final carry/borrow.  */
+#define AORS_1(operation)                               \
+  {                                                     \
+    mp_size_t  i;                                       \
+							\
+    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
+    ASSERT (size >= 1);                                 \
+    ASSERT_MPN (sp, size);                              \
+    ASSERT_LIMB (n);                                    \
+							\
+    for (i = 0; i < size; i++)                          \
+      n = operation (&rp[i], sp[i], n);                 \
+    return n;                                           \
+  }
+
+mp_limb_t
+refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
+{
+  AORS_1 (ref_addc_limb);
+}
+mp_limb_t
+refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
+{
+  AORS_1 (ref_subc_limb);
+}
+
+/* Common body for n-limb add/subtract with an incoming carry/borrow:
+   apply "operation" (adc or sbb) limb by limb, returning the final
+   carry/borrow.  */
+#define AORS_NC(operation)                                              \
+  {                                                                     \
+    mp_size_t  i;                                                       \
+									\
+    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
+    ASSERT (carry == 0 || carry == 1);                                  \
+    ASSERT (size >= 1);                                                 \
+    ASSERT_MPN (s1p, size);                                             \
+    ASSERT_MPN (s2p, size);                                             \
+									\
+    for (i = 0; i < size; i++)                                          \
+      carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
+    return carry;                                                       \
+  }
+
+mp_limb_t
+refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
+	       mp_limb_t carry)
+{
+  AORS_NC (adc);
+}
+mp_limb_t
+refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
+	       mp_limb_t carry)
+{
+  AORS_NC (sbb);
+}
+
+
+/* Plain add/sub: the _nc variants with an initial carry of zero. */
+mp_limb_t
+refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
+}
+mp_limb_t
+refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
+}
+
+/* Conditional add: rp = s1p + s2p if cnd != 0, else rp = s1p. */
+mp_limb_t
+refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  if (cnd != 0)
+    return refmpn_add_n (rp, s1p, s2p, size);
+  else
+    {
+      refmpn_copyi (rp, s1p, size);
+      return 0;
+    }
+}
+/* Conditional subtract: rp = s1p - s2p if cnd != 0, else rp = s1p. */
+mp_limb_t
+refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  if (cnd != 0)
+    return refmpn_sub_n (rp, s1p, s2p, size);
+  else
+    {
+      refmpn_copyi (rp, s1p, size);
+      return 0;
+    }
+}
+
+
+/* Common body for add/sub with one error term: as AORS_NC, but each
+   time a carry/borrow occurs at limb i the limb yp[size-1-i] is
+   accumulated into the 2-limb counter ep[0..1].  */
+#define AORS_ERR1_N(operation)						\
+  {                                                                     \
+    mp_size_t  i;                                                       \
+    mp_limb_t carry2;							\
+									\
+    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
+    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
+    ASSERT (! refmpn_overlap_p (rp, size, yp, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 2, s1p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 2, s2p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 2, yp, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 2, rp, size));			\
+									\
+    ASSERT (carry == 0 || carry == 1);					\
+    ASSERT (size >= 1);							\
+    ASSERT_MPN (s1p, size);						\
+    ASSERT_MPN (s2p, size);						\
+    ASSERT_MPN (yp, size);						\
+									\
+    ep[0] = ep[1] = CNST_LIMB(0);					\
+									\
+    for (i = 0; i < size; i++)                                          \
+      {									\
+	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
+	if (carry == 1)							\
+	  {								\
+	    carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]);	\
+	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
+	    ASSERT (carry2 == 0);					\
+	  }								\
+      }									\
+    return carry;                                                       \
+  }
+
+mp_limb_t
+refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+		   mp_ptr ep, mp_srcptr yp,
+		   mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR1_N (adc);
+}
+mp_limb_t
+refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+		   mp_ptr ep, mp_srcptr yp,
+		   mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR1_N (sbb);
+}
+
+
+/* As AORS_ERR1_N but with two error terms y1p, y2p accumulated into
+   ep[0..1] and ep[2..3] respectively.  */
+#define AORS_ERR2_N(operation)						\
+  {                                                                     \
+    mp_size_t  i;                                                       \
+    mp_limb_t carry2;							\
+									\
+    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
+    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
+    ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
+    ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 4, s1p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 4, s2p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 4, y1p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 4, y2p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 4, rp, size));			\
+									\
+    ASSERT (carry == 0 || carry == 1);					\
+    ASSERT (size >= 1);							\
+    ASSERT_MPN (s1p, size);						\
+    ASSERT_MPN (s2p, size);						\
+    ASSERT_MPN (y1p, size);						\
+    ASSERT_MPN (y2p, size);						\
+									\
+    ep[0] = ep[1] = CNST_LIMB(0);					\
+    ep[2] = ep[3] = CNST_LIMB(0);					\
+									\
+    for (i = 0; i < size; i++)                                          \
+      {									\
+	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
+	if (carry == 1)							\
+	  {								\
+	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
+	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
+	    ASSERT (carry2 == 0);					\
+	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
+	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
+	    ASSERT (carry2 == 0);					\
+	  }								\
+      }									\
+    return carry;                                                       \
+  }
+
+mp_limb_t
+refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
+		   mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR2_N (adc);
+}
+mp_limb_t
+refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
+		   mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR2_N (sbb);
+}
+
+
+/* As AORS_ERR1_N but with three error terms y1p, y2p, y3p accumulated
+   into ep[0..1], ep[2..3] and ep[4..5] respectively.  */
+#define AORS_ERR3_N(operation)						\
+  {                                                                     \
+    mp_size_t  i;                                                       \
+    mp_limb_t carry2;							\
+									\
+    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
+    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
+    ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
+    ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
+    ASSERT (! refmpn_overlap_p (rp, size, y3p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 6, s1p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 6, s2p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 6, y1p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 6, y2p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 6, y3p, size));			\
+    ASSERT (! refmpn_overlap_p (ep, 6, rp, size));			\
+									\
+    ASSERT (carry == 0 || carry == 1);					\
+    ASSERT (size >= 1);							\
+    ASSERT_MPN (s1p, size);						\
+    ASSERT_MPN (s2p, size);						\
+    ASSERT_MPN (y1p, size);						\
+    ASSERT_MPN (y2p, size);						\
+    ASSERT_MPN (y3p, size);						\
+									\
+    ep[0] = ep[1] = CNST_LIMB(0);					\
+    ep[2] = ep[3] = CNST_LIMB(0);					\
+    ep[4] = ep[5] = CNST_LIMB(0);					\
+									\
+    for (i = 0; i < size; i++)                                          \
+      {									\
+	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
+	if (carry == 1)							\
+	  {								\
+	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
+	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
+	    ASSERT (carry2 == 0);					\
+	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
+	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
+	    ASSERT (carry2 == 0);					\
+	    carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]);	\
+	    carry2 = ref_addc_limb (&ep[5], ep[5], carry2);		\
+	    ASSERT (carry2 == 0);					\
+	  }								\
+      }									\
+    return carry;                                                       \
+  }
+
+mp_limb_t
+refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
+		   mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR3_N (adc);
+}
+mp_limb_t
+refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
+		   mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR3_N (sbb);
+}
+
+
+/* rp = up + (vp << s), returning the carry out (can exceed 1, up to 2^s).
+   The shifted operand is built in a temporary so rp may fully overlap up
+   or vp (per the overlap assert).  */
+mp_limb_t
+refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		 mp_size_t n, unsigned int s)
+{
+  mp_limb_t cy;
+  mp_ptr tp;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT (0 < s && s < GMP_NUMB_BITS);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  tp = refmpn_malloc_limbs (n);
+  cy  = refmpn_lshift (tp, vp, n, s);
+  cy += refmpn_add_n (rp, up, tp, n);
+  free (tp);
+  return cy;
+}
+/* Fixed-shift wrappers: rp = up + (vp << 1) and rp = up + (vp << 2).  */
+mp_limb_t
+refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, up, vp, n, 1);
+}
+mp_limb_t
+refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, up, vp, n, 2);
+}
+/* In-place "ip1" variants: rp = rp + (vp << s).  */
+mp_limb_t
+refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+  return refmpn_addlsh_n (rp, rp, vp, n, s);
+}
+mp_limb_t
+refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, rp, vp, n, 1);
+}
+mp_limb_t
+refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, rp, vp, n, 2);
+}
+/* In-place "ip2" variants: rp = vp + (rp << s), i.e. rp is the shifted
+   operand.  */
+mp_limb_t
+refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+  return refmpn_addlsh_n (rp, vp, rp, n, s);
+}
+mp_limb_t
+refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, vp, rp, n, 1);
+}
+mp_limb_t
+refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, vp, rp, n, 2);
+}
+/* As refmpn_addlsh_n but with an incoming carry, which must not exceed
+   2^s (the largest carry an addlsh of shift s can itself produce).  */
+mp_limb_t
+refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		  mp_size_t n, unsigned int s, mp_limb_t carry)
+{
+  mp_limb_t cy;
+
+  ASSERT (carry <= (CNST_LIMB(1) << s));
+
+  cy = refmpn_addlsh_n (rp, up, vp, n, s);
+  cy += refmpn_add_1 (rp, rp, n, carry);
+  return cy;
+}
+mp_limb_t
+refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+  return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_t
+refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+  return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
+}
+
+/* rp = up - (vp << s), returning borrow plus the bits shifted out.
+   NOTE(review): this calls the library mpn_lshift/mpn_sub_n, unlike
+   refmpn_addlsh_n above which uses the refmpn_ versions — confirm the
+   asymmetry is intended.  */
+mp_limb_t
+refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		 mp_size_t n, unsigned int s)
+{
+  mp_limb_t cy;
+  mp_ptr tp;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT (0 < s && s < GMP_NUMB_BITS);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  tp = refmpn_malloc_limbs (n);
+  cy  = mpn_lshift (tp, vp, n, s);
+  cy += mpn_sub_n (rp, up, tp, n);
+  free (tp);
+  return cy;
+}
+/* Fixed-shift wrappers: rp = up - (vp << 1) and rp = up - (vp << 2).  */
+mp_limb_t
+refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, up, vp, n, 1);
+}
+mp_limb_t
+refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, up, vp, n, 2);
+}
+/* In-place "ip1" variants: rp = rp - (vp << s).  */
+mp_limb_t
+refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+  return refmpn_sublsh_n (rp, rp, vp, n, s);
+}
+mp_limb_t
+refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, rp, vp, n, 1);
+}
+mp_limb_t
+refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, rp, vp, n, 2);
+}
+/* In-place "ip2" variants: rp = vp - (rp << s).  */
+mp_limb_t
+refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+  return refmpn_sublsh_n (rp, vp, rp, n, s);
+}
+mp_limb_t
+refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, vp, rp, n, 1);
+}
+mp_limb_t
+refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, vp, rp, n, 2);
+}
+/* As refmpn_sublsh_n but with an incoming borrow, at most 2^s.  */
+mp_limb_t
+refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		  mp_size_t n, unsigned int s, mp_limb_t carry)
+{
+  mp_limb_t cy;
+
+  ASSERT (carry <= (CNST_LIMB(1) << s));
+
+  cy = refmpn_sublsh_n (rp, up, vp, n, s);
+  cy += refmpn_sub_1 (rp, rp, n, carry);
+  return cy;
+}
+mp_limb_t
+refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+  return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_t
+refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+  return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
+}
+
+/* Reverse subtract with shift: rp = (vp << s) - up.  The signed return is
+   the bits shifted out minus the borrow from the subtraction, so it can
+   be negative.  (Uses the library mpn_lshift/mpn_sub_n, like
+   refmpn_sublsh_n above.)  */
+mp_limb_signed_t
+refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		 mp_size_t n, unsigned int s)
+{
+  mp_limb_signed_t cy;
+  mp_ptr tp;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT (0 < s && s < GMP_NUMB_BITS);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  tp = refmpn_malloc_limbs (n);
+  cy  = mpn_lshift (tp, vp, n, s);
+  cy -= mpn_sub_n (rp, tp, up, n);
+  free (tp);
+  return cy;
+}
+/* Fixed-shift wrappers for s=1 and s=2.  */
+mp_limb_signed_t
+refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_rsblsh_n (rp, up, vp, n, 1);
+}
+mp_limb_signed_t
+refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_rsblsh_n (rp, up, vp, n, 2);
+}
+/* As refmpn_rsblsh_n but with a signed incoming carry: either -1, or a
+   non-negative value below 2^s.  Positive carries are added in, negative
+   ones subtracted.  */
+mp_limb_signed_t
+refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		  mp_size_t n, unsigned int s, mp_limb_signed_t carry)
+{
+  mp_limb_signed_t cy;
+
+  ASSERT (carry == -1 || (carry >> s) == 0);
+
+  cy = refmpn_rsblsh_n (rp, up, vp, n, s);
+  if (carry > 0)
+    cy += refmpn_add_1 (rp, rp, n, carry);
+  else
+    cy -= refmpn_sub_1 (rp, rp, n, -carry);
+  return cy;
+}
+mp_limb_signed_t
+refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
+{
+  return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_signed_t
+refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
+{
+  return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
+}
+
+/* rp = (up + vp) >> 1.  The carry out of the addition becomes the new top
+   bit of rp, and the bit shifted out at the bottom (the low bit of the
+   sum) is returned.  */
+mp_limb_t
+refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t cya, cys;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  cya = mpn_add_n (rp, up, vp, n);
+  /* mpn_rshift returns the shifted-out bits in the high end of a limb;
+     move the single bit down to position 0.  */
+  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
+  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
+  return cys;
+}
+/* rp = (up - vp) >> 1, with the borrow becoming the new top bit of rp and
+   the shifted-out low bit returned.  */
+mp_limb_t
+refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t cya, cys;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  cya = mpn_sub_n (rp, up, vp, n);
+  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
+  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
+  return cys;
+}
+
+/* Twos complement negate: dst = -src mod b^size, computed as the
+   subtraction 0 - src.  Returns the borrow out of the top limb
+   (1 unless src is entirely zero).  */
+mp_limb_t
+refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  mp_ptr     zp;
+  mp_limb_t  borrow;
+
+  ASSERT (size >= 1);
+
+  /* Build an explicit block of zero limbs to subtract from.  */
+  zp = refmpn_malloc_limbs (size);
+  refmpn_fill (zp, size, CNST_LIMB(0));
+  borrow = refmpn_sub_n (dst, zp, src, size);
+  free (zp);
+  return borrow;
+}
+
+
+/* Common body for refmpn_add and refmpn_sub: operate on the low s2size
+   limbs with the _n primitive, then propagate the resulting carry/borrow
+   through the remaining s1size-s2size high limbs with the _1 primitive.  */
+#define AORS(aors_n, aors_1)                                    \
+  {                                                             \
+    mp_limb_t  c;                                               \
+    ASSERT (s1size >= s2size);                                  \
+    ASSERT (s2size >= 1);                                       \
+    c = aors_n (rp, s1p, s2p, s2size);                          \
+    if (s1size-s2size != 0)                                     \
+      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
+    return c;                                                   \
+  }
+/* rp = s1p + s2p with s1size >= s2size >= 1, returning the carry.  */
+mp_limb_t
+refmpn_add (mp_ptr rp,
+	    mp_srcptr s1p, mp_size_t s1size,
+	    mp_srcptr s2p, mp_size_t s2size)
+{
+  AORS (refmpn_add_n, refmpn_add_1);
+}
+/* rp = s1p - s2p with s1size >= s2size >= 1, returning the borrow.  */
+mp_limb_t
+refmpn_sub (mp_ptr rp,
+	    mp_srcptr s1p, mp_size_t s1size,
+	    mp_srcptr s2p, mp_size_t s2size)
+{
+  AORS (refmpn_sub_n, refmpn_sub_1);
+}
+
+
+/* Half-limb decomposition helpers for the schoolbook 1x1->2 multiply
+   below.  */
+#define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
+#define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)
+
+#define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
+#define HIGHMASK  SHIFTHIGH(LOWMASK)
+
+#define LOWPART(x)   ((x) & LOWMASK)
+#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
+
+/* Set return:*lo to x*y, using full limbs not nails. */
+/* Classic four-half-product schoolbook: the two cross products are each
+   split and folded into hi/lo with explicit carry detection.  */
+mp_limb_t
+refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
+{
+  mp_limb_t  hi, s;
+
+  *lo = LOWPART(x) * LOWPART(y);
+  hi = HIGHPART(x) * HIGHPART(y);
+
+  s = LOWPART(x) * HIGHPART(y);
+  hi += HIGHPART(s);
+  s = SHIFTHIGH(LOWPART(s));
+  *lo += s;
+  hi += (*lo < s);   /* carry out of the low-limb addition */
+
+  s = HIGHPART(x) * LOWPART(y);
+  hi += HIGHPART(s);
+  s = SHIFTHIGH(LOWPART(s));
+  *lo += s;
+  hi += (*lo < s);
+
+  return hi;
+}
+
+/* Argument-order variant: remainder pointer last.  */
+mp_limb_t
+refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
+{
+  return refmpn_umul_ppmm (lo, x, y);
+}
+
+/* rp = sp * multiplier + carry, returning the high limb.  The multiplier
+   is pre-shifted by the nail bits so each 2-limb product's low half lines
+   up with the numb bits; the low half is shifted back down after the
+   multiply.  */
+mp_limb_t
+refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
+	       mp_limb_t carry)
+{
+  mp_size_t  i;
+  mp_limb_t  hi, lo;
+
+  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
+  ASSERT (size >= 1);
+  ASSERT_MPN (sp, size);
+  ASSERT_LIMB (multiplier);
+  ASSERT_LIMB (carry);
+
+  multiplier <<= GMP_NAIL_BITS;
+  for (i = 0; i < size; i++)
+    {
+      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
+      lo >>= GMP_NAIL_BITS;
+      /* Adding the carry into lo can itself carry into hi, but never out
+	 of hi.  */
+      ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
+      rp[i] = lo;
+      carry = hi;
+    }
+  return carry;
+}
+
+/* rp = sp * multiplier, no incoming carry.  */
+mp_limb_t
+refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
+{
+  return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
+}
+
+
+/* dst = src * {mult,msize}: a mul_1 for the first multiplier limb then
+   addmul_1 for the rest, writing size+msize limbs in total (the last high
+   limb being the return value).  The loop structure accesses mult[1], so
+   this requires msize >= 2 — all callers below pass 2..6.  src is copied
+   first so dst may alias it.  */
+mp_limb_t
+refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
+	      mp_srcptr mult, mp_size_t msize)
+{
+  mp_ptr     src_copy;
+  mp_limb_t  ret;
+  mp_size_t  i;
+
+  ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
+  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
+  ASSERT (size >= msize);
+  ASSERT_MPN (mult, msize);
+
+  /* in case dst==src */
+  src_copy = refmpn_malloc_limbs (size);
+  refmpn_copyi (src_copy, src, size);
+  src = src_copy;
+
+  dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
+  for (i = 1; i < msize-1; i++)
+    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
+  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
+
+  free (src_copy);
+  return ret;
+}
+
+/* Fixed-width wrappers around refmpn_mul_N.  */
+mp_limb_t
+refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
+}
+mp_limb_t
+refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
+}
+mp_limb_t
+refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
+}
+mp_limb_t
+refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
+}
+mp_limb_t
+refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
+}
+
+/* Common body for refmpn_addmul_1c and refmpn_submul_1c: form the product
+   sp*multiplier+carry in a temporary, then add it to / subtract it from
+   rp with the _n primitive.  The return combines the product's high limb
+   and the add/sub carry.  */
+#define AORSMUL_1C(operation_n)                                 \
+  {                                                             \
+    mp_ptr     p;                                               \
+    mp_limb_t  ret;                                             \
+								\
+    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
+								\
+    p = refmpn_malloc_limbs (size);                             \
+    ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
+    ret += operation_n (rp, rp, p, size);                       \
+								\
+    free (p);                                                   \
+    return ret;                                                 \
+  }
+
+/* rp += sp * multiplier + carry.  */
+mp_limb_t
+refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+		  mp_limb_t multiplier, mp_limb_t carry)
+{
+  AORSMUL_1C (refmpn_add_n);
+}
+/* rp -= sp * multiplier + carry.  */
+mp_limb_t
+refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+		  mp_limb_t multiplier, mp_limb_t carry)
+{
+  AORSMUL_1C (refmpn_sub_n);
+}
+
+
+/* Carry-less entry points.  */
+mp_limb_t
+refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
+{
+  return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
+}
+mp_limb_t
+refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
+{
+  return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
+}
+
+
+/* dst += src * {mult,msize}: one addmul_1 per multiplier limb, each
+   writing its carry-out limb at dst[size+i]; the final carry is returned
+   instead of stored.  src is copied first so dst may alias it.  */
+mp_limb_t
+refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
+		 mp_srcptr mult, mp_size_t msize)
+{
+  mp_ptr     src_copy;
+  mp_limb_t  ret;
+  mp_size_t  i;
+
+  ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
+  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
+  ASSERT (size >= msize);
+  ASSERT_MPN (mult, msize);
+
+  /* in case dst==src */
+  src_copy = refmpn_malloc_limbs (size);
+  refmpn_copyi (src_copy, src, size);
+  src = src_copy;
+
+  for (i = 0; i < msize-1; i++)
+    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
+  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
+
+  free (src_copy);
+  return ret;
+}
+
+/* Fixed-width wrappers around refmpn_addmul_N.  */
+mp_limb_t
+refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
+}
+mp_limb_t
+refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
+}
+mp_limb_t
+refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
+}
+mp_limb_t
+refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
+}
+mp_limb_t
+refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
+}
+mp_limb_t
+refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
+}
+mp_limb_t
+refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
+}
+
+/* Simultaneous r1p = s1p + s2p and r2p = s1p - s2p.  The incoming carry
+   packs the add carry in bit 1 and the sub borrow in bit 0; the return
+   packs them the same way (2*acy + scy).  The sum goes via a temporary so
+   r1p may alias either source.  */
+mp_limb_t
+refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
+		  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
+		  mp_limb_t carry)
+{
+  mp_ptr p;
+  mp_limb_t acy, scy;
+
+  /* Destinations can't overlap. */
+  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
+  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
+  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
+  ASSERT (size >= 1);
+
+  /* in case r1p==s1p or r1p==s2p */
+  p = refmpn_malloc_limbs (size);
+
+  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
+  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
+  refmpn_copyi (r1p, p, size);
+
+  free (p);
+  return 2 * acy + scy;
+}
+
+/* As above with no incoming carry/borrow.  */
+mp_limb_t
+refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
+		 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
+}
+
+
+/* Right shift the double limb hi:lo by "shift" bits and return the low
+   limb of the result, masked down to the numb bits.  The shift==0 case
+   is split out because shifting a limb by a full GMP_LIMB_BITS isn't
+   assumed to work (it doesn't on x86).  */
+mp_limb_t
+rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
+{
+  mp_limb_t  result;
+
+  ASSERT (shift < GMP_NUMB_BITS);
+
+  if (shift == 0)
+    result = lo;
+  else
+    result = ((lo >> shift) | (hi << (GMP_NUMB_BITS - shift))) & GMP_NUMB_MASK;
+
+  return result;
+}
+
+/* Left shift the double limb hi:lo by "shift" bits and return the high
+   limb of the result, masked down to the numb bits.  The shift==0 case
+   is split out because shifting a limb by a full GMP_LIMB_BITS isn't
+   assumed to work (it doesn't on x86).  */
+mp_limb_t
+lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
+{
+  mp_limb_t  result;
+
+  ASSERT (shift < GMP_NUMB_BITS);
+
+  if (shift == 0)
+    result = hi;
+  else
+    result = ((hi << shift) | (lo >> (GMP_NUMB_BITS - shift))) & GMP_NUMB_MASK;
+
+  return result;
+}
+
+
+/* rp = sp >> shift (1 <= shift < GMP_NUMB_BITS), working low-to-high.
+   Returns the bits shifted out, positioned at the high end of a limb.  */
+mp_limb_t
+refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  mp_limb_t  ret;
+  mp_size_t  i;
+
+  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
+  ASSERT (size >= 1);
+  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
+  ASSERT_MPN (sp, size);
+
+  /* Bits falling off the bottom of sp[0], moved to the top of a limb.  */
+  ret = rshift_make (sp[0], CNST_LIMB(0), shift);
+
+  for (i = 0; i < size-1; i++)
+    rp[i] = rshift_make (sp[i+1], sp[i], shift);
+
+  /* Top limb gets zeros shifted in from above.  */
+  rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
+  return ret;
+}
+
+/* rp = sp << shift (1 <= shift < GMP_NUMB_BITS), working high-to-low.
+   Returns the bits shifted out of the top limb.  */
+mp_limb_t
+refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  mp_limb_t  ret;
+  mp_size_t  i;
+
+  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
+  ASSERT (size >= 1);
+  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
+  ASSERT_MPN (sp, size);
+
+  ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
+
+  for (i = size-2; i >= 0; i--)
+    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
+
+  /* After the loop i == -1, so this writes rp[0], shifting zeros in at
+     the bottom.  */
+  rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
+  return ret;
+}
+
+/* rp = ones-complement of sp, limbwise, masked to the numb bits.  */
+void
+refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mp_size_t i;
+
+  /* We work downwards since mpn_lshiftc needs that. */
+  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
+
+  for (i = size - 1; i >= 0; i--)
+    rp[i] = (~sp[i]) & GMP_NUMB_MASK;
+}
+
+/* rp = complement of (sp << shift); returns the (uncomplemented) bits
+   shifted out, as from refmpn_lshift.  */
+mp_limb_t
+refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  mp_limb_t res;
+
+  /* No asserts here, refmpn_lshift will assert what we need. */
+
+  res = refmpn_lshift (rp, sp, size, shift);
+  refmpn_com (rp, rp, size);
+  return res;
+}
+
+/* accepting shift==0 and doing a plain copyi or copyd in that case */
+/* Shift variants that tolerate shift==0 (plain copy, zero return).  */
+mp_limb_t
+refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  if (shift == 0)
+    {
+      refmpn_copyi (rp, sp, size);
+      return 0;
+    }
+  else
+    {
+      return refmpn_rshift (rp, sp, size, shift);
+    }
+}
+mp_limb_t
+refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  if (shift == 0)
+    {
+      /* copyd: lshift works high-to-low, so the copy must too.  */
+      refmpn_copyd (rp, sp, size);
+      return 0;
+    }
+  else
+    {
+      return refmpn_lshift (rp, sp, size, shift);
+    }
+}
+
+/* accepting size==0 too */
+mp_limb_t
+refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+			   unsigned shift)
+{
+  return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
+}
+mp_limb_t
+refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+			   unsigned shift)
+{
+  return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
+}
+
+/* Divide h,l by d, return quotient, store remainder to *rp.
+   Operates on full limbs, not nails.
+   Must have h < d.
+   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
+mp_limb_t
+refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
+{
+  mp_limb_t  q, r;
+  int  n;
+
+  ASSERT (d != 0);
+  ASSERT (h < d);
+
+#if 0
+  udiv_qrnnd (q, r, h, l, d);
+  *rp = r;
+  return q;
+#endif
+
+  /* Normalize: shift d so its high bit is set, shifting h,l to match,
+     then undo the shift on the remainder afterwards.  */
+  n = refmpn_count_leading_zeros (d);
+  d <<= n;
+
+  if (n != 0)
+    {
+      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
+      l <<= n;
+    }
+
+  __udiv_qrnnd_c (q, r, h, l, d);
+  r >>= n;
+  *rp = r;
+  return q;
+}
+
+/* Argument-order variant: remainder pointer last.  */
+mp_limb_t
+refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
+{
+  return refmpn_udiv_qrnnd (rp, h, l, d);
+}
+
+/* This little subroutine avoids some bad code generation from i386 gcc 3.0
+   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
+/* Schoolbook long division from the most significant limb down, with
+   operands shifted up by the nail bits so udiv_qrnnd sees full limbs.
+   Quotient to rp, returns the final remainder.  */
+static mp_limb_t
+refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+			     mp_limb_t divisor, mp_limb_t carry)
+{
+  mp_size_t  i;
+  mp_limb_t rem[1];
+  for (i = size-1; i >= 0; i--)
+    {
+      rp[i] = refmpn_udiv_qrnnd (rem, carry,
+				 sp[i] << GMP_NAIL_BITS,
+				 divisor << GMP_NAIL_BITS);
+      carry = *rem >> GMP_NAIL_BITS;
+    }
+  return carry;
+}
+
+/* Divide {sp,size} by "divisor": quotient to {rp,size}, remainder
+   returned.  "carry" is an initial high remainder limb and must be less
+   than the divisor.  The result is self-checked by multiplying the
+   quotient back and comparing against the original operand.  */
+mp_limb_t
+refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+		  mp_limb_t divisor, mp_limb_t carry)
+{
+  mp_ptr     sp_orig;
+  mp_ptr     prod;
+  mp_limb_t  carry_orig;
+
+  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
+  ASSERT (size >= 0);
+  ASSERT (carry < divisor);
+  ASSERT_MPN (sp, size);
+  ASSERT_LIMB (divisor);
+  ASSERT_LIMB (carry);
+
+  if (size == 0)
+    return carry;
+
+  /* Keep a pristine copy of sp for the multiply-back check (rp may
+     overwrite sp).  */
+  sp_orig = refmpn_memdup_limbs (sp, size);
+  prod = refmpn_malloc_limbs (size);
+  carry_orig = carry;
+
+  carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
+
+  /* check by multiplying back */
+#if 0
+  printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
+	  size, divisor, carry_orig, carry);
+  mpn_trace("s",sp_orig,size);  /* was "sp_copy", a variable that doesn't exist */
+  mpn_trace("r",rp,size);
+  printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
+  mpn_trace("p",prod,size);
+#endif
+  /* quotient*divisor + remainder must reproduce the original operand.  */
+  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
+  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
+  free (sp_orig);
+  free (prod);
+
+  return carry;
+}
+
+/* Divide with no initial high remainder limb.  */
+mp_limb_t
+refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
+{
+  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
+}
+
+
+/* Remainder only: run the full division into a scratch quotient.  */
+mp_limb_t
+refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
+	       mp_limb_t carry)
+{
+  mp_ptr  p = refmpn_malloc_limbs (size);
+  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
+  free (p);
+  return carry;
+}
+
+mp_limb_t
+refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
+{
+  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
+}
+
+/* Same result as refmpn_mod_1; the preinverted "inverse" argument is only
+   validated, not used, since the reference code has no need for speed.  */
+mp_limb_t
+refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
+		     mp_limb_t inverse)
+{
+  ASSERT (divisor & GMP_NUMB_HIGHBIT);
+  ASSERT (inverse == refmpn_invert_limb (divisor));
+  return refmpn_mod_1 (sp, size, divisor);
+}
+
+/* This implementation will be rather slow, but has the advantage of being
+   in a different style than the libgmp versions.  */
+mp_limb_t
+refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
+{
+  ASSERT ((GMP_NUMB_BITS % 4) == 0);
+  return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
+}
+
+
+/* Divide {sp,size} plus xsize extra fraction limbs (zeros) by "divisor":
+   integer quotient to rp+xsize, fraction quotient limbs to rp, remainder
+   returned.  */
+mp_limb_t
+refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
+		  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
+		  mp_limb_t carry)
+{
+  mp_ptr  z;
+
+  z = refmpn_malloc_limbs (xsize);
+  refmpn_fill (z, xsize, CNST_LIMB(0));
+
+  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
+  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
+
+  free (z);
+  return carry;
+}
+
+mp_limb_t
+refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
+		 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
+{
+  return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
+}
+
+/* As refmpn_divrem_1; the preinverted arguments are only validated, not
+   used.  */
+mp_limb_t
+refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
+			mp_srcptr sp, mp_size_t size,
+			mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
+{
+  ASSERT (size >= 0);
+  ASSERT (shift == refmpn_count_leading_zeros (divisor));
+  ASSERT (inverse == refmpn_invert_limb (divisor << shift));
+
+  return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
+}
+
+/* Divide {np,nn} (plus qxn fraction limbs of zeros below it) by the
+   2-limb divisor {dp,2} using the schoolbook division helper.  Quotient
+   to qp (high quotient limb returned), 2-limb remainder back into np.  */
+mp_limb_t
+refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+		 mp_ptr np, mp_size_t nn,
+		 mp_srcptr dp)
+{
+  mp_ptr tp;
+  mp_limb_t qh;
+
+  tp = refmpn_malloc_limbs (nn + qxn);
+  refmpn_zero (tp, qxn);
+  refmpn_copyi (tp + qxn, np, nn);
+  qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
+  refmpn_copyi (np, tp, 2);
+  free (tp);
+  return qh;
+}
+
+/* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
+   paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.  Note that -d-1 < d
+   since d has the high bit set. */
+
+mp_limb_t
+refmpn_invert_limb (mp_limb_t d)
+{
+  mp_limb_t r;
+  /* -d-1 is b-d-1, the high limb of b*(b-d)-1; the remainder is discarded.  */
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+  return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
+}
+
+/* Approximate reciprocal: rp = floor((b^(2n)-1) / {up,n}), low n limbs of
+   the quotient.  The "scratch" argument is unused here.  */
+void
+refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
+{
+  mp_ptr qp, tp;
+  TMP_DECL;
+  TMP_MARK;
+
+  tp = TMP_ALLOC_LIMBS (2 * n);
+  qp = TMP_ALLOC_LIMBS (n + 1);
+
+  /* tp = b^(2n) - 1, i.e. all ones.  */
+  MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);
+
+  refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
+  refmpn_copyi (rp, qp, n);
+
+  TMP_FREE;
+}
+
+/* Inverse of {up,n} modulo b^n, i.e. rp*up == 1 mod b^n.  The "scratch"
+   argument is unused here.  */
+void
+refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
+{
+  mp_ptr tp;
+  mp_limb_t binv;
+  TMP_DECL;
+  TMP_MARK;
+
+  /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
+     code.  To make up for it, we check that the inverse is correct using a
+     multiply.  */
+
+  tp = TMP_ALLOC_LIMBS (2 * n);
+
+  MPN_ZERO (tp, n);
+  tp[0] = 1;
+  binvert_limb (binv, up[0]);
+  mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);
+
+  /* rp*up must come out to 1 mod b^n.  */
+  refmpn_mul_n (tp, rp, up, n);
+  ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));
+
+  TMP_FREE;
+}
+
+/* The aim is to produce a dst quotient and return a remainder c, satisfying
+   c*b^n + src-i == 3*dst, where i is the incoming carry.
+
+   Some value c==0, c==1 or c==2 will satisfy, so just try each.
+
+   If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
+   remainder from the first division attempt determines the correct
+   remainder (3-c), but don't bother with that, since we can't guarantee
+   anything about GMP_NUMB_BITS when using nails.
+
+   If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
+   complement negative, ie. b^n+a-i, and the calculation produces c1
+   satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
+   means it's enough to just add any borrow back at the end.
+
+   A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
+   mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
+   or 1 respectively, so with 1 added the final return value is still in the
+   prescribed range 0 to 2. */
+
+mp_limb_t
+refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
+{
+  mp_ptr     spcopy;
+  mp_limb_t  c, cs;
+
+  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
+  ASSERT (size >= 1);
+  ASSERT (carry <= 2);
+  ASSERT_MPN (sp, size);
+
+  spcopy = refmpn_malloc_limbs (size);
+  cs = refmpn_sub_1 (spcopy, sp, size, carry);
+
+  /* Try each candidate high remainder until the division is exact.  */
+  for (c = 0; c <= 2; c++)
+    if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
+      goto done;
+  ASSERT_FAIL (no value of c satisfies);
+
+ done:
+  c += cs;   /* add back any borrow from the initial subtract, see above */
+  ASSERT (c <= 2);
+
+  free (spcopy);
+  return c;
+}
+
+/* Exact division by 3 with no incoming carry.  */
+mp_limb_t
+refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
+}
+
+
+/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
+/* prodp = up * vp (usize+vsize limbs): a mul_1 for the first vp limb and
+   an addmul_1 per remaining limb, each writing its carry at
+   prodp[usize+i].  */
+void
+refmpn_mul_basecase (mp_ptr prodp,
+		     mp_srcptr up, mp_size_t usize,
+		     mp_srcptr vp, mp_size_t vsize)
+{
+  mp_size_t i;
+
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
+  ASSERT (usize >= vsize);
+  ASSERT (vsize >= 1);
+  ASSERT_MPN (up, usize);
+  ASSERT_MPN (vp, vsize);
+
+  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
+  for (i = 1; i < vsize; i++)
+    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
+}
+
+
+/* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
+/* Middle product of {up,un} and {vp,vn}: the un-vn+3 limbs of rp cover
+   the middle portion of the full product.  Each row is an addmul_1 over a
+   sliding window of up, with carries folded into the two top rp limbs.  */
+void
+refmpn_mulmid_basecase (mp_ptr rp,
+			mp_srcptr up, mp_size_t un,
+			mp_srcptr vp, mp_size_t vn)
+{
+  mp_limb_t cy;
+  mp_size_t i;
+
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
+  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
+  ASSERT_MPN (up, un);
+  ASSERT_MPN (vp, vn);
+
+  rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
+  rp[un - vn + 2] = CNST_LIMB (0);
+  for (i = 1; i < vn; i++)
+    {
+      cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
+      cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
+      cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
+      ASSERT (cy == 0);
+    }
+}
+
+/* Toom-42 mulmid reference: just the basecase; "scratch" is unused.  */
+void
+refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
+		      mp_ptr scratch)
+{
+  refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
+}
+
+void
+refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  /* FIXME: this could be made faster by using refmpn_mul and then subtracting
+     off products near the middle product region boundary */
+  refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
+}
+
+void
+refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
+	       mp_srcptr vp, mp_size_t vn)
+{
+  /* FIXME: this could be made faster by using refmpn_mul and then subtracting
+     off products near the middle product region boundary */
+  refmpn_mulmid_basecase (rp, up, un, vp, vn);
+}
+
+
+
+/* Size thresholds selecting which library toom routine the chunking loop
+   below uses; each covers both the mul and sqr threshold.  */
+#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
+#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
+#define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
+#if WANT_FFT
+#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
+#else
+#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
+#endif
+
+/* wp = up * vp (un >= vn assumed by the chunking).  Small vn uses our own
+   basecase; otherwise up is processed in vn-sized chunks, each multiplied
+   by vp with a lower-level toom routine and accumulated into wp.  Any
+   leftover chunk shorter than vn is handled by a recursive call with the
+   operands swapped.  */
+void
+refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
+{
+  mp_ptr tp, rp;
+  mp_size_t tn;
+
+  if (vn < TOOM3_THRESHOLD)
+    {
+      /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */
+      if (vn != 0)
+	refmpn_mul_basecase (wp, up, un, vp, vn);
+      else
+	MPN_ZERO (wp, un);
+      return;
+    }
+
+  MPN_ZERO (wp, vn);
+  rp = refmpn_malloc_limbs (2 * vn);
+
+  /* Scratch sized for whichever toom routine the loop will pick.  */
+  if (vn < TOOM4_THRESHOLD)
+    tn = mpn_toom22_mul_itch (vn, vn);
+  else if (vn < TOOM6_THRESHOLD)
+    tn = mpn_toom33_mul_itch (vn, vn);
+  else if (vn < FFT_THRESHOLD)
+    tn = mpn_toom44_mul_itch (vn, vn);
+  else
+    tn = mpn_toom6h_mul_itch (vn, vn);
+  tp = refmpn_malloc_limbs (tn);
+
+  while (un >= vn)
+    {
+      if (vn < TOOM4_THRESHOLD)
+	/* In the toom3 range, use mpn_toom22_mul.  */
+	mpn_toom22_mul (rp, up, vn, vp, vn, tp);
+      else if (vn < TOOM6_THRESHOLD)
+	/* In the toom4 range, use mpn_toom33_mul.  */
+	mpn_toom33_mul (rp, up, vn, vp, vn, tp);
+      else if (vn < FFT_THRESHOLD)
+	/* In the toom6 range, use mpn_toom44_mul.  */
+	mpn_toom44_mul (rp, up, vn, vp, vn, tp);
+      else
+	/* For the largest operands, use mpn_toom6h_mul.  */
+	mpn_toom6h_mul (rp, up, vn, vp, vn, tp);
+
+      /* Accumulate this chunk's product into wp; the low vn limbs of wp
+	 already hold the previous chunk's high half.  */
+      ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn));
+      wp += vn;
+
+      up += vn;
+      un -= vn;
+    }
+
+  free (tp);
+
+  if (un != 0)
+    {
+      /* Leftover chunk shorter than vn: recurse with operands swapped.  */
+      refmpn_mul (rp, vp, vn, up, un);
+      ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn));
+    }
+  free (rp);
+}
+
+/* Equal-length product: prodp = up * vp, 2*size limbs.  */
+void
+refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+{
+  refmpn_mul (prodp, up, size, vp, size);
+}
+
+/* Low half only: prodp = low "size" limbs of up*vp, via a full product
+   into a temporary.  */
+void
+refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+{
+  mp_ptr tp = refmpn_malloc_limbs (2*size);
+  refmpn_mul (tp, up, size, vp, size);
+  refmpn_copyi (prodp, tp, size);
+  free (tp);
+}
+
+/* dst = src^2, 2*size limbs.  */
+void
+refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  refmpn_mul (dst, src, size, src, size);
+}
+
+/* dst = low "size" limbs of src^2.  */
+void
+refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  refmpn_mullo_n (dst, src, src, size);
+}
+
+/* Allowing usize<vsize, usize==0 or vsize==0. */
+void
+refmpn_mul_any (mp_ptr prodp,
+		     mp_srcptr up, mp_size_t usize,
+		     mp_srcptr vp, mp_size_t vsize)
+{
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
+  ASSERT (usize >= 0);
+  ASSERT (vsize >= 0);
+  ASSERT_MPN (up, usize);
+  ASSERT_MPN (vp, vsize);
+
+  /* An empty operand makes the product zero.  */
+  if (usize == 0)
+    {
+      refmpn_fill (prodp, vsize, CNST_LIMB(0));
+      return;
+    }
+
+  if (vsize == 0)
+    {
+      refmpn_fill (prodp, usize, CNST_LIMB(0));
+      return;
+    }
+
+  /* refmpn_mul wants the longer operand first.  */
+  if (usize >= vsize)
+    refmpn_mul (prodp, up, usize, vp, vsize);
+  else
+    refmpn_mul (prodp, vp, vsize, up, usize);
+}
+
+
+/* Single-limb binary gcd.  Both x and y must be nonzero; unlike the
+   non-ref mpn_gcd_11 they need not be odd (see refmpn_gcd_1 below).  */
+mp_limb_t
+refmpn_gcd_11 (mp_limb_t x, mp_limb_t y)
+{
+  ASSERT (x > 0);
+  ASSERT (y > 0);
+
+  for (;;)
+    {
+      /* Factors of two never survive into the gcd here; shift them out
+	 of each operand.  */
+      while ((x & 1) == 0)
+	x >>= 1;
+      while ((y & 1) == 0)
+	y >>= 1;
+
+      /* Keep x as the larger, then reduce it by the smaller.  */
+      if (x < y)
+	MP_LIMB_T_SWAP (x, y);
+      x -= y;
+
+      if (x == 0)
+	return y;
+    }
+}
+
+/* Two-limb binary gcd of {x1,x0} and {y1,y0}.  The low limbs x0 and y0
+   must be odd on entry (asserted).  Returns the gcd as the d1:d0 pair
+   of an mp_double_limb_t.  */
+mp_double_limb_t
+refmpn_gcd_22 (mp_limb_t x1, mp_limb_t x0, mp_limb_t y1, mp_limb_t y0)
+{
+  ASSERT ((x0 & 1) != 0);
+  ASSERT ((y0 & 1) != 0);
+  mp_double_limb_t g;
+  mp_limb_t cy;
+
+  do
+    {
+      /* Strip factors of two from each operand: a two-limb right shift,
+	 moving the low bit of the high limb into the top of the low
+	 limb.  (No-op on the first pass since x0 and y0 start odd.)  */
+      while ((x0 & 1) == 0)
+	{
+	  x0 = (x1 << (GMP_NUMB_BITS - 1)) | (x0 >> 1);
+	  x1 >>= 1;
+	}
+      while ((y0 & 1) == 0)
+	{
+	  y0 = (y1 << (GMP_NUMB_BITS - 1)) | (y0 >> 1);
+	  y1 >>= 1;
+	}
+
+
+      /* Make x the larger operand.  */
+      if (x1 < y1 || (x1 == y1 && x0 < y0))
+	{
+	  mp_limb_t t;
+	  t = x1; x1 = y1; y1 = t;
+	  t = x0; x0 = y0; y0 = t;
+	}
+
+      /* x -= y with borrow propagation; x >= y so no final borrow.  */
+      cy = (x0 < y0);
+      x0 -= y0;
+      x1 -= y1 + cy;
+    }
+  while ((x1 | x0) != 0);
+
+  g.d1 = y1;
+  g.d0 = y0;
+  return g;
+}
+
+/* GCD of {xp,xsize} (nonzero) with the single limb y (nonzero): reduce
+   x modulo y first, then use the single-limb binary gcd, restoring any
+   factors of two common to both.  */
+mp_limb_t
+refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
+{
+  mp_limb_t  rem;
+  int  common_twos;
+
+  ASSERT (y != 0);
+  ASSERT (! refmpn_zero_p (xp, xsize));
+  ASSERT_MPN (xp, xsize);
+  ASSERT_LIMB (y);
+
+  rem = refmpn_mod_1 (xp, xsize, y);
+  if (rem == 0)
+    return y;
+
+  /* (rem|y) has its low bit clear exactly when both are even.  */
+  for (common_twos = 0; ((rem | y) & 1) == 0; common_twos++)
+    {
+      rem >>= 1;
+      y >>= 1;
+    }
+
+  return refmpn_gcd_11 (rem, y) << common_twos;
+}
+
+
+/* Count leading zero bits of x.  Based on the full limb, not nails.
+   x must be nonzero.  */
+unsigned
+refmpn_count_leading_zeros (mp_limb_t x)
+{
+  unsigned  zeros;
+
+  ASSERT (x != 0);
+
+  for (zeros = 0; (x & GMP_LIMB_HIGHBIT) == 0; zeros++)
+    x <<= 1;
+  return zeros;
+}
+
+/* Count trailing zero bits of x.  Full limbs allowed, not limited to
+   nails.  x must be nonzero.  */
+unsigned
+refmpn_count_trailing_zeros (mp_limb_t x)
+{
+  unsigned  zeros;
+
+  ASSERT (x != 0);
+  ASSERT_LIMB (x);
+
+  for (zeros = 0; (x & 1) == 0; zeros++)
+    x >>= 1;
+  return zeros;
+}
+
+/* Strip factors of two (low zero bits) from {p,size} by right shifting.
+   The return value is the number of twos stripped.  */
+mp_size_t
+refmpn_strip_twos (mp_ptr p, mp_size_t size)
+{
+  mp_size_t  limbs;
+  unsigned   shift;
+
+  ASSERT (size >= 1);
+  ASSERT (! refmpn_zero_p (p, size));
+  ASSERT_MPN (p, size);
+
+  /* First drop whole zero low limbs, sliding the array down one limb
+     per iteration and zero-filling the vacated top limb.  Terminates
+     because {p,size} is nonzero.  */
+  for (limbs = 0; p[0] == 0; limbs++)
+    {
+      refmpn_copyi (p, p+1, size-1);
+      p[size-1] = 0;
+    }
+
+  /* Then shift out the remaining low zero bits of the low limb.  */
+  shift = refmpn_count_trailing_zeros (p[0]);
+  if (shift)
+    refmpn_rshift (p, p, size, shift);
+
+  return limbs*GMP_NUMB_BITS + shift;
+}
+
+/* Binary gcd of {xp,xsize} and {yp,ysize}, both destroyed.  The gcd is
+   written to gp and its limb count returned.  Both operands must have
+   odd low limbs (asserted), so all stripped twos are non-common and do
+   not appear in the result.  */
+mp_limb_t
+refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
+{
+  int       cmp;
+
+  ASSERT (ysize >= 1);
+  ASSERT (xsize >= ysize);
+  ASSERT ((xp[0] & 1) != 0);
+  ASSERT ((yp[0] & 1) != 0);
+  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
+  ASSERT (yp[ysize-1] != 0);
+  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
+  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
+  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
+  if (xsize == ysize)
+    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
+  ASSERT_MPN (xp, xsize);
+  ASSERT_MPN (yp, ysize);
+
+  refmpn_strip_twos (xp, xsize);
+  MPN_NORMALIZE (xp, xsize);
+  MPN_NORMALIZE (yp, ysize);
+
+  /* Subtract-and-strip loop: keep x as the larger operand (swapping
+     pointers and sizes when needed), replace it by x-y with twos
+     stripped, until the two operands are equal.  */
+  for (;;)
+    {
+      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
+      if (cmp == 0)
+	break;
+      if (cmp < 0)
+	MPN_PTR_SWAP (xp,xsize, yp,ysize);
+
+      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
+
+      refmpn_strip_twos (xp, xsize);
+      MPN_NORMALIZE (xp, xsize);
+    }
+
+  refmpn_copyi (gp, xp, xsize);
+  return xsize;
+}
+
+/* Number of one bits in src.  */
+unsigned long
+ref_popc_limb (mp_limb_t src)
+{
+  unsigned long  ones = 0;
+
+  /* Once src reaches zero the remaining bit positions contribute
+     nothing, so we can stop early.  */
+  while (src != 0)
+    {
+      ones += (src & 1);
+      src >>= 1;
+    }
+  return ones;
+}
+
+/* Total number of one bits across {sp,size}.  */
+unsigned long
+refmpn_popcount (mp_srcptr sp, mp_size_t size)
+{
+  unsigned long  total = 0;
+  mp_size_t  j;
+
+  ASSERT (size >= 0);
+  ASSERT_MPN (sp, size);
+
+  for (j = 0; j < size; j++)
+    total += ref_popc_limb (sp[j]);
+  return total;
+}
+
+/* Hamming distance between {s1p,size} and {s2p,size}: the population
+   count of their exclusive-or.  */
+unsigned long
+refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  mp_ptr  xorbuf;
+  unsigned long  dist;
+
+  ASSERT (size >= 0);
+  ASSERT_MPN (s1p, size);
+  ASSERT_MPN (s2p, size);
+
+  if (size == 0)
+    return 0;
+
+  xorbuf = refmpn_malloc_limbs (size);
+  refmpn_xor_n (xorbuf, s1p, s2p, size);
+  dist = refmpn_popcount (xorbuf, size);
+  free (xorbuf);
+  return dist;
+}
+
+
+/* Set r to a%d, all two-limb values, by classical shift-and-subtract:
+   scale the divisor up by powers of two until it covers the remainder,
+   then step back down subtracting wherever it fits.  */
+void
+refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
+{
+  mp_limb_t  D[2];
+  int        n;
+
+  ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
+  ASSERT_MPN (a, 2);
+  ASSERT_MPN (d, 2);
+
+  D[1] = d[1], D[0] = d[0];
+  r[1] = a[1], r[0] = a[0];
+  n = 0;
+
+  /* Shift D left (counting in n) until its high bit is set, so it
+     cannot be doubled again, or until it already reaches r.  */
+  for (;;)
+    {
+      if (D[1] & GMP_NUMB_HIGHBIT)
+	break;
+      if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
+	break;
+      refmpn_lshift (D, D, (mp_size_t) 2, 1);
+      n++;
+      ASSERT (n <= GMP_NUMB_BITS);
+    }
+
+  /* Undo the shifts one at a time, reducing r whenever D fits; the
+     loop includes the unshifted D (n reaching 0), leaving r < d.  */
+  while (n >= 0)
+    {
+      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
+	ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
+      refmpn_rshift (D, D, (mp_size_t) 2, 1);
+      n--;
+    }
+
+  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
+}
+
+
+
+/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
+   particular the trial quotient is allowed to be 2 too big.
+
+   Schoolbook division of {np,nsize} by the normalized divisor
+   {dp,dsize} (high bit set, asserted).  The quotient's nsize-dsize low
+   limbs go to qp and its top limb (0 or 1) is the return value; the
+   remainder is left in the low dsize limbs of np.  The result is
+   verified by multiplying back and comparing with a saved copy of the
+   dividend.  */
+mp_limb_t
+refmpn_sb_div_qr (mp_ptr qp,
+		  mp_ptr np, mp_size_t nsize,
+		  mp_srcptr dp, mp_size_t dsize)
+{
+  mp_limb_t  retval = 0;
+  mp_size_t  i;
+  mp_limb_t  d1 = dp[dsize-1];
+  mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);
+
+  ASSERT (nsize >= dsize);
+  /* ASSERT (dsize > 2); */
+  ASSERT (dsize >= 2);
+  ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
+  ASSERT_MPN (np, nsize);
+  ASSERT_MPN (dp, dsize);
+
+  /* Top quotient limb: 1 if the high dsize limbs of n reach d.  */
+  i = nsize-dsize;
+  if (refmpn_cmp (np+i, dp, dsize) >= 0)
+    {
+      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
+      retval = 1;
+    }
+
+  /* One quotient limb per pass, high position to low.  */
+  for (i--; i >= 0; i--)
+    {
+      mp_limb_t  n0 = np[i+dsize];
+      mp_limb_t  n1 = np[i+dsize-1];
+      mp_limb_t  q, dummy_r;
+
+      /* Trial quotient from the top two limbs of the partial remainder
+	 over the top divisor limb; may be up to 2 too big.  */
+      ASSERT (n0 <= d1);
+      if (n0 == d1)
+	q = GMP_NUMB_MAX;
+      else
+	q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
+			       d1 << GMP_NAIL_BITS);
+
+      /* Subtract q*d; if that went negative (borrow out of n0), add d
+	 back once or twice, decrementing q accordingly.  */
+      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
+      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
+      if (n0)
+	{
+	  q--;
+	  if (! refmpn_add_n (np+i, np+i, dp, dsize))
+	    {
+	      q--;
+	      ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
+	    }
+	}
+      np[i+dsize] = 0;
+
+      qp[i] = q;
+    }
+
+  /* remainder < divisor */
+#if 0		/* ASSERT triggers gcc 4.2.1 bug */
+  ASSERT (refmpn_cmp (np, dp, dsize) < 0);
+#endif
+
+  /* multiply back to original */
+  {
+    mp_ptr  mp = refmpn_malloc_limbs (nsize);
+
+    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
+    if (retval)
+      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
+    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
+    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);
+
+    free (mp);
+  }
+
+  free (np_orig);
+  return retval;
+}
+
+/* Truncating division: quotient of {np,nsize} by {dp,dsize} to qp,
+   remainder (dsize limbs) to rp.  qxn must be 0.  For multi-limb
+   divisors the divisor is normalized (left shifted so its high bit is
+   set) for refmpn_sb_div_qr, and the remainder shifted back down
+   afterwards.  (The previous header comment here was copy-pasted from
+   refmpn_sb_div_qr and described that function instead.)  */
+void
+refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+		mp_ptr np, mp_size_t nsize,
+		mp_srcptr dp, mp_size_t dsize)
+{
+  ASSERT (qxn == 0);
+  ASSERT_MPN (np, nsize);
+  ASSERT_MPN (dp, dsize);
+  ASSERT (dsize > 0);
+  ASSERT (dp[dsize-1] != 0);
+
+  if (dsize == 1)
+    {
+      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
+      return;
+    }
+  else
+    {
+      mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
+      mp_ptr  d2p = refmpn_malloc_limbs (dsize);
+      /* Bits needed to bring the high divisor limb's msb to the top.  */
+      int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;
+
+      /* Shift n and d up together; the quotient is unchanged, the
+	 remainder is scaled by 2^norm and unscaled below.  */
+      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
+      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
+
+      refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
+      refmpn_rshift_or_copy (rp, n2p, dsize, norm);
+
+      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
+      free (n2p);
+      free (d2p);
+    }
+}
+
+/* Montgomery reduction (REDC) with a one-limb inverse: given {up,2n}
+   and modulus {mp,n}, with invm the negated inverse of mp[0] mod the
+   limb base, compute into {rp,n} a value congruent to {up,2n} divided
+   by base^n modulo m.  Returns the carry from the final addition;
+   presumably the caller performs any final subtraction of m -- confirm
+   against callers.  {up,2n} is destroyed.  */
+mp_limb_t
+refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+{
+  mp_size_t j;
+  mp_limb_t cy;
+
+  ASSERT_MPN (up, 2*n);
+  /* ASSERT about directed overlap rp, up */
+  /* ASSERT about overlap rp, mp */
+  /* ASSERT about overlap up, mp */
+
+  for (j = n - 1; j >= 0; j--)
+    {
+      /* Add the multiple of m that zeros the current low limb; the
+	 addmul carry is parked in the slot just retired, to be summed
+	 into the high half below.  */
+      up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
+      up++;
+    }
+  /* up-n now points at the n parked carry limbs, up at the high half.
+     (Note this uses mpn_add_n, not refmpn_add_n.)  */
+  cy = mpn_add_n (rp, up, up - n, n);
+  return cy;
+}
+
+/* Convert {src,size} to digit values (0..base-1, not ASCII) at dst,
+   most significant digit first, returning the count of digits
+   produced.  {src,size} is clobbered, except for power-of-2 bases
+   where a scratch copy is used instead.  */
+size_t
+refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
+{
+  unsigned char  *d;
+  size_t  dsize;
+
+  ASSERT (size >= 0);
+  ASSERT (base >= 2);
+  ASSERT (base < numberof (mp_bases));
+  ASSERT (size == 0 || src[size-1] != 0);
+  ASSERT_MPN (src, size);
+
+  /* Upper bound on the digit count for this operand and base.  */
+  MPN_SIZEINBASE (dsize, src, size, base);
+  ASSERT (dsize >= 1);
+  ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES));
+
+  if (size == 0)
+    {
+      dst[0] = 0;
+      return 1;
+    }
+
+  /* don't clobber input for power of 2 bases */
+  if (POW2_P (base))
+    src = refmpn_memdup_limbs (src, size);
+
+  /* Peel off digits least significant first, filling dst backwards
+     from the estimated end; size shrinks as high limbs become zero.  */
+  d = dst + dsize;
+  do
+    {
+      d--;
+      ASSERT (d >= dst);
+      *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
+      size -= (src[size-1] == 0);
+    }
+  while (size != 0);
+
+  /* Move result back and decrement dsize if we didn't generate
+     the maximum possible digits.  */
+  if (d != dst)
+    {
+      size_t i;
+      dsize -= d - dst;
+      for (i = 0; i < dsize; i++)
+	dst[i] = d[i];
+    }
+
+  if (POW2_P (base))
+    free (src);
+
+  return dsize;
+}
+
+
+/* Reverse the byte order of src.  */
+mp_limb_t
+ref_bswap_limb (mp_limb_t src)
+{
+  mp_limb_t  result = 0;
+  int        byte;
+
+  /* Take bytes off the low end of src and push them onto the low end
+     of result, reversing their order.  */
+  for (byte = 0; byte < GMP_LIMB_BYTES; byte++)
+    {
+      result = (result << 8) | (src & 0xFF);
+      src >>= 8;
+    }
+  return result;
+}
+
+
+/* These random functions are mostly for transitional purposes while adding
+   nail support, since they're independent of the normal mpn routines.  They
+   can probably be removed when those normal routines are reliable, though
+   perhaps something independent would still be useful at times.  */
+
+#if GMP_LIMB_BITS == 32
+#define RAND_A  CNST_LIMB(0x29CF535)
+#endif
+#if GMP_LIMB_BITS == 64
+#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
+#endif
+
+mp_limb_t  refmpn_random_seed;
+
+/* Step the linear congruential generator and return its high half,
+   i.e. GMP_LIMB_BITS/2 pseudo-random bits.  */
+mp_limb_t
+refmpn_random_half (void)
+{
+  refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
+  return refmpn_random_seed >> (GMP_LIMB_BITS / 2);
+}
+
+/* A full pseudo-random limb, masked to GMP_NUMB_MASK, built from two
+   half-limb draws.  The two refmpn_random_half calls are sequenced
+   explicitly: the old single-expression form left their order
+   unspecified (indeterminately sequenced function calls), so which
+   draw landed in the high half depended on the compiler.  */
+mp_limb_t
+refmpn_random_limb (void)
+{
+  mp_limb_t  hi = refmpn_random_half ();
+  mp_limb_t  lo = refmpn_random_half ();
+  return ((hi << (GMP_LIMB_BITS/2)) | lo) & GMP_NUMB_MASK;
+}
+
+/* Fill {ptr,size} with random limbs.  Without nails, defer to
+   mpn_random; with nails, draw each limb from the local generator
+   (which already masks to GMP_NUMB_MASK).  */
+void
+refmpn_random (mp_ptr ptr, mp_size_t size)
+{
+  mp_size_t  j;
+
+  if (GMP_NAIL_BITS == 0)
+    {
+      mpn_random (ptr, size);
+      return;
+    }
+
+  for (j = 0; j < size; j++)
+    ptr[j] = refmpn_random_limb ();
+}
+
+/* Fill {ptr,size} with a random value featuring long runs of ones and
+   zeros, mpn_random2 style.  Without nails, defer to mpn_random2.
+   Otherwise emit alternating all-ones/all-zeros runs of 1..RUN_MODULUS
+   bits, starting at a random bit position in the most significant
+   limb and working down.  */
+void
+refmpn_random2 (mp_ptr ptr, mp_size_t size)
+{
+  mp_size_t  i;
+  mp_limb_t  bit, mask, limb;
+  int        run;
+
+  if (GMP_NAIL_BITS == 0)
+    {
+      mpn_random2 (ptr, size);
+      return;
+    }
+
+#define RUN_MODULUS  32
+
+  /* start with ones at a random pos in the high limb */
+  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
+  mask = 0;
+  run = 0;
+
+  for (i = size-1; i >= 0; i--)
+    {
+      limb = 0;
+      do
+	{
+	  /* When the current run is exhausted, pick a fresh length and
+	     flip mask between all-zeros and all-ones.  */
+	  if (run == 0)
+	    {
+	      run = (refmpn_random_half () % RUN_MODULUS) + 1;
+	      mask = ~mask;
+	    }
+
+	  /* Deposit the current bit if we are in a ones run.  */
+	  limb |= (bit & mask);
+	  bit >>= 1;
+	  run--;
+	}
+      while (bit != 0);
+
+      ptr[i] = limb;
+      bit = GMP_NUMB_HIGHBIT;
+    }
+}
+
+/* This is a simple bitwise algorithm working high to low across "s" and
+   testing each time whether setting the bit would make s^2 exceed n.
+
+   The root is written to sp ((nsize+1)/2 limbs) and the remainder
+   n - s^2 to rp when rp is non-NULL.  Returns the remainder's limb
+   count, or with rp NULL just nonzero/zero for remainder
+   nonzero/zero.  */
+mp_size_t
+refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
+{
+  mp_ptr     tp, dp;
+  mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
+  unsigned   ibit;
+  long       i;
+  mp_limb_t  c;
+
+  ASSERT (nsize >= 0);
+
+  /* If n==0, then s=0 and r=0.  */
+  if (nsize == 0)
+    return 0;
+
+  ASSERT (np[nsize - 1] != 0);
+  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
+  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
+  ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));
+
+  /* root */
+  ssize = (nsize+1)/2;
+  refmpn_zero (sp, ssize);
+
+  /* the remainder so far */
+  dp = refmpn_memdup_limbs (np, nsize);
+  dsize = nsize;
+
+  /* temporary */
+  talloc = 2*ssize + 1;
+  tp = refmpn_malloc_limbs (talloc);
+
+  for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
+    {
+      /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
+	 is added to it */
+
+      /* 2*s*2^i: s shifted up by i+1 bits (whole limbs then odd bits).  */
+      ilimbs = (i+1) / GMP_NUMB_BITS;
+      ibit = (i+1) % GMP_NUMB_BITS;
+      refmpn_zero (tp, ilimbs);
+      c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
+      tsize = ilimbs + ssize;
+      tp[tsize] = c;
+      tsize += (c != 0);
+
+      /* + 2^(2*i): add a single bit at position 2*i, widening t first
+	 if that position lies above the current size.  */
+      ilimbs = (2*i) / GMP_NUMB_BITS;
+      ibit = (2*i) % GMP_NUMB_BITS;
+      if (ilimbs + 1 > tsize)
+	{
+	  refmpn_zero_extend (tp, tsize, ilimbs + 1);
+	  tsize = ilimbs + 1;
+	}
+      c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
+			CNST_LIMB(1) << ibit);
+      ASSERT (tsize < talloc);
+      tp[tsize] = c;
+      tsize += (c != 0);
+
+      if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
+	{
+	  /* set this bit in s and subtract from the remainder */
+	  refmpn_setbit (sp, i);
+
+	  ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
+	  dsize = refmpn_normalize (dp, dsize);
+	}
+    }
+
+  if (rp == NULL)
+    {
+      ret = ! refmpn_zero_p (dp, dsize);
+    }
+  else
+    {
+      ASSERT (dsize == 0 || dp[dsize-1] != 0);
+      refmpn_copy (rp, dp, dsize);
+      ret = dsize;
+    }
+
+  free (dp);
+  free (tp);
+  return ret;
+}
diff --git a/tests/refmpq.c b/tests/refmpq.c
new file mode 100644
index 0000000..8a2fc7a
--- /dev/null
+++ b/tests/refmpq.c
@@ -0,0 +1,40 @@
+/* Reference rational routines.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* w = x + y by cross multiplication:
+   w = (num(x)*den(y) + den(x)*num(y)) / (den(x)*den(y)),
+   reduced to lowest terms by mpq_canonicalize.  The numerator of w is
+   completed before its denominator is written.  */
+void
+refmpq_add (mpq_ptr w, mpq_srcptr x, mpq_srcptr y)
+{
+  mpz_mul    (mpq_numref(w), mpq_numref(x), mpq_denref(y));
+  mpz_addmul (mpq_numref(w), mpq_denref(x), mpq_numref(y));
+  mpz_mul    (mpq_denref(w), mpq_denref(x), mpq_denref(y));
+  mpq_canonicalize (w);
+}
+
+/* w = x - y by cross multiplication:
+   w = (num(x)*den(y) - den(x)*num(y)) / (den(x)*den(y)),
+   reduced to lowest terms by mpq_canonicalize.  Mirrors refmpq_add
+   with mpz_submul in place of mpz_addmul.  */
+void
+refmpq_sub (mpq_ptr w, mpq_srcptr x, mpq_srcptr y)
+{
+  mpz_mul    (mpq_numref(w), mpq_numref(x), mpq_denref(y));
+  mpz_submul (mpq_numref(w), mpq_denref(x), mpq_numref(y));
+  mpz_mul    (mpq_denref(w), mpq_denref(x), mpq_denref(y));
+  mpq_canonicalize (w);
+}
diff --git a/tests/refmpz.c b/tests/refmpz.c
new file mode 100644
index 0000000..167799f
--- /dev/null
+++ b/tests/refmpz.c
@@ -0,0 +1,343 @@
+/* Reference mpz functions.
+
+Copyright 1997, 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+/* always do assertion checking */
+#define WANT_ASSERT  1
+
+#include <stdio.h>
+#include <stdlib.h> /* for free */
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+
+/* FIXME: Shouldn't use plain mpz functions in a reference routine. */
+/* Toggle bit number "bit" of r: set it when clear, clear it when set.  */
+void
+refmpz_combit (mpz_ptr r, unsigned long bit)
+{
+  if (! mpz_tstbit (r, bit))
+    mpz_setbit (r, bit);
+  else
+    mpz_clrbit (r, bit);
+}
+
+
+/* Hamming distance between x and y.  Operands of opposite sign differ
+   in infinitely many twos-complement bit positions, so ULONG_MAX is
+   returned for that case.  Otherwise both operands are widened to a
+   common limb count, negatives converted to twos complement, and the
+   mpn-level distance taken.  */
+unsigned long
+refmpz_hamdist (mpz_srcptr x, mpz_srcptr y)
+{
+  mp_size_t      xsize, ysize, tsize;
+  mp_ptr         xp, yp;
+  unsigned long  ret;
+
+  if ((SIZ(x) < 0 && SIZ(y) >= 0)
+      || (SIZ(y) < 0 && SIZ(x) >= 0))
+    return ULONG_MAX;
+
+  xsize = ABSIZ(x);
+  ysize = ABSIZ(y);
+  tsize = MAX (xsize, ysize);
+
+  xp = refmpn_malloc_limbs (tsize);
+  refmpn_zero (xp, tsize);
+  refmpn_copy (xp, PTR(x), xsize);
+
+  yp = refmpn_malloc_limbs (tsize);
+  refmpn_zero (yp, tsize);
+  refmpn_copy (yp, PTR(y), ysize);
+
+  if (SIZ(x) < 0)
+    refmpn_neg (xp, xp, tsize);
+
+  /* This previously tested SIZ(x) again, a copy-paste slip.  It was
+     harmless only because mixed signs already returned above; test y's
+     own sign regardless.  */
+  if (SIZ(y) < 0)
+    refmpn_neg (yp, yp, tsize);
+
+  ret = refmpn_hamdist (xp, yp, tsize);
+
+  free (xp);
+  free (yp);
+  return ret;
+}
+
+/* g = gcd(|a|,|b|) by the binary algorithm: strip factors of two from
+   each operand, subtract-and-strip until they are equal, then restore
+   the common power of two.  gcd(0,x) = |x|.  */
+void
+refmpz_gcd (mpz_ptr g, mpz_srcptr a_orig, mpz_srcptr b_orig)
+{
+  mp_bitcnt_t a_twos, b_twos, common_twos;
+  mpz_t a;
+  mpz_t b;
+  mpz_init (a);
+  mpz_init (b);
+  mpz_abs (a, a_orig);
+  mpz_abs (b, b_orig);
+
+  if (mpz_sgn (a) == 0)
+    {
+      mpz_set (g, b);
+      goto done;  /* plain "return" here used to leak a and b */
+    }
+  if (mpz_sgn (b) == 0)
+    {
+      mpz_set (g, a);
+      goto done;
+    }
+  a_twos = mpz_scan1 (a, 0);
+  mpz_tdiv_q_2exp (a, a, a_twos);
+
+  b_twos = mpz_scan1 (b, 0);
+  mpz_tdiv_q_2exp (b, b, b_twos);
+
+  /* Only the factors of two common to both operands belong in the gcd.  */
+  common_twos = MIN(a_twos, b_twos);
+  for (;;)
+    {
+      int c;
+      mp_bitcnt_t twos;
+      c = mpz_cmp (a, b);
+      if (c == 0)
+	break;
+      if (c < 0)
+	mpz_swap (a, b);
+      mpz_sub (a, a, b);
+      /* a = odd - odd is even; its twos are not common, drop them.  */
+      twos = mpz_scan1 (a, 0);
+      mpz_tdiv_q_2exp (a, a, twos);
+    }
+  mpz_mul_2exp (g, a, common_twos);
+
+ done:
+  mpz_clear (a);
+  mpz_clear (b);
+}
+
+/* (0/b), with mpz b; is 1 if b=+/-1, 0 otherwise */
+#define JACOBI_0Z(b)  JACOBI_0LS (PTR(b)[0], SIZ(b))
+
+/* (a/b) effect due to sign of b: mpz/mpz */
+#define JACOBI_BSGN_ZZ_BIT1(a, b)   JACOBI_BSGN_SS_BIT1 (SIZ(a), SIZ(b))
+
+/* (a/b) effect due to sign of a: mpz/unsigned-mpz, b odd;
+   is (-1/b) if a<0, or +1 if a>=0 */
+#define JACOBI_ASGN_ZZU_BIT1(a, b)  JACOBI_ASGN_SU_BIT1 (SIZ(a), PTR(b)[0])
+
+/* Kronecker symbol (a/b), computed by reducing to positive odd
+   operands while accumulating sign flips in bit 1 of result_bit1, then
+   running a subtract-and-strip loop using quadratic reciprocity.  */
+int
+refmpz_kronecker (mpz_srcptr a_orig, mpz_srcptr b_orig)
+{
+  unsigned long  twos;
+  mpz_t  a, b;
+  int    result_bit1 = 0;
+
+  if (mpz_sgn (b_orig) == 0)
+    return JACOBI_Z0 (a_orig);  /* (a/0) */
+
+  if (mpz_sgn (a_orig) == 0)
+    return JACOBI_0Z (b_orig);  /* (0/b) */
+
+  /* (even/even) is 0.  */
+  if (mpz_even_p (a_orig) && mpz_even_p (b_orig))
+    return 0;
+
+  if (mpz_cmp_ui (b_orig, 1) == 0)
+    return 1;
+
+  mpz_init_set (a, a_orig);
+  mpz_init_set (b, b_orig);
+
+  /* Make b positive and odd, folding each adjustment's effect on the
+     symbol into result_bit1.  */
+  if (mpz_sgn (b) < 0)
+    {
+      result_bit1 ^= JACOBI_BSGN_ZZ_BIT1 (a, b);
+      mpz_neg (b, b);
+    }
+  if (mpz_even_p (b))
+    {
+      twos = mpz_scan1 (b, 0L);
+      mpz_tdiv_q_2exp (b, b, twos);
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(a)[0]);
+    }
+
+  /* Likewise make a positive and odd.  */
+  if (mpz_sgn (a) < 0)
+    {
+      result_bit1 ^= JACOBI_N1B_BIT1 (PTR(b)[0]);
+      mpz_neg (a, a);
+    }
+  if (mpz_even_p (a))
+    {
+      twos = mpz_scan1 (a, 0L);
+      mpz_tdiv_q_2exp (a, a, twos);
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(b)[0]);
+    }
+
+  /* Binary-gcd style loop: keep a >= b (flipping per reciprocity when
+     swapping), subtract, and strip twos from the difference.  */
+  for (;;)
+    {
+      ASSERT (mpz_odd_p (a));
+      ASSERT (mpz_odd_p (b));
+      ASSERT (mpz_sgn (a) > 0);
+      ASSERT (mpz_sgn (b) > 0);
+
+      TRACE (printf ("top\n");
+	     mpz_trace (" a", a);
+	     mpz_trace (" b", b));
+
+      if (mpz_cmp (a, b) < 0)
+	{
+	  TRACE (printf ("swap\n"));
+	  mpz_swap (a, b);
+	  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (PTR(a)[0], PTR(b)[0]);
+	}
+
+      if (mpz_cmp_ui (b, 1) == 0)
+	break;
+
+      mpz_sub (a, a, b);
+      TRACE (printf ("sub\n");
+	     mpz_trace (" a", a));
+      /* a == b before the subtract means gcd(a,b) != 1, symbol 0.  */
+      if (mpz_sgn (a) == 0)
+	goto zero;
+
+      twos = mpz_scan1 (a, 0L);
+      mpz_fdiv_q_2exp (a, a, twos);
+      TRACE (printf ("twos %lu\n", twos);
+	     mpz_trace (" a", a));
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(b)[0]);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  return JACOBI_BIT1_TO_PN (result_bit1);
+
+ zero:
+  mpz_clear (a);
+  mpz_clear (b);
+  return 0;
+}
+
+/* Jacobi symbol (a/b).  For b positive and odd -- which the caller
+   must guarantee -- this coincides with the Kronecker symbol.  */
+int
+refmpz_jacobi (mpz_srcptr a, mpz_srcptr b)
+{
+  ASSERT_ALWAYS (mpz_sgn (b) > 0);
+  ASSERT_ALWAYS (mpz_odd_p (b));
+
+  return refmpz_kronecker (a, b);
+}
+
+/* Legendre symbol (a/p) via Euler's criterion: a^((p-1)/2) mod p is
+   1, p-1 or 0 according as a is a residue, non-residue or divisible
+   by p.  p must be an odd prime.  */
+int
+refmpz_legendre (mpz_srcptr a, mpz_srcptr p)
+{
+  int res;
+
+  mpz_t r;
+  mpz_t e;
+
+  ASSERT_ALWAYS (mpz_sgn (p) > 0);
+  ASSERT_ALWAYS (mpz_odd_p (p));
+
+  mpz_init (r);
+  mpz_init (e);
+
+  /* Reduce a into [0, p) first.  */
+  mpz_fdiv_r (r, a, p);
+
+  /* e = (p-1)/2, the Euler exponent.  */
+  mpz_set (e, p);
+  mpz_sub_ui (e, e, 1);
+  mpz_fdiv_q_2exp (e, e, 1);
+  mpz_powm (r, r, e, p);
+
+  /* Normalize to a more or less symmetric range around zero, mapping
+     p-1 to -1, so the sign of r is the symbol.  */
+  if (mpz_cmp (r, e) > 0)
+    mpz_sub (r, r, p);
+
+  ASSERT_ALWAYS (mpz_cmpabs_ui (r, 1) <= 0);
+
+  res = mpz_sgn (r);
+
+  mpz_clear (r);
+  mpz_clear (e);
+
+  return res;
+}
+
+
+/* Kronecker symbol (a/b) with b an unsigned long.  */
+int
+refmpz_kronecker_ui (mpz_srcptr a, unsigned long b)
+{
+  mpz_t  tmp;
+  int    result;
+
+  mpz_init_set_ui (tmp, b);
+  result = refmpz_kronecker (a, tmp);
+  mpz_clear (tmp);
+  return result;
+}
+
+/* Kronecker symbol (a/b) with b a signed long.  */
+int
+refmpz_kronecker_si (mpz_srcptr a, long b)
+{
+  mpz_t  tmp;
+  int    result;
+
+  mpz_init_set_si (tmp, b);
+  result = refmpz_kronecker (a, tmp);
+  mpz_clear (tmp);
+  return result;
+}
+
+/* Kronecker symbol (a/b) with a an unsigned long.  */
+int
+refmpz_ui_kronecker (unsigned long a, mpz_srcptr b)
+{
+  mpz_t  tmp;
+  int    result;
+
+  mpz_init_set_ui (tmp, a);
+  result = refmpz_kronecker (tmp, b);
+  mpz_clear (tmp);
+  return result;
+}
+
+/* Kronecker symbol (a/b) with a a signed long.  */
+int
+refmpz_si_kronecker (long a, mpz_srcptr b)
+{
+  mpz_t  tmp;
+  int    result;
+
+  mpz_init_set_si (tmp, a);
+  result = refmpz_kronecker (tmp, b);
+  mpz_clear (tmp);
+  return result;
+}
+
+
+/* w = b^e by binary exponentiation, consuming the bits of e from low
+   to high.  The old scan "for (i = 2; i <= e; i <<= 1)" wrapped i to
+   zero when e had its top bit set, so the loop never terminated for
+   e >= 2^(bits-1); shifting e down instead works for every unsigned
+   long e.  b^0 = 1, including 0^0.  */
+void
+refmpz_pow_ui (mpz_ptr w, mpz_srcptr b, unsigned long e)
+{
+  mpz_t  s, t;
+
+  mpz_init_set_ui (t, 1L);   /* accumulated result */
+  mpz_init_set (s, b);       /* b^(2^k) for the current bit k */
+
+  for (;;)
+    {
+      if ((e & 1) != 0)
+	mpz_mul (t, t, s);
+      e >>= 1;
+      if (e == 0)
+	break;
+      mpz_mul (s, s, s);     /* advance to the next power of b */
+    }
+
+  mpz_set (w, t);
+
+  mpz_clear (s);
+  mpz_clear (t);
+}
diff --git a/tests/spinner.c b/tests/spinner.c
new file mode 100644
index 0000000..13448d0
--- /dev/null
+++ b/tests/spinner.c
@@ -0,0 +1,128 @@
+/* A stupid little spinning wheel designed to make it look like useful work
+   is being done.
+
+Copyright 1999-2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>     /* for isatty */
+#endif
+
+#include "gmp-impl.h"
+
+#include "tests.h"
+
+
+/* "alarm" is not available on mingw32, and the SIGALRM constant is not
+   defined.  Don't bother with a spinner in this case.  */
+#if ! HAVE_ALARM || ! defined (SIGALRM)
+#define alarm(n)          abort()
+#define signal(sig,func)  SIG_ERR
+#endif
+
+
+/* An application can update this to get a count printed with the spinner.
+   If left at 0, no count is printed. */
+
+unsigned long  spinner_count = 0;
+
+
+int  spinner_wanted = -1;  /* -1 uninitialized, 1 wanted, 0 not */
+int  spinner_tick = 1;     /* 1 ready to print, 0 not */
+
+
+/*ARGSUSED*/
+/* SIGALRM handler: flag that the spinner may print its next frame,
+   reinstall the handler (required under SysV-style one-shot signal
+   semantics) and re-arm a one second alarm.  */
+RETSIGTYPE
+spinner_signal (int signum)
+{
+  spinner_tick = 1;
+
+  if (signal (SIGALRM, spinner_signal) == SIG_ERR)
+    {
+      printf ("spinner_signal(): Oops, cannot reinstall SIGALRM\n");
+      abort ();
+    }
+  alarm (1);
+}
+
+
+/* Initialize the spinner.
+
+   This is done the first time spinner() is called, so an application
+   doesn't need to call this directly.
+
+   The spinner is only wanted if the output is a tty.  */
+
+#define SPINNER_WANTED_INIT() \
+  if (spinner_wanted < 0) spinner_init ()
+
+void
+spinner_init (void)
+{
+  /* The spinner is only wanted when stdout is a tty.  */
+  spinner_wanted = isatty (STDOUT_FILENO);
+  if (spinner_wanted == -1)
+    abort ();
+
+  if (!spinner_wanted)
+    return;
+
+  /* Without SIGALRM support there will be no ticks; note it and keep
+     spinner_tick clear so spinner() never prints.  */
+  if (signal (SIGALRM, spinner_signal) == SIG_ERR)
+    {
+      printf ("(no spinner)\r");
+      spinner_tick = 0;
+      return;
+    }
+  alarm (1);
+
+  /* unbuffered output so the spinner will show up */
+  setbuf (stdout, NULL);
+}
+
+
+/* Print the next spinner frame (and spinner_count, when nonzero) if a
+   SIGALRM tick has arrived since the last frame; otherwise do nothing,
+   so calling this in a tight loop is cheap.  */
+void
+spinner (void)
+{
+  static const char  data[] = { '|', '/', '-', '\\' };
+  static int         pos = 0;
+
+  char  buf[128];
+
+  SPINNER_WANTED_INIT ();
+
+  if (spinner_tick)
+    {
+      buf[0] = data[pos];
+      pos = (pos + 1) % numberof (data);
+      spinner_tick = 0;
+
+      /* "\r" returns to column 0 so each frame overwrites the last.  */
+      if (spinner_count != 0)
+	{
+	  sprintf (buf+1, " %lu\r", spinner_count);
+	}
+      else
+	{
+	  buf[1] = '\r';
+	  buf[2] = '\0';
+	}
+      fputs (buf, stdout);
+    }
+}
diff --git a/tests/t-bswap.c b/tests/t-bswap.c
new file mode 100644
index 0000000..765ef8e
--- /dev/null
+++ b/tests/t-bswap.c
@@ -0,0 +1,70 @@
+/* Test BSWAP_LIMB.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check BSWAP_LIMB and variants against the reference byte reversal,
+   on random limbs.  */
+int
+main (void)
+{
+  mp_limb_t  src, want, got;
+  int        i;
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  for (i = 0; i < 1000; i++)
+    {
+      mpn_random (&src, (mp_size_t) 1);
+
+      want = ref_bswap_limb (src);
+
+      BSWAP_LIMB (got, src);
+      if (got != want)
+        {
+          printf ("BSWAP_LIMB wrong result\n");
+          /* Shared failure path: dump operands and abort.  */
+        error:
+          mpn_trace ("  src ", &src,  (mp_size_t) 1);
+          mpn_trace ("  want", &want, (mp_size_t) 1);
+          mpn_trace ("  got ", &got,  (mp_size_t) 1);
+          abort ();
+        }
+
+      BSWAP_LIMB_FETCH (got, &src);
+      if (got != want)
+        {
+          printf ("BSWAP_LIMB_FETCH wrong result\n");
+          goto error;
+        }
+
+      BSWAP_LIMB_STORE (&got, src);
+      if (got != want)
+        {
+          printf ("BSWAP_LIMB_STORE wrong result\n");
+          goto error;
+        }
+    }
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-constants.c b/tests/t-constants.c
new file mode 100644
index 0000000..35714c7
--- /dev/null
+++ b/tests/t-constants.c
@@ -0,0 +1,343 @@
+/* Check the values of some constants.
+
+Copyright 2000-2003, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
/* Record whether the <limits.h>-style bound macros are defined at this
   point (i.e. after gmp.h has been seen, per the "After gmp.h" heading
   in main's diagnostic dump, where these strings are printed).  */
#ifdef ULONG_MAX
const char *ulong_max_def = "defined";
#else
const char *ulong_max_def = "not defined";
#endif
#ifdef LONG_MAX
const char *long_max_def = "defined";
#else
const char *long_max_def = "not defined";
#endif

#ifdef UINT_MAX
const char *uint_max_def = "defined";
#else
const char *uint_max_def = "not defined";
#endif
#ifdef INT_MAX
const char *int_max_def = "defined";
#else
const char *int_max_def = "not defined";
#endif

#ifdef USHRT_MAX
const char *ushrt_max_def = "defined";
#else
const char *ushrt_max_def = "not defined";
#endif
#ifdef SHRT_MAX
const char *shrt_max_def = "defined";
#else
const char *shrt_max_def = "not defined";
#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
/* LL(l,ll) selects a printf format for an mp_limb_t: the "l" variant when
   limbs are long, the "ll" variant when they are long long.  */
#ifdef _LONG_LONG_LIMB
#define LL(l,ll)  ll
#else
#define LL(l,ll)  l
#endif

/* SS(i,l) likewise selects a format for mp_size_t: int or long.  */
#if __GMP_MP_SIZE_T_INT
#define SS(i,l)   i
#else
#define SS(i,l)   l
#endif


/* Compare two limb values, printing both in hex (limb-sized format via
   LL) and setting the shared "error" flag on mismatch.  */
#define CHECK_LIMB_S(x, xname, y, yname)                \
  do {                                                  \
    if ((x) != (y))                                     \
      {                                                 \
        printf (LL("%s == %lx, but %s == %lx\n",        \
                   "%s == %llx, but %s == %llx\n"),     \
                xname, x, yname, y);                    \
        error = 1;                                      \
      }                                                 \
  } while (0)

/* Compare two int values, printing both on mismatch.  */
#define CHECK_INT_S(x, xname, y, yname)                                 \
  do {                                                                  \
    if ((x) != (y))                                                     \
      {                                                                 \
        printf ("%s == %d, but %s == %d\n", xname, x, yname, y);        \
        error = 1;                                                      \
      }                                                                 \
  } while (0)



/* Flag an error if the condition x is false.  */
#define CHECK_CONDITION_S(x, xname)             \
  do {                                          \
    if (!(x))                                   \
      {                                         \
        printf ("%s is false\n", xname);        \
      error = 1;                              \
      }                                         \
  } while (0)


/* How many bits seem to work in the given type. */
#define CALC_BITS(result, type) \
  do {                          \
    type  n = 1;                \
    result = 0;                 \
    while (n != 0)              \
      {                         \
        n <<= 1;                \
        result++;               \
      }                         \
  } while (0)

/* Check that the compile-time bit-count constant matches what CALC_BITS
   measures at run time for the type.  */
#define CHECK_BITS_S(constant, constant_name, type)     \
  do {                                                  \
    int   calculated;                                   \
    CALC_BITS (calculated, type);                       \
    if (calculated != constant)                         \
      {                                                 \
        printf ("%s == %d, but calculated %d\n",        \
                constant_name, constant, calculated);   \
        error = 1;                                      \
      }                                                 \
  } while (0)


/* Check "value" behaves as the type's high bit: non-zero, and shifting
   it left once gives zero.  */
#define CHECK_HIGHBIT_S(value, value_name, type, format)        \
  do {                                                          \
    type  n = value;                                            \
    if (n == 0)                                                 \
      {                                                         \
        printf ("%s == 0\n", value_name);                       \
        error = 1;                                              \
      }                                                         \
    n <<= 1;                                                    \
    if (n != 0)                                                 \
      {                                                         \
        printf ("%s << 1 = ", value_name);                      \
        printf (format, n);                                     \
        printf (" != 0\n");                                     \
        error = 1;                                              \
      }                                                         \
  } while (0)


/* Check max_val/min_val behave as the type's extremes: max + 1 wraps to
   min, and max > min.  This relies on wraparound overflow, hence the
   "Bad!" exclusions below for signed types on unusual machines.  */
#define CHECK_MAX_S(max_val, max_name, min_val, min_name, type, format) \
  do {                                                                  \
    type  maxval = max_val;                                             \
    type  minval = min_val;                                             \
    type  n = maxval;                                                   \
    n++;                                                                \
    if (n != minval)                                                    \
      {                                                                 \
        printf ("%s + 1 = ", max_name);                                 \
        printf (format, n);                                             \
        printf (" != %s = ", min_name);                                 \
        printf (format, minval);                                        \
        printf ("\n");                                                  \
        error = 1;                                                      \
      }                                                                 \
    if (maxval <= minval)                                               \
      {                                                                 \
        printf ("%s = ", max_name);                                     \
        printf (format, maxval);                                        \
        printf (" <= %s = ", min_name);                                 \
        printf (format, minval);                                        \
        printf ("\n");                                                  \
        error = 1;                                                      \
      }                                                                 \
  } while (0)


/* Convenience wrappers stringifying the expressions for the messages.  */
#define CHECK_LIMB(x,y)      CHECK_LIMB_S (x, #x, y, #y)
#define CHECK_INT(x,y)       CHECK_INT_S (x, #x, y, #y)
#define CHECK_CONDITION(x)   CHECK_CONDITION_S (x, #x)
#define CHECK_BITS(c,t)      CHECK_BITS_S (c, #c, t)
#define CHECK_MAX(m,n,t,f)   CHECK_MAX_S (m, #m, n, #n, t, f)
#define CHECK_HIGHBIT(n,t,f) CHECK_HIGHBIT_S (n, #n, t, f)
+
+
+/* The tests below marked "Bad!" fail on Cray T90 systems, where int, short
+   and mp_size_t are 48 bits or some such but don't wraparound in a plain
+   twos complement fashion.  In particular,
+
+       INT_HIGHBIT << 1 = 0xFFFFC00000000000 != 0
+       INT_MAX + 1 = 35184372088832 != INT_MIN = -35184372088832
+
+   This is a bit bizarre, but doesn't matter because GMP doesn't rely on any
+   particular overflow behaviour for int or short, only for mp_limb_t.  */
+
/* Check compiled-in constants (limb sizes, highbits, maxes, modular
   inverses) against values computed at run time.  With any command line
   argument, or after any failure, also print a diagnostic dump.  */
int
main (int argc, char *argv[])
{
  int  error = 0;

  /* Limb size in bytes/bits must agree with the actual mp_limb_t type.  */
  CHECK_INT (GMP_LIMB_BYTES, (int) sizeof(mp_limb_t));
  CHECK_INT (mp_bits_per_limb, GMP_LIMB_BITS);

  CHECK_BITS (GMP_LIMB_BITS, mp_limb_t);
  CHECK_BITS (BITS_PER_ULONG, unsigned long);

  /* Each HIGHBIT constant must be non-zero and shift off the top.  */
  CHECK_HIGHBIT (GMP_LIMB_HIGHBIT, mp_limb_t,      LL("0x%lX","0x%llX"));
  CHECK_HIGHBIT (ULONG_HIGHBIT,     unsigned long,  "0x%lX");
  CHECK_HIGHBIT (UINT_HIGHBIT,      unsigned int,   "0x%X");
  CHECK_HIGHBIT (USHRT_HIGHBIT,     unsigned short, "0x%hX");
#if 0 /* Bad! */
  CHECK_HIGHBIT (LONG_HIGHBIT,      long,           "0x%lX");
  CHECK_HIGHBIT (INT_HIGHBIT,       int,            "0x%X");
  CHECK_HIGHBIT (SHRT_HIGHBIT,      short,          "0x%hX");
#endif

#if 0 /* Bad! */
  CHECK_MAX (LONG_MAX,      LONG_MIN,      long,           "%ld");
  CHECK_MAX (INT_MAX,       INT_MIN,       int,            "%d");
  CHECK_MAX (SHRT_MAX,      SHRT_MIN,      short,          "%hd");
#endif
  CHECK_MAX (ULONG_MAX,     0,             unsigned long,  "%lu");
  CHECK_MAX (UINT_MAX,      0,             unsigned int,   "%u");
  CHECK_MAX (USHRT_MAX,     0,             unsigned short, "%hu");
#if 0 /* Bad! */
  CHECK_MAX (MP_SIZE_T_MAX, MP_SIZE_T_MIN, mp_size_t,      SS("%d","%ld"));
#endif

  /* UHWtype should have at least enough bits for half a UWtype */
  {
    int  bits_per_UWtype, bits_per_UHWtype;
    CALC_BITS (bits_per_UWtype,  UWtype);
    CALC_BITS (bits_per_UHWtype, UHWtype);
    CHECK_CONDITION (2*bits_per_UHWtype >= bits_per_UWtype);
  }

  /* The precomputed inverse of 3 must match a freshly computed one, and
     multiplying it back by 3 (mod 2^GMP_NUMB_BITS) must give 1.  */
  ASSERT_ALWAYS_LIMB (MODLIMB_INVERSE_3);
  {
    mp_limb_t  modlimb_inverse_3_calc;
    binvert_limb (modlimb_inverse_3_calc, CNST_LIMB(3));
    ASSERT_ALWAYS_LIMB (modlimb_inverse_3_calc);
    CHECK_LIMB (MODLIMB_INVERSE_3, modlimb_inverse_3_calc);
  }
  {
    mp_limb_t  MODLIMB_INVERSE_3_times_3
      = (MODLIMB_INVERSE_3 * CNST_LIMB(3)) & GMP_NUMB_MASK;
    CHECK_LIMB (MODLIMB_INVERSE_3_times_3, CNST_LIMB(1));
  }

  /* GMP_NUMB_CEIL_MAX_DIV3 must be the smallest value whose product with
     3 (shifted past the nail) carries into the high limb: one less must
     give high < 1, the value itself high >= 1.  */
  {
    mp_limb_t  hi, lo;
    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_MAX_DIV3-1,
                           CNST_LIMB(3) << GMP_NAIL_BITS);
    if (! (hi < 1))
      {
        printf ("GMP_NUMB_CEIL_MAX_DIV3 too big\n");
        error = 1;
      }
    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_MAX_DIV3,
                           CNST_LIMB(3) << GMP_NAIL_BITS);
    if (! (hi >= 1))
      {
        printf ("GMP_NUMB_CEIL_MAX_DIV3 too small\n");
        error = 1;
      }
  }

  /* Likewise GMP_NUMB_CEIL_2MAX_DIV3, with a high-limb threshold of 2.  */
  {
    mp_limb_t  hi, lo;
    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_2MAX_DIV3-1,
                           CNST_LIMB(3) << GMP_NAIL_BITS);
    if (! (hi < 2))
      {
        printf ("GMP_NUMB_CEIL_2MAX_DIV3 too big\n");
        error = 1;
      }
    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_2MAX_DIV3,
                           CNST_LIMB(3) << GMP_NAIL_BITS);
    if (! (hi >= 2))
      {
        printf ("GMP_NUMB_CEIL_2MAX_DIV3 too small\n");
        error = 1;
      }
  }

  /* PP_INVERTED, when provided, must equal invert_limb of PP.  */
#ifdef PP_INVERTED
  {
    mp_limb_t  pp_inverted_calc;
    invert_limb (pp_inverted_calc, PP);
    CHECK_LIMB (PP_INVERTED, pp_inverted_calc);
  }
#endif

  /* Diagnostic dump, on request (any argument) or after a failure.  */
  if (argc >= 2 || error)
    {
      int  bits;

      printf ("\n");
      printf ("After gmp.h,\n");
      printf ("  ULONG_MAX  %s\n", ulong_max_def);
      printf ("  LONG_MAX   %s\n", long_max_def);
      printf ("  UINT_MAX   %s\n", uint_max_def);
      printf ("  INT_MAX    %s\n", int_max_def);
      printf ("  USHRT_MAX  %s\n", ushrt_max_def);
      printf ("  SHRT_MAX   %s\n", shrt_max_def);
      printf ("\n");

#ifdef _CRAY
      printf ("_CRAY is defined, so limits.h is being used\n");
#endif

      printf ("ULONG_MAX      %lX\n", ULONG_MAX);
      printf ("ULONG_HIGHBIT  %lX\n", ULONG_HIGHBIT);
      printf ("LONG_MAX       %lX\n", LONG_MAX);
      printf ("LONG_MIN       %lX\n", LONG_MIN);

      printf ("UINT_MAX       %X\n", UINT_MAX);
      printf ("UINT_HIGHBIT   %X\n", UINT_HIGHBIT);
      printf ("INT_MAX        %X\n", INT_MAX);
      printf ("INT_MIN        %X\n", INT_MIN);

      printf ("USHRT_MAX      %X\n", USHRT_MAX);
      printf ("USHRT_HIGHBIT  %X\n", USHRT_HIGHBIT);
      printf ("SHRT_MAX       %X\n", SHRT_MAX);
      printf ("SHRT_MIN       %X\n", SHRT_MIN);

      printf ("\n");
      printf ("Bits\n");
      CALC_BITS (bits, long);           printf ("  long           %d\n", bits);
      CALC_BITS (bits, int);            printf ("  int            %d\n", bits);
      CALC_BITS (bits, short);          printf ("  short          %d\n", bits);
      CALC_BITS (bits, unsigned long);  printf ("  unsigned long  %d\n", bits);
      CALC_BITS (bits, unsigned int);   printf ("  unsigned int   %d\n", bits);
      CALC_BITS (bits, unsigned short); printf ("  unsigned short %d\n", bits);
      CALC_BITS (bits, mp_size_t);      printf ("  mp_size_t      %d\n", bits);
    }

  if (error)
    abort ();

  exit (0);
}
diff --git a/tests/t-count_zeros.c b/tests/t-count_zeros.c
new file mode 100644
index 0000000..5bb243c
--- /dev/null
+++ b/tests/t-count_zeros.c
@@ -0,0 +1,86 @@
+/* Test count_leading_zeros and count_trailing_zeros.
+
+Copyright 2001-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+void
+check_clz (int want, mp_limb_t n)
+{
+  int  got;
+  count_leading_zeros (got, n);
+  if (got != want)
+    {
+      printf        ("count_leading_zeros wrong\n");
+      mp_limb_trace ("  n    ", n);
+      printf        ("  want %d\n", want);
+      printf        ("  got  %d\n", got);
+      abort ();
+    }
+}
+
+void
+check_ctz (int want, mp_limb_t n)
+{
+  int  got;
+  count_trailing_zeros (got, n);
+  if (got != want)
+    {
+      printf ("count_trailing_zeros wrong\n");
+      mpn_trace ("  n    ", &n, (mp_size_t) 1);
+      printf    ("  want %d\n", want);
+      printf    ("  got  %d\n", got);
+      abort ();
+    }
+}
+
+void
+check_various (void)
+{
+  int        i;
+
+#ifdef COUNT_LEADING_ZEROS_0
+  check_clz (COUNT_LEADING_ZEROS_0, CNST_LIMB(0));
+#endif
+
+  for (i=0; i < GMP_LIMB_BITS; i++)
+    {
+      check_clz (i, CNST_LIMB(1) << (GMP_LIMB_BITS-1-i));
+      check_ctz (i, CNST_LIMB(1) << i);
+
+      check_ctz (i, MP_LIMB_T_MAX << i);
+      check_clz (i, MP_LIMB_T_MAX >> i);
+    }
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = 16;
+
+  check_various ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-hightomask.c b/tests/t-hightomask.c
new file mode 100644
index 0000000..3c65d03
--- /dev/null
+++ b/tests/t-hightomask.c
@@ -0,0 +1,42 @@
+/* Test LIMB_HIGHBIT_TO_MASK.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* There's very little to these tests, but it's nice to have them since if
+   something has gone wrong with the arithmetic right shift business in
+   LIMB_HIGHBIT_TO_MASK then the only symptom is likely to be failures in
+   udiv_qrnnd_preinv, which would not be easy to diagnose.  */
+
+int
+main (void)
+{
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (0) == 0);
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (GMP_LIMB_HIGHBIT) == MP_LIMB_T_MAX);
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (MP_LIMB_T_MAX) == MP_LIMB_T_MAX);
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (GMP_LIMB_HIGHBIT >> 1) == 0);
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (MP_LIMB_T_MAX >> 1) == 0);
+
+  exit (0);
+}
diff --git a/tests/t-modlinv.c b/tests/t-modlinv.c
new file mode 100644
index 0000000..47f5533
--- /dev/null
+++ b/tests/t-modlinv.c
@@ -0,0 +1,83 @@
+/* Test binvert_limb.
+
+Copyright 2000-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+void
+one (mp_limb_t n)
+{
+  mp_limb_t  inv, prod;
+
+  binvert_limb (inv, n);
+  prod = (inv * n) & GMP_NUMB_MASK;
+  if (prod != 1)
+    {
+      printf ("binvert_limb wrong\n");
+      mp_limb_trace ("  n       ", n);
+      mp_limb_trace ("  got     ", inv);
+      mp_limb_trace ("  product ", prod);
+      abort ();
+    }
+}
+
/* Spot-check 10000 random odd limbs (inverses exist only for odd n).  */
void
some (void)
{
  int  remaining = 10000;

  while (remaining-- > 0)
    one (refmpn_random_limb () | 1);
}
+
+void
+all (void)
+{
+  mp_limb_t  n;
+
+  n = 1;
+  do {
+    one (n);
+    n += 2;
+  } while (n != 1);
+}
+
+
/* Driver: "-a" requests the exhaustive sweep, otherwise random spot
   checks.  */
int
main (int argc, char *argv[])
{
  int  exhaustive = (argc >= 2 && strcmp (argv[1], "-a") == 0);

  tests_start ();

  /* it's feasible to run all values on a 32-bit limb, but not a 64-bit */
  if (exhaustive)
    all ();
  else
    some ();

  tests_end ();
  exit (0);
}
diff --git a/tests/t-parity.c b/tests/t-parity.c
new file mode 100644
index 0000000..b6f2366
--- /dev/null
+++ b/tests/t-parity.c
@@ -0,0 +1,66 @@
+/* Test ULONG_PARITY.
+
+Copyright 2002, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
/* Verify ULONG_PARITY of n matches the expected parity bit; on a
   mismatch print operand and both values, then abort.  */
void
check_one (int want, unsigned long n)
{
  int  got;

  ULONG_PARITY (got, n);
  if (want == got)
    return;

  printf ("ULONG_PARITY wrong\n");
  printf ("  n    %lX\n", n);
  printf ("  want %d\n", want);
  printf ("  got  %d\n", got);
  abort ();
}
+
+void
+check_various (void)
+{
+  int  i;
+
+  check_one (0, 0L);
+  check_one (BITS_PER_ULONG & 1, ULONG_MAX);
+  check_one (0, 0x11L);
+  check_one (1, 0x111L);
+  check_one (1, 0x3111L);
+
+  for (i = 0; i < BITS_PER_ULONG; i++)
+    check_one (1, 1UL << i);
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = 16;
+
+  check_various ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-popc.c b/tests/t-popc.c
new file mode 100644
index 0000000..421d4e2
--- /dev/null
+++ b/tests/t-popc.c
@@ -0,0 +1,79 @@
+/* Test popc_limb.
+
+Copyright 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+int
+main (void)
+{
+  mp_limb_t  src, want, got;
+  int        i;
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  for (i = 0; i < GMP_LIMB_BITS; i++)
+    {
+      src = CNST_LIMB(1) << i;
+      want = 1;
+
+      popc_limb (got, src);
+      if (got != want)
+        {
+        error:
+          printf ("popc_limb wrong result\n");
+          mpn_trace ("  src ", &src,  (mp_size_t) 1);
+          mpn_trace ("  want", &want, (mp_size_t) 1);
+          mpn_trace ("  got ", &got,  (mp_size_t) 1);
+          abort ();
+        }
+    }
+
+  src = 0;
+  want = 0;
+  for (i = 0; i < GMP_LIMB_BITS; i++)
+    {
+      src += CNST_LIMB(1) << i;
+      want += 1;
+
+      popc_limb (got, src);
+      if (got != want)
+        {
+	  goto error;
+        }
+    }
+
+  for (i = 0; i < 100; i++)
+    {
+      mpn_random2 (&src, (mp_size_t) 1);
+      want = ref_popc_limb (src);
+
+      popc_limb (got, src);
+      if (got != want)
+        goto error;
+    }
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-sub.c b/tests/t-sub.c
new file mode 100644
index 0000000..e916d4f
--- /dev/null
+++ b/tests/t-sub.c
@@ -0,0 +1,114 @@
+/* Test sub_ddmmss.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+void
+check_data (void)
+{
+#define M  MP_LIMB_T_MAX
+
+  static const struct {
+    mp_limb_t  want_dh,want_dl, mh,ml, sh,sl;
+  } data[] = {
+    { 0,0,  0,0,  0,0 },
+    { 0,0,  0,1,  0,1 },
+    { 0,0,  1,2,  1,2 },
+
+    { 0,1,  0,2,  0,1 },
+    { 0,M,  1,0,  0,1 },
+    { M,M,  0,0,  0,1 },
+
+    { M,M,  0,M-1,  0,M },
+    { 0,0,  0,M-1,  0,M-1 },
+    { 0,1,  0,M-1,  0,M-2 },
+  };
+  int  i;
+  mp_limb_t  got_dh, got_dl;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      sub_ddmmss (got_dh,got_dl, data[i].mh,data[i].ml, data[i].sh,data[i].sl);
+      if (got_dh != data[i].want_dh || got_dl != data[i].want_dl)
+        {
+          printf ("check_data wrong at data[%d]\n", i);
+          mp_limb_trace ("  mh", data[i].mh);
+          mp_limb_trace ("  ml", data[i].ml);
+          mp_limb_trace ("  sh", data[i].sh);
+          mp_limb_trace ("  sl", data[i].sl);
+          mp_limb_trace ("  want dh", data[i].want_dh);
+          mp_limb_trace ("  want dl", data[i].want_dl);
+          mp_limb_trace ("  got dh ", got_dh);
+          mp_limb_trace ("  got dl ", got_dl);
+          abort ();
+        }
+    }
+}
+
+void
+check_random (void)
+{
+  mp_limb_t  want_dh,want_dl, got_dh,got_dl, mh,ml, sh,sl;
+  int  i;
+
+  for (i = 0; i < 20; i++)
+    {
+      mh = urandom ();
+      ml = urandom ();
+      sh = urandom ();
+      sl = urandom ();
+
+      refmpn_sub_ddmmss (&want_dh,&want_dl, mh,ml, sh,sl);
+
+      sub_ddmmss (got_dh,got_dl, mh,ml, sh,sl);
+
+      if (got_dh != want_dh || got_dl != want_dl)
+        {
+          printf ("check_data wrong at data[%d]\n", i);
+          mp_limb_trace ("  mh", mh);
+          mp_limb_trace ("  ml", ml);
+          mp_limb_trace ("  sh", sh);
+          mp_limb_trace ("  sl", sl);
+          mp_limb_trace ("  want dh", want_dh);
+          mp_limb_trace ("  want dl", want_dl);
+          mp_limb_trace ("  got dh ", got_dh);
+          mp_limb_trace ("  got dl ", got_dl);
+          abort ();
+        }
+    }
+}
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_data ();
+  check_random ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/tests.h b/tests/tests.h
new file mode 100644
index 0000000..882c634
--- /dev/null
+++ b/tests/tests.h
@@ -0,0 +1,447 @@
+/* Tests support prototypes etc.
+
+Copyright 2000-2004, 2008-2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+#ifndef __TESTS_H__
+#define __TESTS_H__
+
+#include "config.h"
+
+#include <setjmp.h>  /* for jmp_buf */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+#ifdef __cplusplus
+#define ANYARGS  ...
+#else
+#define ANYARGS
+#endif
+
+
+void tests_start (void);
+void tests_end (void);
+
+void tests_memory_start (void);
+void tests_memory_end (void);
+void *tests_allocate (size_t);
+void *tests_reallocate (void *, size_t, size_t);
+void tests_free (void *, size_t);
+void tests_free_nosize (void *);
+int tests_memory_valid (void *);
+
+void tests_rand_start (void);
+void tests_rand_end (void);
+
+double tests_infinity_d ();
+int tests_hardware_getround (void);
+int tests_hardware_setround (int);
+int tests_isinf (double);
+int tests_dbl_mant_bits (void);
+
+void x86_fldcw (unsigned short);
+unsigned short x86_fstcw (void);
+
+
+/* tests_setjmp_sigfpe is like a setjmp, establishing a trap for SIGFPE.
+   The initial return is 0, if SIGFPE is trapped execution goes back there
+   with return value 1.
+
+   tests_sigfpe_done puts SIGFPE back to SIG_DFL, which should be used once
+   the setjmp point is out of scope, so a later SIGFPE won't try to go back
+   there.  */
+
+#define tests_setjmp_sigfpe()                   \
+  (signal (SIGFPE, tests_sigfpe_handler),       \
+   setjmp (tests_sigfpe_target))
+
+RETSIGTYPE tests_sigfpe_handler (int);
+void tests_sigfpe_done (void);
+extern jmp_buf  tests_sigfpe_target;
+
+
+#if HAVE_CALLING_CONVENTIONS
+/* CALLING_CONVENTIONS(func) stores func in calling_conventions_function
+   and expands to the asm wrapper "calling_conventions", so the call goes
+   through the wrapper; CALLING_CONVENTIONS_CHECK() then examines the
+   recorded register values (see x86check.c and x86call.asm).  Without asm
+   support the function is called directly and the check always passes.  */
+extern mp_limb_t (*calling_conventions_function) (ANYARGS);
+mp_limb_t calling_conventions (ANYARGS);
+int calling_conventions_check (void);
+#define CALLING_CONVENTIONS(function) \
+  (calling_conventions_function = (function), calling_conventions)
+#define CALLING_CONVENTIONS_CHECK()    (calling_conventions_check())
+#else
+#define CALLING_CONVENTIONS(function)  (function)
+#define CALLING_CONVENTIONS_CHECK()    1 /* always ok */
+#endif
+
+
+extern int mp_trace_base;
+void mp_limb_trace (const char *, mp_limb_t);
+void mpn_trace (const char *, mp_srcptr, mp_size_t);
+void mpn_tracea (const char *, const mp_ptr *, int, mp_size_t);
+void mpn_tracen (const char *, int, mp_srcptr, mp_size_t);
+void mpn_trace_file (const char *, mp_srcptr, mp_size_t);
+void mpn_tracea_file (const char *, const mp_ptr *, int, mp_size_t);
+void mpf_trace (const char *, mpf_srcptr);
+void mpq_trace (const char *, mpq_srcptr);
+void mpz_trace (const char *, mpz_srcptr);
+void mpz_tracen (const char *, int, mpz_srcptr);
+void byte_trace (const char *, const void *, mp_size_t);
+void byte_tracen (const char *, int, const void *, mp_size_t);
+void d_trace (const char *, double);
+
+
+void spinner (void);
+extern unsigned long  spinner_count;
+extern int  spinner_wanted;
+extern int  spinner_tick;
+
+
+void *align_pointer (void *, size_t);
+void *__gmp_allocate_func_aligned (size_t, size_t);
+void *__gmp_allocate_or_reallocate (void *, size_t, size_t);
+char *__gmp_allocate_strdup (const char *);
+char *strtoupper (char *);
+mp_limb_t urandom (void);
+void call_rand_algs (void (*func) (const char *, gmp_randstate_t));
+
+
+void mpf_set_str_or_abort (mpf_ptr, const char *, int);
+
+
+void mpq_set_str_or_abort (mpq_ptr, const char *, int);
+
+
+void mpz_erandomb (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_erandomb_nonzero (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_errandomb (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_errandomb_nonzero (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_init_set_n (mpz_ptr, mp_srcptr, mp_size_t);
+void mpz_negrandom (mpz_ptr, gmp_randstate_t);
+int mpz_pow2abs_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+void mpz_set_n (mpz_ptr, mp_srcptr, mp_size_t);
+void mpz_set_str_or_abort (mpz_ptr, const char *, int);
+void mpz_clobber(mpz_ptr);
+
+mp_size_t mpn_diff_highest (mp_srcptr, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t mpn_diff_lowest (mp_srcptr, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t byte_diff_highest (const void *, const void *, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t byte_diff_lowest (const void *, const void *, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+
+mp_limb_t ref_addc_limb (mp_limb_t *, mp_limb_t, mp_limb_t);
+mp_limb_t ref_bswap_limb (mp_limb_t);
+unsigned long ref_popc_limb (mp_limb_t);
+mp_limb_t ref_subc_limb (mp_limb_t *, mp_limb_t, mp_limb_t);
+
+
+void refmpf_add (mpf_ptr, mpf_srcptr, mpf_srcptr);
+void refmpf_add_ulp (mpf_ptr );
+void refmpf_fill (mpf_ptr, mp_size_t, mp_limb_t);
+void refmpf_normalize (mpf_ptr);
+void refmpf_set_prec_limbs (mpf_ptr, unsigned long);
+unsigned long refmpf_set_overlap (mpf_ptr, mpf_srcptr);
+void refmpf_sub (mpf_ptr, mpf_srcptr, mpf_srcptr);
+int refmpf_validate (const char *, mpf_srcptr, mpf_srcptr);
+int refmpf_validate_division (const char *, mpf_srcptr, mpf_srcptr, mpf_srcptr);
+
+
+mp_limb_t refmpn_cnd_add_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_cnd_sub_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_n_ip2 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n_ip2 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n_ip2 (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned, mp_limb_t);
+mp_limb_t refmpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+mp_limb_t refmpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+void refmpn_and_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_andn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_big_base (int);
+
+int refmpn_chars_per_limb (int);
+void refmpn_clrbit (mp_ptr, unsigned long);
+int refmpn_cmp (mp_srcptr, mp_srcptr, mp_size_t);
+int refmpn_cmp_allowzero (mp_srcptr, mp_srcptr, mp_size_t);
+int refmpn_cmp_twosizes (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+void refmpn_com (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copy (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copy_extend (mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+
+unsigned refmpn_count_leading_zeros (mp_limb_t);
+unsigned refmpn_count_trailing_zeros (mp_limb_t);
+
+mp_limb_t refmpn_divexact_by3 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_divexact_by3c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t refmpn_divmod_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_divmod_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_divrem_2 (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+
+int refmpn_equal_anynail (mp_srcptr, mp_srcptr, mp_size_t);
+
+void refmpn_fill (mp_ptr, mp_size_t, mp_limb_t);
+
+mp_limb_t refmpn_gcd_11 (mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t);
+mp_double_limb_t refmpn_gcd_22 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+
+size_t refmpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
+
+unsigned long refmpn_hamdist (mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_invert_limb (mp_limb_t);
+void refmpn_ior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_iorn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshift_or_copy (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshift_or_copy_any (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+void refmpn_com (mp_ptr, mp_srcptr, mp_size_t);
+
+mp_ptr refmpn_malloc_limbs (mp_size_t);
+mp_ptr refmpn_malloc_limbs_aligned (mp_size_t, size_t);
+void refmpn_free_limbs (mp_ptr);
+mp_limb_t refmpn_msbone (mp_limb_t);
+mp_limb_t refmpn_msbone_mask (mp_limb_t);
+mp_ptr refmpn_memdup_limbs (mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_mod_1 (mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_mod_34lsub1 (mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+void refmpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
+void refmpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mul_any (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+void refmpn_nand_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_nior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_neg (mp_ptr, mp_srcptr, mp_size_t);
+mp_size_t refmpn_normalize (mp_srcptr, mp_size_t);
+
+unsigned long refmpn_popcount (mp_srcptr, mp_size_t);
+mp_limb_t refmpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, unsigned);
+mp_limb_t refmpn_preinv_mod_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+void refmpn_random (mp_ptr, mp_size_t);
+void refmpn_random2 (mp_ptr, mp_size_t);
+mp_limb_t refmpn_random_limb (void);
+
+mp_limb_t refmpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_rshift_or_copy (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_rshift_or_copy_any (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+
+mp_limb_t refmpn_sb_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+unsigned long refmpn_scan0 (mp_srcptr, unsigned long);
+unsigned long refmpn_scan1 (mp_srcptr, unsigned long);
+void refmpn_setbit (mp_ptr, unsigned long);
+void refmpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_sqrlo (mp_ptr, mp_srcptr, mp_size_t);
+mp_size_t refmpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
+
+void refmpn_sub_ddmmss (mp_limb_t *, mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t refmpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t refmpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+mp_limb_t refmpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+mp_limb_signed_t refmpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_signed_t refmpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_signed_t refmpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_signed_t refmpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_signed_t);
+mp_limb_signed_t refmpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_signed_t);
+mp_limb_signed_t refmpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_signed_t);
+
+void refmpn_tdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+int refmpn_tstbit (mp_srcptr, unsigned long);
+
+mp_limb_t refmpn_udiv_qrnnd (mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_udiv_qrnnd_r (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *);
+mp_limb_t refmpn_umul_ppmm (mp_limb_t *, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_umul_ppmm_r (mp_limb_t, mp_limb_t, mp_limb_t *);
+
+void refmpn_xnor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_xor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+void refmpn_zero (mp_ptr, mp_size_t);
+void refmpn_zero_extend (mp_ptr, mp_size_t, mp_size_t);
+int refmpn_zero_p (mp_srcptr, mp_size_t);
+
+void refmpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+void refmpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+
+void refmpq_add (mpq_ptr, mpq_srcptr, mpq_srcptr);
+void refmpq_sub (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+
+void refmpz_combit (mpz_ptr, unsigned long);
+unsigned long refmpz_hamdist (mpz_srcptr, mpz_srcptr);
+void refmpz_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
+int refmpz_kronecker (mpz_srcptr, mpz_srcptr);
+int refmpz_jacobi (mpz_srcptr, mpz_srcptr);
+int refmpz_legendre (mpz_srcptr, mpz_srcptr);
+int refmpz_kronecker_si (mpz_srcptr, long);
+int refmpz_kronecker_ui (mpz_srcptr, unsigned long);
+int refmpz_si_kronecker (long, mpz_srcptr);
+int refmpz_ui_kronecker (unsigned long, mpz_srcptr);
+
+void refmpz_pow_ui (mpz_ptr, mpz_srcptr, unsigned long);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/* Establish ostringstream and istringstream.  Do this here so as to hide
+   the conditionals, rather than putting stuff in each test program.
+
+   Oldish versions of g++, like 2.95.2, don't have <sstream>, only
+   <strstream>.  Fake up ostringstream and istringstream classes, but not a
+   full implementation, just enough for our purposes.  */
+
+#ifdef __cplusplus
+#if 1 || HAVE_SSTREAM
+#include <sstream>
+#else /* ! HAVE_SSTREAM */
+#include <string>
+#include <strstream>
+/* NOTE(review): this fallback branch is currently dead code, since the
+   condition above is "#if 1 || HAVE_SSTREAM" -- the <sstream> branch is
+   always taken.  Note also "string" is used unqualified (no std::);
+   confirm that compiles before ever re-enabling this branch.  */
+class
+ostringstream : public std::ostrstream {
+ public:
+  /* Copy the ostrstream buffer into a NUL-terminated temporary block,
+     build a string from it, then free the block.  */
+  string str() {
+    int  pcount = ostrstream::pcount ();
+    char *s = (char *) (*__gmp_allocate_func) (pcount + 1);
+    memcpy (s, ostrstream::str(), pcount);
+    s[pcount] = '\0';
+    string ret = string(s);
+    (*__gmp_free_func) (s, pcount + 1);
+    return ret; }
+};
+/* Minimal istringstream: construct from a C string.  */
+class
+istringstream : public std::istrstream {
+ public:
+  istringstream (const char *s) : istrstream (s) { };
+};
+#endif /* ! HAVE_SSTREAM */
+#endif /* __cplusplus */
+
+
+/* TESTS_REPS(count, argv, argc) -- adjust a test's repetition count.
+   A leading numeric command line argument (argv[1]) replaces "count" and
+   is consumed (argv/argc advanced past it).  The GMP_CHECK_REPFACTOR
+   environment variable, if set, then scales "count" (clamped to >= 1).
+   A non-default count is printed, since it's wanted in bug reports.
+   Invalid values exit(1).  */
+#define TESTS_REPS(count, argv, argc)					\
+  do {									\
+  char *envval, *end;							\
+  double repfactor;							\
+  int reps_nondefault = 0;						\
+  if (argc > 1)								\
+    {									\
+      count = strtol (argv[1], &end, 0);				\
+      if (*end || count <= 0)						\
+	{								\
+	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);	\
+	  exit (1);							\
+	}								\
+      argv++;								\
+      argc--;								\
+      reps_nondefault = 1;						\
+    }									\
+  envval = getenv ("GMP_CHECK_REPFACTOR");				\
+  if (envval != NULL)							\
+    {									\
+      repfactor = strtod (envval, &end);				\
+      if (*end || repfactor <= 0)					\
+	{								\
+	  fprintf (stderr, "Invalid repfactor: %f.\n", repfactor);	\
+	  exit (1);							\
+	}								\
+      count *= repfactor;						\
+      count = MAX (count, 1);						\
+      reps_nondefault = 1;						\
+    }									\
+  if (reps_nondefault)							\
+    printf ("Running test with %ld repetitions (include this in bug reports)\n",\
+	    (long) count);						\
+  } while (0)
+
+
+#endif /* __TESTS_H__ */
diff --git a/tests/trace.c b/tests/trace.c
new file mode 100644
index 0000000..de397f5
--- /dev/null
+++ b/tests/trace.c
@@ -0,0 +1,317 @@
+/* Support for diagnostic traces.
+
+Copyright 1999-2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+/* Future: Would like commas printed between limbs in hex or binary, but
+   perhaps not always since it might upset cutting and pasting into bc or
+   whatever.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for strlen */
+
+#include "gmp-impl.h"
+
+#include "tests.h"
+
+
+/* Number base for the various trace printing routines.
+   Set this in main() or with the debugger.
+   If hexadecimal is going to be fed into GNU bc, remember to use -16
+   because bc requires upper case.  */
+
+int  mp_trace_base = 10;
+
+
+/* Print "name=" (when name is non-empty) followed by a prefix identifying
+   mp_trace_base: "bin:", "oct:", "0x", nothing for decimal, or "baseN:"
+   for anything else.  The sign of mp_trace_base is discarded here; a
+   negative base selects upper case output in the printing routines.  */
+void
+mp_trace_start (const char *name)
+{
+  if (name != NULL && name[0] != '\0')
+    printf ("%s=", name);
+
+  switch (ABS (mp_trace_base)) {
+  case  2: printf ("bin:");                         break;
+  case  8: printf ("oct:");                         break;
+  case 10:                                          break;
+  case 16: printf ("0x");                           break;
+  default: printf ("base%d:", ABS (mp_trace_base)); break;
+  }
+}
+
+/* Write "name=value\n" on stdout for the mpq_t q, or "name=NULL\n" when
+   q is a NULL pointer.  */
+void
+mpq_trace (const char *name, mpq_srcptr q)
+{
+  mp_trace_start (name);
+
+  if (q != NULL)
+    {
+      mpq_out_str (stdout, mp_trace_base, q);
+      putchar ('\n');
+    }
+  else
+    printf ("NULL\n");
+}
+
+
+/* Print "name=value\n" to stdout for an mpz_t value.  */
+void
+mpz_trace (const char *name, mpz_srcptr z)
+{
+  mpq_t      q;
+  mp_limb_t  one;
+
+  if (z == NULL)
+    {
+      mpq_trace (name, NULL);
+      return;
+    }
+
+  /* Build a temporary mpq whose numerator aliases z's limb data; nothing
+     is allocated and z is not copied or modified.  */
+  q->_mp_num._mp_alloc = ALLOC(z);
+  q->_mp_num._mp_size = SIZ(z);
+  q->_mp_num._mp_d = PTR(z);
+
+  /* Denominator 1, held in a single stack limb.  */
+  one = 1;
+  q->_mp_den._mp_alloc = 1;
+  q->_mp_den._mp_size = 1;
+  q->_mp_den._mp_d = &one;
+
+  mpq_trace(name, q);
+}
+
+
+/* Write "name=value\n" on stdout for the mpf_t f, or "name=NULL\n" when
+   f is a NULL pointer.  The absolute value of mp_trace_base is passed to
+   mpf_out_str.  */
+void
+mpf_trace (const char *name, mpf_srcptr f)
+{
+  mp_trace_start (name);
+
+  if (f != NULL)
+    {
+      mpf_out_str (stdout, ABS (mp_trace_base), 0, f);
+      putchar ('\n');
+    }
+  else
+    printf ("NULL\n");
+}
+
+
+/* Print "namenum=value\n" to stdout for an mpz_t value.
+   "name" should have a "%d" to get the number. */
+void
+mpz_tracen (const char *name, int num, mpz_srcptr z)
+{
+  if (name != NULL && name[0] != '\0')
+    {
+      /* "name" is used directly as the printf format, num for its %d.  */
+      printf (name, num);
+      putchar ('=');
+    }
+  mpz_trace (NULL, z);
+}
+
+
+/* Print "name=value\n" to stdout for an mpn style ptr,size. */
+void
+mpn_trace (const char *name, mp_srcptr ptr, mp_size_t size)
+{
+  mpz_t  z;
+  if (ptr == NULL)
+    {
+      mpz_trace (name, NULL);
+      return;
+    }
+  /* Strip high zero limbs, then alias the limb data as an mpz for
+     printing; no copy is made and ptr is not modified.  */
+  MPN_NORMALIZE (ptr, size);
+  PTR(z) = (mp_ptr) ptr;
+  SIZ(z) = size;
+  ALLOC(z) = size;
+  mpz_trace (name, z);
+}
+
+/* Print "name=value\n" to stdout for a limb, nail doesn't have to be zero. */
+void
+mp_limb_trace (const char *name, mp_limb_t n)
+{
+#if GMP_NAIL_BITS != 0
+  /* With nails, split n into two nail-clean limbs so any bits at or
+     above GMP_NUMB_BITS are still shown.  */
+  mp_limb_t  a[2];
+  a[0] = n & GMP_NUMB_MASK;
+  a[1] = n >> GMP_NUMB_BITS;
+  mpn_trace (name, a, (mp_size_t) 2);
+#else
+  mpn_trace (name, &n, (mp_size_t) 1);
+#endif
+}
+
+
+/* Print "namenum=value\n" to stdout for an mpn style ptr,size.
+   "name" is used as a printf format, with "num" for its "%d".  */
+void
+mpn_tracen (const char *name, int num, mp_srcptr ptr, mp_size_t size)
+{
+  if (name && *name)
+    {
+      printf (name, num);
+      printf ("=");
+    }
+  mpn_trace (NULL, ptr, size);
+}
+
+
+/* Print "namenum=value\n" for each element of "a", an array of "count"
+   pointers, each to "size" limbs.  The formal parameter isn't mp_srcptr
+   because that causes compiler warnings, but the values aren't modified.
+   "name" should have a printf style "%d" to get the array index.  */
+
+void
+mpn_tracea (const char *name, const mp_ptr *a, int count, mp_size_t size)
+{
+  int  n;
+
+  for (n = 0; n < count; n++)
+    mpn_tracen (name, n, a[n], size);
+}
+
+
+/* Print "value\n" to a file for an mpz_t value.  Any previous contents of
+   the file are overwritten, so you need different file names each time this
+   is called.
+
+   Overwriting the file is a feature, it means you get old data replaced
+   when you run a test program repeatedly.  */
+
+void
+mpn_trace_file (const char *filename, mp_srcptr ptr, mp_size_t size)
+{
+  FILE   *fp;
+  mpz_t  z;
+
+  fp = fopen (filename, "w");
+  if (fp == NULL)
+    {
+      perror ("fopen");
+      abort();
+    }
+
+  /* Alias the limb data as an mpz, as in mpn_trace above.  NOTE(review):
+     unlike mpn_trace, ALLOC(z) is left uninitialized here -- presumably
+     mpz_out_str never reads it, but confirm before copying this idiom.  */
+  MPN_NORMALIZE (ptr, size);
+  PTR(z) = (mp_ptr) ptr;
+  SIZ(z) = (int) size;
+
+  mpz_out_str (fp, mp_trace_base, z);
+  fprintf (fp, "\n");
+
+  /* Treat any write error (including on close) as fatal.  */
+  if (ferror (fp) || fclose (fp) != 0)
+    {
+      printf ("error writing %s\n", filename);
+      abort();
+    }
+}
+
+
+/* Print "value\n" to a set of files, one file for each element of the given
+   array of mpn style ptr,size.  Any previous contents of the files are
+   overwritten, so you need different file names each time this is called.
+   Each file is "filenameN" where N is 0 to count-1.
+
+   "a" is an array of pointers, each a[i] is a pointer to "size" many limbs.
+   The formal parameter isn't mp_srcptr because that causes compiler
+   warnings, but the values aren't modified.
+
+   Overwriting the files is a feature, it means you get old data replaced
+   when you run a test program repeatedly.  The output style isn't
+   particularly pretty, but at least it gets something out, and you can cat
+   the files into bc, or whatever. */
+
+void
+mpn_tracea_file (const char *filename,
+                 const mp_ptr *a, int count, mp_size_t size)
+{
+  char  *s;
+  int   i;
+  TMP_DECL;
+
+  TMP_MARK;
+  /* Room for filename plus a decimal index; 50 extra bytes is ample.  */
+  s = (char *) TMP_ALLOC (strlen (filename) + 50);
+
+  for (i = 0; i < count; i++)
+    {
+      sprintf (s, "%s%d", filename, i);
+      mpn_trace_file (s, a[i], size);
+    }
+
+  TMP_FREE;
+}
+
+
+/* Print "name=" followed by the bytes ptr[0..size-1], each in the base
+   given by mp_trace_base.  Only bases 8, 10, 16 and -16 (upper case hex)
+   are handled by the switch below; anything else aborts.  */
+void
+byte_trace (const char *name, const void *ptr, mp_size_t size)
+{
+  const char *fmt;
+  mp_size_t  i;
+
+  mp_trace_start (name);
+
+  switch (mp_trace_base) {
+  case   8: fmt = " %o"; break;
+  case  10: fmt = " %d"; break;
+  case  16: fmt = " %x"; break;
+  case -16: fmt = " %X"; break;
+  default: printf ("Oops, unsupported base in byte_trace\n"); abort (); break;
+  }
+
+  for (i = 0; i < size; i++)
+    printf (fmt, (int) ((unsigned char *) ptr)[i]);
+  printf ("\n");
+}
+
+/* Like byte_trace, but print "namenum=", using "name" as a printf format
+   with "num" for its "%d".  */
+void
+byte_tracen (const char *name, int num, const void *ptr, mp_size_t size)
+{
+  if (name && *name)
+    {
+      printf (name, num);
+      printf ("=");
+    }
+  byte_trace (NULL, ptr, size);
+}
+
+
+/* Print "name=" followed by the raw bytes of the double d, in memory
+   order, then its value to 20 significant decimal digits.  */
+void
+d_trace (const char *name, double d)
+{
+  union {
+    double         d;
+    unsigned char  b[sizeof(double)];
+  } u;
+  size_t  i;   /* size_t, matching sizeof, avoids a signed/unsigned
+                  comparison in the loop condition below */
+
+  if (name != NULL && name[0] != '\0')
+    printf ("%s=", name);
+
+  u.d = d;
+  printf ("[");
+  for (i = 0; i < sizeof (u.b); i++)
+    {
+      if (i != 0)
+        printf (" ");
+      printf ("%02X", (int) u.b[i]);
+    }
+  printf ("] %.20g\n", d);
+}
diff --git a/tests/x86call.asm b/tests/x86call.asm
new file mode 100644
index 0000000..439f781
--- /dev/null
+++ b/tests/x86call.asm
@@ -0,0 +1,154 @@
+dnl  x86 calling conventions checking.
+
+dnl  Copyright 2000, 2003, 2010, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library test suite.
+
+dnl  The GNU MP Library test suite is free software; you can redistribute it
+dnl  and/or modify it under the terms of the GNU General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+
+dnl  The GNU MP Library test suite is distributed in the hope that it will be
+dnl  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+dnl  Public License for more details.
+
+dnl  You should have received a copy of the GNU General Public License along
+dnl  with the GNU MP Library test suite.  If not, see
+dnl  https://www.gnu.org/licenses/.
+
+
+dnl  The current version of the code attempts to keep the call/return
+dnl  prediction stack valid, but matching calls and returns.
+
+include(`config.m4')
+
+
+C void x86_fldcw (unsigned short cw);
+C
+C Execute an fldcw, setting the x87 control word to cw.
+
+PROLOGUE(x86_fldcw)
+C Load the new control word from the single stack argument.
+	fldcw	4(%esp)
+	ret
+EPILOGUE()
+
+
+C unsigned short x86_fstcw (void);
+C
+C Execute an fstcw, returning the current x87 control word.
+
+PROLOGUE(x86_fstcw)
+C fstcw stores only 16 bits, so push a zeroed word first to get a clean
+C 32-bit return value in %eax.
+	xor	%eax, %eax
+	push	%eax
+	fstcw	(%esp)
+	pop	%eax
+	ret
+EPILOGUE()
+
+
+dnl  Instrumented profiling doesn't come out quite right below, since we don't
+dnl  do an actual "ret".  There's only a few instructions here, so there's no
+dnl  great need to get them separately accounted, just let them get attributed
+dnl  to the caller.  FIXME this comment might no longer be true.
+
+ifelse(WANT_PROFILING,instrument,
+`define(`WANT_PROFILING',no)')
+
+
+C int calling_conventions (...);
+C
+C The global variable "calling_conventions_function" is the function to
+C call, with the arguments as passed here.
+C
+C Perhaps the finit should be done only if the tags word isn't clear, but
+C nothing uses the rounding mode or anything at the moment.
+
+dnl  Byte offsets into the calling_conventions_values vector (see
+dnl  x86check.c), each macro expanding to an operand "4*N(reg)" for the
+dnl  base register given as argument $1.
+define(`WANT_EBX', eval(4*0)($1))
+define(`WANT_EBP', eval(4*1)($1))
+define(`WANT_ESI', eval(4*2)($1))
+define(`WANT_EDI', eval(4*3)($1))
+
+define(`JUNK_EAX', eval(4*4)($1))
+define(`JUNK_ECX', eval(4*5)($1))
+define(`JUNK_EDX', eval(4*6)($1))
+
+define(`SAVE_EBX', eval(4*7)($1))
+define(`SAVE_EBP', eval(4*8)($1))
+define(`SAVE_ESI', eval(4*9)($1))
+define(`SAVE_EDI', eval(4*10)($1))
+
+define(`RETADDR',  eval(4*11)($1))
+
+define(`EBX',	   eval(4*12)($1))
+define(`EBP',	   eval(4*13)($1))
+define(`ESI',	   eval(4*14)($1))
+define(`EDI',	   eval(4*15)($1))
+define(`EFLAGS',   eval(4*16)($1))
+
+
+define(G,
+m4_assert_numargs(1)
+`GSYM_PREFIX`'$1')
+
+	TEXT
+	ALIGN(8)
+PROLOGUE(calling_conventions)
+	LEA(	G(calling_conventions_values), %ecx)
+	C Pop our return address into the values block; the plain "ret" at
+	C the end then returns to the caller, keeping calls and returns
+	C matched for the call/return prediction stack.
+	pop	RETADDR(%ecx)
+
+	C Save callee-saved registers so they can be restored afterwards.
+	mov	%ebx, SAVE_EBX(%ecx)
+	mov	%ebp, SAVE_EBP(%ecx)
+	mov	%esi, SAVE_ESI(%ecx)
+	mov	%edi, SAVE_EDI(%ecx)
+
+	C Values we expect to see unchanged, as per x86check.c
+	mov	WANT_EBX(%ecx), %ebx
+	mov	WANT_EBP(%ecx), %ebp
+	mov	WANT_ESI(%ecx), %esi
+	mov	WANT_EDI(%ecx), %edi
+
+	C Try to provoke a problem by starting with junk in the caller-saves
+	C registers, especially in %eax and %edx which will be return values
+	mov	JUNK_EAX(%ecx), %eax
+	mov	JUNK_EDX(%ecx), %edx
+C	mov	JUNK_ECX(%ecx), %ecx
+
+ifdef(`PIC',`
+	LEA(	G(calling_conventions_function), %ecx)
+	call	*(%ecx)
+',`
+	call	*G(calling_conventions_function)
+')
+
+	C Record the post-call register and flag values for checking.
+	LEA(	G(calling_conventions_values), %ecx)
+
+	mov	%ebx, EBX(%ecx)
+	mov	%ebp, EBP(%ecx)
+	mov	%esi, ESI(%ecx)
+	mov	%edi, EDI(%ecx)
+
+	pushf
+	pop	%ebx
+	mov	%ebx, EFLAGS(%ecx)
+
+	mov	SAVE_EBX(%ecx), %ebx
+	mov	SAVE_ESI(%ecx), %esi
+	mov	SAVE_EDI(%ecx), %edi
+	mov	SAVE_EBP(%ecx), %ebp
+
+	C Restore the caller's return address for the "ret" below.
+	push	RETADDR(%ecx)
+
+ifdef(`PIC',`
+	LEA(	G(calling_conventions_fenv), %ecx)
+	fstenv	(%ecx)
+',`
+	fstenv	G(calling_conventions_fenv)
+')
+	finit
+
+	ret
+
+EPILOGUE()
+ASM_END()
diff --git a/tests/x86check.c b/tests/x86check.c
new file mode 100644
index 0000000..12ea5b7
--- /dev/null
+++ b/tests/x86check.c
@@ -0,0 +1,117 @@
+/* x86 calling conventions checking. */
+
+/*
+Copyright 2000, 2001, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Vector of constants and register values.  We use one vector to allow
+   access via a single base pointer, very beneficial for the PIC-enabled
+   x86call.asm.  The first seven entries are constants initialized here;
+   the remaining entries are filled in at run time by x86call.asm (see
+   the WANT/JUNK/SAVE/RETADDR/VAL/EFLAGS index defines).  */
+mp_limb_t calling_conventions_values[17] =
+{
+  CNST_LIMB(0x12345678),	/* want_ebx */
+  CNST_LIMB(0x89ABCDEF),	/* want_ebp */
+  CNST_LIMB(0xDEADBEEF),	/* want_esi */
+  CNST_LIMB(0xFFEEDDCC),	/* want_edi */
+
+  CNST_LIMB(0xFEEDABBA),	/* junk_eax */
+  CNST_LIMB(0xAB78DE89),	/* junk_ecx */
+  CNST_LIMB(0x12389018)		/* junk_edx */
+
+  /* rest of array used for dynamic values.  */
+};
+
+/* Index starts for various regions in above vector.  */
+#define WANT	0	/* values the tested function must preserve */
+#define JUNK	4	/* junk pre-loaded into caller-saves registers */
+#define SAVE	7	/* caller's registers, saved/restored by the asm */
+#define RETADDR	11	/* caller's return address */
+#define VAL	12	/* register values found after the tested call */
+#define EFLAGS	16	/* eflags captured after the tested call */
+
+
+/* x87 environment image stored by the fstenv in x86call.asm after the
+   tested function returns; calling_conventions_check examines the tag
+   word.  extern "C" keeps the symbol unmangled when compiled as C++.  */
+#ifdef __cplusplus
+extern "C" {
+#endif
+struct {
+  unsigned  control;	/* x87 control word */
+  unsigned  status;	/* x87 status word */
+  unsigned  tag;	/* x87 tag word; 0xFFFF means all registers empty */
+  unsigned  other[4];	/* rest of the fstenv image, not examined here */
+} calling_conventions_fenv;
+#ifdef __cplusplus
+}
+#endif
+
+/* expected values, as per x86call.asm */
+#define VALUE_EBX   0x01234567
+#define VALUE_ESI   0x89ABCDEF
+#define VALUE_EDI   0xFEDCBA98
+#define VALUE_EBP   0x76543210
+
+
+/* Register names, in the same order as the WANT and VAL regions of
+   calling_conventions_values.  */
+const char *regname[] = {"ebx", "ebp", "esi", "edi"};
+
+/* Extract the direction flag (bit 10) from a saved eflags value.  */
+#define DIR_BIT(eflags)   (((eflags) & (1<<10)) != 0)
+
+
+/* Return 1 if ok, 0 if not */
+
+/* Compare the register values captured by x86call.asm after the tested
+   call against the values the function was required to preserve, and
+   check the eflags direction bit and the x87 tag word.  Prints one
+   diagnostic line per violated convention.  */
+int
+calling_conventions_check (void)
+{
+  const char  *header = "Violated calling conventions:\n";
+  int  ret = 1;
+  int i;
+
+  /* The casts to unsigned long keep the printf varargs well defined even
+     when mp_limb_t is not the same type as unsigned int.  */
+#define CHECK(callreg, regstr, value)                   \
+  do {                                                  \
+    if ((callreg) != (value))                           \
+      {                                                 \
+        printf ("%s   %s  got 0x%08lX want 0x%08lX\n",  \
+                header, regstr,                         \
+                (unsigned long) (callreg),              \
+                (unsigned long) (value));               \
+        header = "";                                    \
+        ret = 0;                                        \
+      }                                                 \
+  } while (0)
+
+  /* ebx, ebp, esi, edi must come back with the values the caller set.  */
+  for (i = 0; i < 4; i++)
+    {
+      CHECK (calling_conventions_values[VAL+i], regname[i], calling_conventions_values[WANT+i]);
+    }
+
+  /* The direction flag must be clear on return.  */
+  if (DIR_BIT (calling_conventions_values[EFLAGS]) != 0)
+    {
+      printf ("%s   eflags dir bit  got %d want 0\n",
+              header, DIR_BIT (calling_conventions_values[EFLAGS]));
+      header = "";
+      ret = 0;
+    }
+
+  /* All tag bits set means an empty x87 register stack - the tested
+     function must pop everything it pushes.  */
+  if ((calling_conventions_fenv.tag & 0xFFFF) != 0xFFFF)
+    {
+      printf ("%s   fpu tags  got 0x%X want 0xFFFF\n",
+              header, calling_conventions_fenv.tag & 0xFFFF);
+      header = "";
+      ret = 0;
+    }
+
+  return ret;
+}
diff --git a/version.c b/version.c
new file mode 100644
index 0000000..ee7c32a
--- /dev/null
+++ b/version.c
@@ -0,0 +1,33 @@
+/* gmp_version -- version number compiled into the library.
+
+Copyright 1996, 1999-2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+/* The library's version string.  VERSION is not defined in this file -
+   presumably it is supplied by the build configuration; confirm against
+   the configure machinery.  */
+const char * const gmp_version = VERSION;