google3/third_party/grte/v4_src/glibc-2.19/sysdeps/ieee754/dbl-64/mpa.c - GRTEv4 - Git at Google

 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
  * Copyright (C) 2001-2014 Free Software Foundation, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * the Free Software Foundation; either version 2.1 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public License
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 /************************************************************************/
 /*  MODULE_NAME: mpa.c                                                  */
 /*                                                                      */
 /*  FUNCTIONS:                                                          */
 /*               mcr                                                    */
 /*               acr                                                    */
 /*               cpy                                                    */
 /*               norm                                                   */
 /*               denorm                                                 */
 /*               mp_dbl                                                 */
 /*               dbl_mp                                                 */
 /*               add_magnitudes                                         */
 /*               sub_magnitudes                                         */
 /*               add                                                    */
 /*               sub                                                    */
 /*               mul                                                    */
 /*               inv                                                    */
 /*               dvd                                                    */
 /*                                                                      */
 /* Arithmetic functions for multiple precision numbers.                 */
 /* Relative errors are bounded                                          */
 /************************************************************************/


 #include "endian.h"
 #include "mpa.h"
 #include <sys/param.h>
 #include <alloca.h>

 #ifndef SECTION
 # define SECTION
 #endif

 #ifndef NO__CONST
 const mp_no mpone = { 1, { 1.0, 1.0 } };
 const mp_no mptwo = { 1, { 1.0, 2.0 } };
 #endif

 #ifndef NO___ACR
 /* Compare mantissa of two multiple precision numbers regardless of the sign
    and exponent of the numbers.  */
 static int
 mcr (const mp_no *x, const mp_no *y, int p)
 {
   long i;
   long p2 = p;
   for (i = 1; i <= p2; i++)
     {
       if (X[i] == Y[i])
 	continue;
       else if (X[i] > Y[i])
 	return 1;
       else
 	return -1;
     }
   return 0;
 }

 /* Compare the absolute values of two multiple precision numbers.  */
 int
 __acr (const mp_no *x, const mp_no *y, int p)
 {
   long i;

   if (X[0] == 0)
     {
       if (Y[0] == 0)
 	i = 0;
       else
 	i = -1;
     }
   else if (Y[0] == 0)
     i = 1;
   else
     {
       if (EX > EY)
 	i = 1;
       else if (EX < EY)
 	i = -1;
       else
 	i = mcr (x, y, p);
     }

   return i;
 }
 #endif

 #ifndef NO___CPY
 /* Copy multiple precision number X into Y.  They could be the same
    number.  */
 void
 __cpy (const mp_no *x, mp_no *y, int p)
 {
   long i;

   EY = EX;
   for (i = 0; i <= p; i++)
     Y[i] = X[i];
 }
 #endif

 #ifndef NO___MP_DBL
 /* Convert a multiple precision number *X into a double precision
    number *Y, normalized case  (|x| >= 2**(-1022))).  */
 static void
 norm (const mp_no *x, double *y, int p)
 {
 # define R RADIXI
   long i;
   double c;
   mantissa_t a, u, v, z[5];
   if (p < 5)
     {
       if (p == 1)
 	c = X[1];
       else if (p == 2)
 	c = X[1] + R * X[2];
       else if (p == 3)
 	c = X[1] + R * (X[2] + R * X[3]);
       else if (p == 4)
 	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
     }
   else
     {
       for (a = 1, z[1] = X[1]; z[1] < TWO23; )
 	{
 	  a *= 2;
 	  z[1] *= 2;
 	}

       for (i = 2; i < 5; i++)
 	{
 	  mantissa_store_t d, r;
 	  d = X[i] * (mantissa_store_t) a;
 	  DIV_RADIX (d, r);
 	  z[i] = r;
 	  z[i - 1] += d;
 	}

       u = ALIGN_DOWN_TO (z[3], TWO19);
       v = z[3] - u;

       if (v == TWO18)
 	{
 	  if (z[4] == 0)
 	    {
 	      for (i = 5; i <= p; i++)
 		{
 		  if (X[i] == 0)
 		    continue;
 		  else
 		    {
 		      z[3] += 1;
 		      break;
 		    }
 		}
 	    }
 	  else
 	    z[3] += 1;
 	}

       c = (z[1] + R * (z[2] + R * z[3])) / a;
     }

   c *= X[0];

   for (i = 1; i < EX; i++)
     c *= RADIX;
   for (i = 1; i > EX; i--)
     c *= RADIXI;

   *y = c;
 # undef R
 }

 /* Convert a multiple precision number *X into a double precision
    number *Y, Denormal case  (|x| < 2**(-1022))).  */
 static void
 denorm (const mp_no *x, double *y, int p)
 {
   long i, k;
   long p2 = p;
   double c;
   mantissa_t u, z[5];

 # define R RADIXI
   if (EX < -44 || (EX == -44 && X[1] < TWO5))
     {
       *y = 0;
       return;
     }

   if (p2 == 1)
     {
       if (EX == -42)
 	{
 	  z[1] = X[1] + TWO10;
 	  z[2] = 0;
 	  z[3] = 0;
 	  k = 3;
 	}
       else if (EX == -43)
 	{
 	  z[1] = TWO10;
 	  z[2] = X[1];
 	  z[3] = 0;
 	  k = 2;
 	}
       else
 	{
 	  z[1] = TWO10;
 	  z[2] = 0;
 	  z[3] = X[1];
 	  k = 1;
 	}
     }
   else if (p2 == 2)
     {
       if (EX == -42)
 	{
 	  z[1] = X[1] + TWO10;
 	  z[2] = X[2];
 	  z[3] = 0;
 	  k = 3;
 	}
       else if (EX == -43)
 	{
 	  z[1] = TWO10;
 	  z[2] = X[1];
 	  z[3] = X[2];
 	  k = 2;
 	}
       else
 	{
 	  z[1] = TWO10;
 	  z[2] = 0;
 	  z[3] = X[1];
 	  k = 1;
 	}
     }
   else
     {
       if (EX == -42)
 	{
 	  z[1] = X[1] + TWO10;
 	  z[2] = X[2];
 	  k = 3;
 	}
       else if (EX == -43)
 	{
 	  z[1] = TWO10;
 	  z[2] = X[1];
 	  k = 2;
 	}
       else
 	{
 	  z[1] = TWO10;
 	  z[2] = 0;
 	  k = 1;
 	}
       z[3] = X[k];
     }

   u = ALIGN_DOWN_TO (z[3], TWO5);

   if (u == z[3])
     {
       for (i = k + 1; i <= p2; i++)
 	{
 	  if (X[i] == 0)
 	    continue;
 	  else
 	    {
 	      z[3] += 1;
 	      break;
 	    }
 	}
     }

   c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);

   *y = c * TWOM1032;
 # undef R
 }

 /* Convert multiple precision number *X into double precision number *Y.  The
    result is correctly rounded to the nearest/even.  */
 void
 __mp_dbl (const mp_no *x, double *y, int p)
 {
   if (X[0] == 0)
     {
       *y = 0;
       return;
     }

   if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
     norm (x, y, p);
   else
     denorm (x, y, p);
 }
 #endif

 /* Get the multiple precision equivalent of X into *Y.  If the precision is too
    small, the result is truncated.  */
 void
 SECTION
 __dbl_mp (double x, mp_no *y, int p)
 {
   long i, n;
   long p2 = p;

   /* Sign.  */
   if (x == 0)
     {
       Y[0] = 0;
       return;
     }
   else if (x > 0)
     Y[0] = 1;
   else
     {
       Y[0] = -1;
       x = -x;
     }

   /* Exponent.  */
   for (EY = 1; x >= RADIX; EY += 1)
     x *= RADIXI;
   for (; x < 1; EY -= 1)
     x *= RADIX;

   /* Digits.  */
   n = MIN (p2, 4);
   for (i = 1; i <= n; i++)
     {
       INTEGER_OF (x, Y[i]);
       x *= RADIX;
     }
   for (; i <= p2; i++)
     Y[i] = 0;
 }

 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
    sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
    Y and Z.  No guard digit is used.  The result equals the exact sum,
    truncated.  */
 static void
 SECTION
 add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   long i, j, k;
   long p2 = p;
   mantissa_t zk;

   EZ = EX;

   i = p2;
   j = p2 + EY - EX;
   k = p2 + 1;

   if (__glibc_unlikely (j < 1))
     {
       __cpy (x, z, p);
       return;
     }

   zk = 0;

   for (; j > 0; i--, j--)
     {
       zk += X[i] + Y[j];
       if (zk >= RADIX)
 	{
 	  Z[k--] = zk - RADIX;
 	  zk = 1;
 	}
       else
 	{
 	  Z[k--] = zk;
 	  zk = 0;
 	}
     }

   for (; i > 0; i--)
     {
       zk += X[i];
       if (zk >= RADIX)
 	{
 	  Z[k--] = zk - RADIX;
 	  zk = 1;
 	}
       else
 	{
 	  Z[k--] = zk;
 	  zk = 0;
 	}
     }

   if (zk == 0)
     {
       for (i = 1; i <= p2; i++)
 	Z[i] = Z[i + 1];
     }
   else
     {
       Z[1] = zk;
       EZ += 1;
     }
 }

 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
    The sign of the difference *Z is not changed.  X and Y may overlap but not X
    and Z or Y and Z.  One guard digit is used.  The error is less than one
    ULP.  */
 static void
 SECTION
 sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   long i, j, k;
   long p2 = p;
   mantissa_t zk;

   EZ = EX;
   i = p2;
   j = p2 + EY - EX;
   k = p2;

   /* Y is too small compared to X, copy X over to the result.  */
   if (__glibc_unlikely (j < 1))
     {
       __cpy (x, z, p);
       return;
     }

   /* The relevant least significant digit in Y is non-zero, so we factor it in
      to enhance accuracy.  */
   if (j < p2 && Y[j + 1] > 0)
     {
       Z[k + 1] = RADIX - Y[j + 1];
       zk = -1;
     }
   else
     zk = Z[k + 1] = 0;

   /* Subtract and borrow.  */
   for (; j > 0; i--, j--)
     {
       zk += (X[i] - Y[j]);
       if (zk < 0)
 	{
 	  Z[k--] = zk + RADIX;
 	  zk = -1;
 	}
       else
 	{
 	  Z[k--] = zk;
 	  zk = 0;
 	}
     }

   /* We're done with digits from Y, so it's just digits in X.  */
   for (; i > 0; i--)
     {
       zk += X[i];
       if (zk < 0)
 	{
 	  Z[k--] = zk + RADIX;
 	  zk = -1;
 	}
       else
 	{
 	  Z[k--] = zk;
 	  zk = 0;
 	}
     }

   /* Normalize.  */
   for (i = 1; Z[i] == 0; i++)
     ;
   EZ = EZ - i + 1;
   for (k = 1; i <= p2 + 1; )
     Z[k++] = Z[i++];
   for (; k <= p2; )
     Z[k++] = 0;
 }

 /* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
    and Z or Y and Z.  One guard digit is used.  The error is less than one
    ULP.  */
 void
 SECTION
 __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   int n;

   if (X[0] == 0)
     {
       __cpy (y, z, p);
       return;
     }
   else if (Y[0] == 0)
     {
       __cpy (x, z, p);
       return;
     }

   if (X[0] == Y[0])
     {
       if (__acr (x, y, p) > 0)
 	{
 	  add_magnitudes (x, y, z, p);
 	  Z[0] = X[0];
 	}
       else
 	{
 	  add_magnitudes (y, x, z, p);
 	  Z[0] = Y[0];
 	}
     }
   else
     {
       if ((n = __acr (x, y, p)) == 1)
 	{
 	  sub_magnitudes (x, y, z, p);
 	  Z[0] = X[0];
 	}
       else if (n == -1)
 	{
 	  sub_magnitudes (y, x, z, p);
 	  Z[0] = Y[0];
 	}
       else
 	Z[0] = 0;
     }
 }

 /* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
    not X and Z or Y and Z.  One guard digit is used.  The error is less than
    one ULP.  */
 void
 SECTION
 __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   int n;

   if (X[0] == 0)
     {
       __cpy (y, z, p);
       Z[0] = -Z[0];
       return;
     }
   else if (Y[0] == 0)
     {
       __cpy (x, z, p);
       return;
     }

   if (X[0] != Y[0])
     {
       if (__acr (x, y, p) > 0)
 	{
 	  add_magnitudes (x, y, z, p);
 	  Z[0] = X[0];
 	}
       else
 	{
 	  add_magnitudes (y, x, z, p);
 	  Z[0] = -Y[0];
 	}
     }
   else
     {
       if ((n = __acr (x, y, p)) == 1)
 	{
 	  sub_magnitudes (x, y, z, p);
 	  Z[0] = X[0];
 	}
       else if (n == -1)
 	{
 	  sub_magnitudes (y, x, z, p);
 	  Z[0] = -Y[0];
 	}
       else
 	Z[0] = 0;
     }
 }

 #ifndef NO__MUL
 /* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
    and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
    digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
 void
 SECTION
 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   long i, j, k, ip, ip2;
   long p2 = p;
   mantissa_store_t zk;
   const mp_no *a;
   mantissa_store_t *diag;

   /* Is z=0?  */
   if (__glibc_unlikely (X[0] * Y[0] == 0))
     {
       Z[0] = 0;
       return;
     }

   /* We need not iterate through all X's and Y's since it's pointless to
      multiply zeroes.  Here, both are zero...  */
   for (ip2 = p2; ip2 > 0; ip2--)
     if (X[ip2] != 0 || Y[ip2] != 0)
       break;

   a = X[ip2] != 0 ? y : x;

   /* ... and here, at least one of them is still zero.  */
   for (ip = ip2; ip > 0; ip--)
     if (a->d[ip] != 0)
       break;

   /* The product looks like this for p = 3 (as an example):


 				a1    a2    a3
 		 x		b1    b2    b3
 		 -----------------------------
 			     a1*b3 a2*b3 a3*b3
 		       a1*b2 a2*b2 a3*b2
 		 a1*b1 a2*b1 a3*b1

      So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
      for P >= 3.  We compute the above digits in two parts; the last P-1
      digits and then the first P digits.  The last P-1 digits are a sum of
      products of the input digits from P to P-k where K is 0 for the least
      significant digit and increases as we go towards the left.  The product
      term is of the form X[k]*X[P-k] as can be seen in the above example.

      The first P digits are also a sum of products with the same product term,
      except that the sum is from 1 to k.  This is also evident from the above
      example.

      Another thing that becomes evident is that only the most significant
      ip+ip2 digits of the result are non-zero, where ip and ip2 are the
      'internal precision' of the input numbers, i.e. digits after ip and ip2
      are all 0.  */

   k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;

   while (k > ip + ip2 + 1)
     Z[k--] = 0;

   zk = 0;

   /* Precompute sums of diagonal elements so that we can directly use them
      later.  See the next comment to know we why need them.  */
   diag = alloca (k * sizeof (mantissa_store_t));
   mantissa_store_t d = 0;
   for (i = 1; i <= ip; i++)
     {
       d += X[i] * (mantissa_store_t) Y[i];
       diag[i] = d;
     }
   while (i < k)
     diag[i++] = d;

   while (k > p2)
     {
       long lim = k / 2;

       if (k % 2 == 0)
 	/* We want to add this only once, but since we subtract it in the sum
 	   of products above, we add twice.  */
 	zk += 2 * X[lim] * (mantissa_store_t) Y[lim];

       for (i = k - p2, j = p2; i < j; i++, j--)
 	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);

       zk -= diag[k - 1];

       DIV_RADIX (zk, Z[k]);
       k--;
     }

   /* The real deal.  Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
      goes from 1 -> k - 1 and j goes the same range in reverse.  To reduce the
      number of multiplications, we halve the range and if k is an even number,
      add the diagonal element X[k/2]Y[k/2].  Through the half range, we compute
      X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].

      This reduction tells us that we're summing two things, the first term
      through the half range and the negative of the sum of the product of all
      terms of X and Y in the full range.  i.e.

      SUM(X[i] * Y[i]) for k terms.  This is precalculated above for each k in
      a single loop so that it completes in O(n) time and can hence be directly
      used in the loop below.  */
   while (k > 1)
     {
       long lim = k / 2;

       if (k % 2 == 0)
 	/* We want to add this only once, but since we subtract it in the sum
 	   of products above, we add twice.  */
         zk += 2 * X[lim] * (mantissa_store_t) Y[lim];

       for (i = 1, j = k - 1; i < j; i++, j--)
 	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);

       zk -= diag[k - 1];

       DIV_RADIX (zk, Z[k]);
       k--;
     }
   Z[k] = zk;

   /* Get the exponent sum into an intermediate variable.  This is a subtle
      optimization, where given enough registers, all operations on the exponent
      happen in registers and the result is written out only once into EZ.  */
   int e = EX + EY;

   /* Is there a carry beyond the most significant digit?  */
   if (__glibc_unlikely (Z[1] == 0))
     {
       for (i = 1; i <= p2; i++)
 	Z[i] = Z[i + 1];
       e--;
     }

   EZ = e;
   Z[0] = X[0] * Y[0];
 }
 #endif

 #ifndef NO__SQR
 /* Square *X and store result in *Y.  X and Y may not overlap.  For P in
    [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
    error is bounded by 1.001 ULP.  This is a faster special case of
    multiplication.  */
 void
 SECTION
 __sqr (const mp_no *x, mp_no *y, int p)
 {
   long i, j, k, ip;
   mantissa_store_t yk;

   /* Is z=0?  */
   if (__glibc_unlikely (X[0] == 0))
     {
       Y[0] = 0;
       return;
     }

   /* We need not iterate through all X's since it's pointless to
      multiply zeroes.  */
   for (ip = p; ip > 0; ip--)
     if (X[ip] != 0)
       break;

   k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;

   while (k > 2 * ip + 1)
     Y[k--] = 0;

   yk = 0;

   while (k > p)
     {
       mantissa_store_t yk2 = 0;
       long lim = k / 2;

       if (k % 2 == 0)
 	yk += X[lim] * (mantissa_store_t) X[lim];

       /* In __mul, this loop (and the one within the next while loop) run
          between a range to calculate the mantissa as follows:

          Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
 		+ X[n] * Y[k]

          For X == Y, we can get away with summing halfway and doubling the
 	 result.  For cases where the range size is even, the mid-point needs
 	 to be added separately (above).  */
       for (i = k - p, j = p; i < j; i++, j--)
 	yk2 += X[i] * (mantissa_store_t) X[j];

       yk += 2 * yk2;

       DIV_RADIX (yk, Y[k]);
       k--;
     }

   while (k > 1)
     {
       mantissa_store_t yk2 = 0;
       long lim = k / 2;

       if (k % 2 == 0)
 	yk += X[lim] * (mantissa_store_t) X[lim];

       /* Likewise for this loop.  */
       for (i = 1, j = k - 1; i < j; i++, j--)
 	yk2 += X[i] * (mantissa_store_t) X[j];

       yk += 2 * yk2;

       DIV_RADIX (yk, Y[k]);
       k--;
     }
   Y[k] = yk;

   /* Squares are always positive.  */
   Y[0] = 1;

   /* Get the exponent sum into an intermediate variable.  This is a subtle
      optimization, where given enough registers, all operations on the exponent
      happen in registers and the result is written out only once into EZ.  */
   int e = EX * 2;

   /* Is there a carry beyond the most significant digit?  */
   if (__glibc_unlikely (Y[1] == 0))
     {
       for (i = 1; i <= p; i++)
 	Y[i] = Y[i + 1];
       e--;
     }

   EY = e;
 }
 #endif

 /* Invert *X and store in *Y.  Relative error bound:
    - For P = 2: 1.001 * R ^ (1 - P)
    - For P = 3: 1.063 * R ^ (1 - P)
    - For P > 3: 2.001 * R ^ (1 - P)

    *X = 0 is not permissible.  */
 static void
 SECTION
 __inv (const mp_no *x, mp_no *y, int p)
 {
   long i;
   double t;
   mp_no z, w;
   static const int np1[] =
     { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
   };

   __cpy (x, &z, p);
   z.e = 0;
   __mp_dbl (&z, &t, p);
   t = 1 / t;
   __dbl_mp (t, y, p);
   EY -= EX;

   for (i = 0; i < np1[p]; i++)
     {
       __cpy (y, &w, p);
       __mul (x, &w, y, p);
       __sub (&mptwo, y, &z, p);
       __mul (&w, &z, y, p);
     }
 }

 /* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X and Z
    or Y and Z.  Relative error bound:
    - For P = 2: 2.001 * R ^ (1 - P)
    - For P = 3: 2.063 * R ^ (1 - P)
    - For P > 3: 3.001 * R ^ (1 - P)

    *X = 0 is not permissible.  */
 void
 SECTION
 __dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   mp_no w;

   if (X[0] == 0)
     Z[0] = 0;
   else
     {
       __inv (y, &w, p);
       __mul (x, &w, z, p);
     }
 }
	/*
	* IBM Accurate Mathematical Library
	* written by International Business Machines Corp.
	* Copyright (C) 2001-2014 Free Software Foundation, Inc.
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU Lesser General Public License as published by
	* the Free Software Foundation; either version 2.1 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public License
	* along with this program; if not, see <http://www.gnu.org/licenses/>.
	*/
	/************************************************************************/
	/* MODULE_NAME: mpa.c */
	/* */
	/* FUNCTIONS: */
	/* mcr */
	/* acr */
	/* cpy */
	/* norm */
	/* denorm */
	/* mp_dbl */
	/* dbl_mp */
	/* add_magnitudes */
	/* sub_magnitudes */
	/* add */
	/* sub */
	/* mul */
	/* inv */
	/* dvd */
	/* */
	/* Arithmetic functions for multiple precision numbers. */
	/* Relative errors are bounded */
	/************************************************************************/


	#include "endian.h"
	#include "mpa.h"
	#include <sys/param.h>
	#include <alloca.h>

	#ifndef SECTION
	# define SECTION
	#endif

	#ifndef NO__CONST
	const mp_no mpone = { 1, { 1.0, 1.0 } };
	const mp_no mptwo = { 1, { 1.0, 2.0 } };
	#endif

	#ifndef NO___ACR
	/* Compare mantissa of two multiple precision numbers regardless of the sign
	and exponent of the numbers. */
	static int
	mcr (const mp_no x, const mp_no y, int p)
	{
	long i;
	long p2 = p;
	for (i = 1; i <= p2; i++)
	{
	if (X[i] == Y[i])
	continue;
	else if (X[i] > Y[i])
	return 1;
	else
	return -1;
	}
	return 0;
	}

	/* Compare the absolute values of two multiple precision numbers. */
	int
	__acr (const mp_no x, const mp_no y, int p)
	{
	long i;

	if (X[0] == 0)
	{
	if (Y[0] == 0)
	i = 0;
	else
	i = -1;
	}
	else if (Y[0] == 0)
	i = 1;
	else
	{
	if (EX > EY)
	i = 1;
	else if (EX < EY)
	i = -1;
	else
	i = mcr (x, y, p);
	}

	return i;
	}
	#endif

	#ifndef NO___CPY
	/* Copy multiple precision number X into Y. They could be the same
	number. */
	void
	__cpy (const mp_no x, mp_no y, int p)
	{
	long i;

	EY = EX;
	for (i = 0; i <= p; i++)
	Y[i] = X[i];
	}
	#endif

	#ifndef NO___MP_DBL
	/* Convert a multiple precision number *X into a double precision
	number Y, normalized case (\|x\| >= 2(-1022))). /
	static void
	norm (const mp_no x, double y, int p)
	{
	# define R RADIXI
	long i;
	double c;
	mantissa_t a, u, v, z[5];
	if (p < 5)
	{
	if (p == 1)
	c = X[1];
	else if (p == 2)
	c = X[1] + R * X[2];
	else if (p == 3)
	c = X[1] + R * (X[2] + R * X[3]);
	else if (p == 4)
	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
	}
	else
	{
	for (a = 1, z[1] = X[1]; z[1] < TWO23; )
	{
	a *= 2;
	z[1] *= 2;
	}

	for (i = 2; i < 5; i++)
	{
	mantissa_store_t d, r;
	d = X[i] * (mantissa_store_t) a;
	DIV_RADIX (d, r);
	z[i] = r;
	z[i - 1] += d;
	}

	u = ALIGN_DOWN_TO (z[3], TWO19);
	v = z[3] - u;

	if (v == TWO18)
	{
	if (z[4] == 0)
	{
	for (i = 5; i <= p; i++)
	{
	if (X[i] == 0)
	continue;
	else
	{
	z[3] += 1;
	break;
	}
	}
	}
	else
	z[3] += 1;
	}

	c = (z[1] + R * (z[2] + R * z[3])) / a;
	}

	c *= X[0];

	for (i = 1; i < EX; i++)
	c *= RADIX;
	for (i = 1; i > EX; i--)
	c *= RADIXI;

	*y = c;
	# undef R
	}

	/* Convert a multiple precision number *X into a double precision
	number Y, Denormal case (\|x\| < 2(-1022))). /
	static void
	denorm (const mp_no x, double y, int p)
	{
	long i, k;
	long p2 = p;
	double c;
	mantissa_t u, z[5];

	# define R RADIXI
	if (EX < -44 \|\| (EX == -44 && X[1] < TWO5))
	{
	*y = 0;
	return;
	}

	if (p2 == 1)
	{
	if (EX == -42)
	{
	z[1] = X[1] + TWO10;
	z[2] = 0;
	z[3] = 0;
	k = 3;
	}
	else if (EX == -43)
	{
	z[1] = TWO10;
	z[2] = X[1];
	z[3] = 0;
	k = 2;
	}
	else
	{
	z[1] = TWO10;
	z[2] = 0;
	z[3] = X[1];
	k = 1;
	}
	}
	else if (p2 == 2)
	{
	if (EX == -42)
	{
	z[1] = X[1] + TWO10;
	z[2] = X[2];
	z[3] = 0;
	k = 3;
	}
	else if (EX == -43)
	{
	z[1] = TWO10;
	z[2] = X[1];
	z[3] = X[2];
	k = 2;
	}
	else
	{
	z[1] = TWO10;
	z[2] = 0;
	z[3] = X[1];
	k = 1;
	}
	}
	else
	{
	if (EX == -42)
	{
	z[1] = X[1] + TWO10;
	z[2] = X[2];
	k = 3;
	}
	else if (EX == -43)
	{
	z[1] = TWO10;
	z[2] = X[1];
	k = 2;
	}
	else
	{
	z[1] = TWO10;
	z[2] = 0;
	k = 1;
	}
	z[3] = X[k];
	}

	u = ALIGN_DOWN_TO (z[3], TWO5);

	if (u == z[3])
	{
	for (i = k + 1; i <= p2; i++)
	{
	if (X[i] == 0)
	continue;
	else
	{
	z[3] += 1;
	break;
	}
	}
	}

	c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);

	y = c TWOM1032;
	# undef R
	}

	/* Convert multiple precision number X into double precision number Y. The
	result is correctly rounded to the nearest/even. */
	void
	__mp_dbl (const mp_no x, double y, int p)
	{
	if (X[0] == 0)
	{
	*y = 0;
	return;
	}

	if (__glibc_likely (EX > -42 \|\| (EX == -42 && X[1] >= TWO10)))
	norm (x, y, p);
	else
	denorm (x, y, p);
	}
	#endif

	/* Get the multiple precision equivalent of X into *Y. If the precision is too
	small, the result is truncated. */
	void
	SECTION
	__dbl_mp (double x, mp_no *y, int p)
	{
	long i, n;
	long p2 = p;

	/* Sign. */
	if (x == 0)
	{
	Y[0] = 0;
	return;
	}
	else if (x > 0)
	Y[0] = 1;
	else
	{
	Y[0] = -1;
	x = -x;
	}

	/* Exponent. */
	for (EY = 1; x >= RADIX; EY += 1)
	x *= RADIXI;
	for (; x < 1; EY -= 1)
	x *= RADIX;

	/* Digits. */
	n = MIN (p2, 4);
	for (i = 1; i <= n; i++)
	{
	INTEGER_OF (x, Y[i]);
	x *= RADIX;
	}
	for (; i <= p2; i++)
	Y[i] = 0;
	}

	/* Add magnitudes of X and Y assuming that abs (X) >= abs (Y) > 0. The
	sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
	Y and Z. No guard digit is used. The result equals the exact sum,
	truncated. */
	static void
	SECTION
	add_magnitudes (const mp_no x, const mp_no y, mp_no *z, int p)
	{
	long i, j, k;
	long p2 = p;
	mantissa_t zk;

	EZ = EX;

	i = p2;
	j = p2 + EY - EX;
	k = p2 + 1;

	if (__glibc_unlikely (j < 1))
	{
	__cpy (x, z, p);
	return;
	}

	zk = 0;

	for (; j > 0; i--, j--)
	{
	zk += X[i] + Y[j];
	if (zk >= RADIX)
	{
	Z[k--] = zk - RADIX;
	zk = 1;
	}
	else
	{
	Z[k--] = zk;
	zk = 0;
	}
	}

	for (; i > 0; i--)
	{
	zk += X[i];
	if (zk >= RADIX)
	{
	Z[k--] = zk - RADIX;
	zk = 1;
	}
	else
	{
	Z[k--] = zk;
	zk = 0;
	}
	}

	if (zk == 0)
	{
	for (i = 1; i <= p2; i++)
	Z[i] = Z[i + 1];
	}
	else
	{
	Z[1] = zk;
	EZ += 1;
	}
	}

	/* Subtract the magnitudes of X and Y assuming that abs (x) > abs (y) > 0.
	The sign of the difference *Z is not changed. X and Y may overlap but not X
	and Z or Y and Z. One guard digit is used. The error is less than one
	ULP. */
	static void
	SECTION
	sub_magnitudes (const mp_no x, const mp_no y, mp_no *z, int p)
	{
	long i, j, k;
	long p2 = p;
	mantissa_t zk;

	EZ = EX;
	i = p2;
	j = p2 + EY - EX;
	k = p2;

	/* Y is too small compared to X, copy X over to the result. */
	if (__glibc_unlikely (j < 1))
	{
	__cpy (x, z, p);
	return;
	}

	/* The relevant least significant digit in Y is non-zero, so we factor it in
	to enhance accuracy. */
	if (j < p2 && Y[j + 1] > 0)
	{
	Z[k + 1] = RADIX - Y[j + 1];
	zk = -1;
	}
	else
	zk = Z[k + 1] = 0;

	/* Subtract and borrow. */
	for (; j > 0; i--, j--)
	{
	zk += (X[i] - Y[j]);
	if (zk < 0)
	{
	Z[k--] = zk + RADIX;
	zk = -1;
	}
	else
	{
	Z[k--] = zk;
	zk = 0;
	}
	}

	/* We're done with digits from Y, so it's just digits in X. */
	for (; i > 0; i--)
	{
	zk += X[i];
	if (zk < 0)
	{
	Z[k--] = zk + RADIX;
	zk = -1;
	}
	else
	{
	Z[k--] = zk;
	zk = 0;
	}
	}

	/* Normalize. */
	for (i = 1; Z[i] == 0; i++)
	;
	EZ = EZ - i + 1;
	for (k = 1; i <= p2 + 1; )
	Z[k++] = Z[i++];
	for (; k <= p2; )
	Z[k++] = 0;
	}

	/* Add X and Y and store the result in *Z. X and Y may overlap, but not X
	and Z or Y and Z. One guard digit is used. The error is less than one
	ULP. */
	void
	SECTION
	__add (const mp_no x, const mp_no y, mp_no *z, int p)
	{
	int n;

	if (X[0] == 0)
	{
	__cpy (y, z, p);
	return;
	}
	else if (Y[0] == 0)
	{
	__cpy (x, z, p);
	return;
	}

	if (X[0] == Y[0])
	{
	if (__acr (x, y, p) > 0)
	{
	add_magnitudes (x, y, z, p);
	Z[0] = X[0];
	}
	else
	{
	add_magnitudes (y, x, z, p);
	Z[0] = Y[0];
	}
	}
	else
	{
	if ((n = __acr (x, y, p)) == 1)
	{
	sub_magnitudes (x, y, z, p);
	Z[0] = X[0];
	}
	else if (n == -1)
	{
	sub_magnitudes (y, x, z, p);
	Z[0] = Y[0];
	}
	else
	Z[0] = 0;
	}
	}

	/* Subtract Y from X and return the result in *Z. X and Y may overlap but
	not X and Z or Y and Z. One guard digit is used. The error is less than
	one ULP. */
	void
	SECTION
	__sub (const mp_no x, const mp_no y, mp_no *z, int p)
	{
	int n;

	if (X[0] == 0)
	{
	__cpy (y, z, p);
	Z[0] = -Z[0];
	return;
	}
	else if (Y[0] == 0)
	{
	__cpy (x, z, p);
	return;
	}

	if (X[0] != Y[0])
	{
	if (__acr (x, y, p) > 0)
	{
	add_magnitudes (x, y, z, p);
	Z[0] = X[0];
	}
	else
	{
	add_magnitudes (y, x, z, p);
	Z[0] = -Y[0];
	}
	}
	else
	{
	if ((n = __acr (x, y, p)) == 1)
	{
	sub_magnitudes (x, y, z, p);
	Z[0] = X[0];
	}
	else if (n == -1)
	{
	sub_magnitudes (y, x, z, p);
	Z[0] = -Y[0];
	}
	else
	Z[0] = 0;
	}
	}

	#ifndef NO__MUL
	/* Multiply X and Y and store result in *Z. X and Y may overlap but not X
	and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
	digits. In case P > 3 the error is bounded by 1.001 ULP. */
	void
	SECTION
	__mul (const mp_no x, const mp_no y, mp_no *z, int p)
	{
	long i, j, k, ip, ip2;
	long p2 = p;
	mantissa_store_t zk;
	const mp_no *a;
	mantissa_store_t *diag;

	/* Is z=0? */
	if (__glibc_unlikely (X[0] * Y[0] == 0))
	{
	Z[0] = 0;
	return;
	}

	/* We need not iterate through all X's and Y's since it's pointless to
	multiply zeroes. Here, both are zero... */
	for (ip2 = p2; ip2 > 0; ip2--)
	if (X[ip2] != 0 \|\| Y[ip2] != 0)
	break;

	a = X[ip2] != 0 ? y : x;

	/* ... and here, at least one of them is still zero. */
	for (ip = ip2; ip > 0; ip--)
	if (a->d[ip] != 0)
	break;

	/* The product looks like this for p = 3 (as an example):


	a1 a2 a3
	x b1 b2 b3
	-----------------------------
	a1b3 a2b3 a3*b3
	a1b2 a2b2 a3*b2
	a1b1 a2b1 a3*b1

	So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
	for P >= 3. We compute the above digits in two parts; the last P-1
	digits and then the first P digits. The last P-1 digits are a sum of
	products of the input digits from P to P-k where K is 0 for the least
	significant digit and increases as we go towards the left. The product
	term is of the form X[k]*X[P-k] as can be seen in the above example.

	The first P digits are also a sum of products with the same product term,
	except that the sum is from 1 to k. This is also evident from the above
	example.

	Another thing that becomes evident is that only the most significant
	ip+ip2 digits of the result are non-zero, where ip and ip2 are the
	'internal precision' of the input numbers, i.e. digits after ip and ip2
	are all 0. */

	k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;

	while (k > ip + ip2 + 1)
	Z[k--] = 0;

	zk = 0;

	/* Precompute sums of diagonal elements so that we can directly use them
	later. See the next comment to know we why need them. */
	diag = alloca (k * sizeof (mantissa_store_t));
	mantissa_store_t d = 0;
	for (i = 1; i <= ip; i++)
	{
	d += X[i] * (mantissa_store_t) Y[i];
	diag[i] = d;
	}
	while (i < k)
	diag[i++] = d;

	while (k > p2)
	{
	long lim = k / 2;

	if (k % 2 == 0)
	/* We want to add this only once, but since we subtract it in the sum
	of products above, we add twice. */
	zk += 2 * X[lim] * (mantissa_store_t) Y[lim];

	for (i = k - p2, j = p2; i < j; i++, j--)
	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);

	zk -= diag[k - 1];

	DIV_RADIX (zk, Z[k]);
	k--;
	}

	/* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
	goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
	number of multiplications, we halve the range and if k is an even number,
	add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
	X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].

	This reduction tells us that we're summing two things, the first term
	through the half range and the negative of the sum of the product of all
	terms of X and Y in the full range. i.e.

	SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
	a single loop so that it completes in O(n) time and can hence be directly
	used in the loop below. */
	while (k > 1)
	{
	long lim = k / 2;

	if (k % 2 == 0)
	/* We want to add this only once, but since we subtract it in the sum
	of products above, we add twice. */
	zk += 2 * X[lim] * (mantissa_store_t) Y[lim];

	for (i = 1, j = k - 1; i < j; i++, j--)
	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);

	zk -= diag[k - 1];

	DIV_RADIX (zk, Z[k]);
	k--;
	}
	Z[k] = zk;

	/* Get the exponent sum into an intermediate variable. This is a subtle
	optimization, where given enough registers, all operations on the exponent
	happen in registers and the result is written out only once into EZ. */
	int e = EX + EY;

	/* Is there a carry beyond the most significant digit? */
	if (__glibc_unlikely (Z[1] == 0))
	{
	for (i = 1; i <= p2; i++)
	Z[i] = Z[i + 1];
	e--;
	}

	EZ = e;
	Z[0] = X[0] * Y[0];
	}
	#endif

	#ifndef NO__SQR
	/* Square X and store result in Y. X and Y may not overlap. For P in
	[1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
	error is bounded by 1.001 ULP. This is a faster special case of
	multiplication. */
	void
	SECTION
	__sqr (const mp_no x, mp_no y, int p)
	{
	long i, j, k, ip;
	mantissa_store_t yk;

	/* Is z=0? */
	if (__glibc_unlikely (X[0] == 0))
	{
	Y[0] = 0;
	return;
	}

	/* We need not iterate through all X's since it's pointless to
	multiply zeroes. */
	for (ip = p; ip > 0; ip--)
	if (X[ip] != 0)
	break;

	k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;

	while (k > 2 * ip + 1)
	Y[k--] = 0;

	yk = 0;

	while (k > p)
	{
	mantissa_store_t yk2 = 0;
	long lim = k / 2;

	if (k % 2 == 0)
	yk += X[lim] * (mantissa_store_t) X[lim];

	/* In __mul, this loop (and the one within the next while loop) run
	between a range to calculate the mantissa as follows:

	Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
	+ X[n] * Y[k]

	For X == Y, we can get away with summing halfway and doubling the
	result. For cases where the range size is even, the mid-point needs
	to be added separately (above). */
	for (i = k - p, j = p; i < j; i++, j--)
	yk2 += X[i] * (mantissa_store_t) X[j];

	yk += 2 * yk2;

	DIV_RADIX (yk, Y[k]);
	k--;
	}

	while (k > 1)
	{
	mantissa_store_t yk2 = 0;
	long lim = k / 2;

	if (k % 2 == 0)
	yk += X[lim] * (mantissa_store_t) X[lim];

	/* Likewise for this loop. */
	for (i = 1, j = k - 1; i < j; i++, j--)
	yk2 += X[i] * (mantissa_store_t) X[j];

	yk += 2 * yk2;

	DIV_RADIX (yk, Y[k]);
	k--;
	}
	Y[k] = yk;

	/* Squares are always positive. */
	Y[0] = 1;

	/* Get the exponent sum into an intermediate variable. This is a subtle
	optimization, where given enough registers, all operations on the exponent
	happen in registers and the result is written out only once into EZ. */
	int e = EX * 2;

	/* Is there a carry beyond the most significant digit? */
	if (__glibc_unlikely (Y[1] == 0))
	{
	for (i = 1; i <= p; i++)
	Y[i] = Y[i + 1];
	e--;
	}

	EY = e;
	}
	#endif

	/* Invert X and store in Y. Relative error bound:
	- For P = 2: 1.001 * R ^ (1 - P)
	- For P = 3: 1.063 * R ^ (1 - P)
	- For P > 3: 2.001 * R ^ (1 - P)

	X = 0 is not permissible. /
	static void
	SECTION
	__inv (const mp_no x, mp_no y, int p)
	{
	long i;
	double t;
	mp_no z, w;
	static const int np1[] =
	{ 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
	};

	__cpy (x, &z, p);
	z.e = 0;
	__mp_dbl (&z, &t, p);
	t = 1 / t;
	__dbl_mp (t, y, p);
	EY -= EX;

	for (i = 0; i < np1[p]; i++)
	{
	__cpy (y, &w, p);
	__mul (x, &w, y, p);
	__sub (&mptwo, y, &z, p);
	__mul (&w, &z, y, p);
	}
	}

	/* Divide X by Y and store result in *Z. X and Y may overlap but not X and Z
	or Y and Z. Relative error bound:
	- For P = 2: 2.001 * R ^ (1 - P)
	- For P = 3: 2.063 * R ^ (1 - P)
	- For P > 3: 3.001 * R ^ (1 - P)

	X = 0 is not permissible. /
	void
	SECTION
	__dvd (const mp_no x, const mp_no y, mp_no *z, int p)
	{
	mp_no w;

	if (X[0] == 0)
	Z[0] = 0;
	else
	{
	__inv (y, &w, p);
	__mul (x, &w, z, p);
	}
	}