src/acosh.c - open64_libacml_mv - Git at Google


 /*
 *  Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 *  This file is part of libacml_mv.
 *
 *  libacml_mv is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  libacml_mv is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with libacml_mv.  If not, see
 *  <http://www.gnu.org/licenses/>.
 *
 */


 #include "../inc/libm_amd.h"
 #include "../inc/libm_util_amd.h"

 #define USE_NAN_WITH_FLAGS
 #define USE_HANDLE_ERROR
 #define USE_LOG_KERNEL_AMD
 #include "../inc/libm_inlines_amd.h"
 #undef USE_NAN_WITH_FLAGS
 #undef USE_HANDLE_ERROR
 #undef USE_LOG_KERNEL_AMD

 #include "../inc/libm_errno_amd.h"

 #ifndef WINDOWS
 /* Deal with errno for out-of-range argument */
 static inline double retval_errno_edom(double x)
 {
   struct exception exc;
   exc.arg1 = x;
   exc.arg2 = x;
   exc.type = DOMAIN;
   exc.name = (char *)"acosh";
   if (_LIB_VERSION == _SVID_)
     exc.retval = -HUGE;
   else
     exc.retval = nan_with_flags(AMD_F_INVALID);
   if (_LIB_VERSION == _POSIX_)
     __set_errno(EDOM);
   else if (!matherr(&exc))
     {
       if(_LIB_VERSION == _SVID_)
         (void)fputs("acosh: DOMAIN error\n", stderr);
     __set_errno(EDOM);
     }
   return exc.retval;
 }
 #endif

 #undef _FUNCNAME
 #define _FUNCNAME "acosh"
 double FN_PROTOTYPE(acosh)(double x)
 {

   unsigned long long ux;
   double r, rarg, r1, r2;
   int xexp;

   static const unsigned long long
     recrteps = 0x4196a09e667f3bcd; /* 1/sqrt(eps) = 9.49062656242515593767e+07 */
   /* log2_lead and log2_tail sum to an extra-precise version
      of log(2) */

   static const double
     log2_lead = 6.93147122859954833984e-01,  /* 0x3fe62e42e0000000 */
     log2_tail = 5.76999904754328540596e-08;  /* 0x3e6efa39ef35793c */


   GET_BITS_DP64(x, ux);

   if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
     {
       /* x is either NaN or infinity */
       if (ux & MANTBITS_DP64)
         {
           /* x is NaN */
 #ifdef WINDOWS
           return handle_error(_FUNCNAME, ux|0x0008000000000000, _DOMAIN,
                               AMD_F_INVALID, EDOM, x, 0.0);
 #else
           return x + x; /* Raise invalid if it is a signalling NaN */
 #endif
         }
       else
         {
           /* x is infinity */
           if (ux & SIGNBIT_DP64)
             /* x is negative infinity. Return a NaN. */
 #ifdef WINDOWS
             return handle_error(_FUNCNAME, INDEFBITPATT_DP64, _DOMAIN,
                                 AMD_F_INVALID, EDOM, x, 0.0);
 #else
             return retval_errno_edom(x);
 #endif
           else
             /* Return positive infinity with no signal */
             return x;
         }
     }
   else if ((ux & SIGNBIT_DP64) || (ux <= 0x3ff0000000000000))
     {
       /* x <= 1.0 */
       if (ux == 0x3ff0000000000000)
         {
           /* x = 1.0; return zero. */
           return 0.0;
         }
       else
         {
           /* x is less than 1.0. Return a NaN. */
 #ifdef WINDOWS
           return handle_error(_FUNCNAME, INDEFBITPATT_DP64, _DOMAIN,
                               AMD_F_INVALID, EDOM, x, 0.0);
 #else
           return retval_errno_edom(x);
 #endif
         }
     }


   if (ux > recrteps)
     {
       /* Arguments greater than 1/sqrt(epsilon) in magnitude are
          approximated by acosh(x) = ln(2) + ln(x) */
       /* log_kernel_amd(x) returns xexp, r1, r2 such that
          log(x) = xexp*log(2) + r1 + r2 */
       log_kernel_amd64(x, ux, &xexp, &r1, &r2);
       /* Add (xexp+1) * log(2) to z1,z2 to get the result acosh(x).
          The computed r1 is not subject to rounding error because
          (xexp+1) has at most 10 significant bits, log(2) has 24 significant
          bits, and r1 has up to 24 bits; and the exponents of r1
          and r2 differ by at most 6. */
       r1 = ((xexp+1) * log2_lead + r1);
       r2 = ((xexp+1) * log2_tail + r2);
       return r1 + r2;
     }
   else if (ux >= 0x4060000000000000)
     {
       /* 128.0 <= x <= 1/sqrt(epsilon) */
       /* acosh for these arguments is approximated by
          acosh(x) = ln(x + sqrt(x*x-1)) */
       rarg = x*x-1.0;
       /* Use assembly instruction to compute r = sqrt(rarg); */
       ASMSQRT(rarg,r);
       r += x;
       GET_BITS_DP64(r, ux);
       log_kernel_amd64(r, ux, &xexp, &r1, &r2);
       r1 = (xexp * log2_lead + r1);
       r2 = (xexp * log2_tail + r2);
       return r1 + r2;
     }
   else
     {
       /* 1.0 < x <= 128.0 */
       double u1, u2, v1, v2, w1, w2, hx, tx, t, r, s, p1, p2, a1, a2, c1, c2,
         poly;
       if (ux >= 0x3ff8000000000000)
         {
           /* 1.5 <= x <= 128.0 */
           /* We use minimax polynomials,
              based on Abramowitz and Stegun 4.6.32 series
              expansion for acosh(x), with the log(2x) and 1/(2.2.x^2)
              terms removed. We compensate for these two terms later.
           */
           t = x*x;
           if (ux >= 0x4040000000000000)
             {
               /* [3,2] for 32.0 <= x <= 128.0 */
               poly =
                 (0.45995704464157438175e-9 +
                  (-0.89080839823528631030e-9 +
                   (-0.10370522395596168095e-27 +
                    0.35255386405811106347e-32 * t) * t) * t) /
                 (0.21941191335882074014e-8 +
                  (-0.10185073058358334569e-7 +
                   0.95019562478430648685e-8 * t) * t);
             }
           else if (ux >= 0x4020000000000000)
             {
               /* [3,3] for 8.0 <= x <= 32.0 */
               poly =
                 (-0.54903656589072526589e-10 +
                  (0.27646792387218569776e-9 +
                   (-0.26912957240626571979e-9 -
                    0.86712268396736384286e-29 * t) * t) * t) /
                 (-0.24327683788655520643e-9 +
                  (0.20633757212593175571e-8 +
                   (-0.45438330985257552677e-8 +
                    0.28707154390001678580e-8 * t) * t) * t);
             }
           else if (ux >= 0x4010000000000000)
             {
               /* [4,3] for 4.0 <= x <= 8.0 */
               poly =
                 (-0.20827370596738166108e-6 +
                  (0.10232136919220422622e-5 +
                   (-0.98094503424623656701e-6 +
                    (-0.11615338819596146799e-18 +
                     0.44511847799282297160e-21 * t) * t) * t) * t) /
                 (-0.92579451630913718588e-6 +
                  (0.76997374707496606639e-5 +
                   (-0.16727286999128481170e-4 +
                    0.10463413698762590251e-4 * t) * t) * t);
             }
           else if (ux >= 0x4000000000000000)
             {
               /* [5,5] for 2.0 <= x <= 4.0 */
               poly =
                 (-0.122195030526902362060e-7 +
                  (0.157894522814328933143e-6 +
                   (-0.579951798420930466109e-6 +
                    (0.803568881125803647331e-6 +
                     (-0.373906657221148667374e-6 -
                      0.317856399083678204443e-21 * t) * t) * t) * t) * t) /
                 (-0.516260096352477148831e-7 +
                  (0.894662592315345689981e-6 +
                   (-0.475662774453078218581e-5 +
                    (0.107249291567405130310e-4 +
                     (-0.107871445525891289759e-4 +
                      0.398833767702587224253e-5 * t) * t) * t) * t) * t);
             }
           else if (ux >= 0x3ffc000000000000)
             {
               /* [5,4] for 1.75 <= x <= 2.0 */
               poly =
                 (0.1437926821253825186e-3 +
                  (-0.1034078230246627213e-2 +
                   (0.2015310005461823437e-2 +
                    (-0.1159685218876828075e-2 +
                     (-0.9267353551307245327e-11 +
                      0.2880267770324388034e-12 * t) * t) * t) * t) * t) /
                 (0.6305521447028109891e-3 +
                  (-0.6816525887775002944e-2 +
                   (0.2228081831550003651e-1 +
                    (-0.2836886105406603318e-1 +
                     0.1236997707206036752e-1 * t) * t) * t) * t);
             }
           else
             {
               /* [5,4] for 1.5 <= x <= 1.75 */
               poly =
                 ( 0.7471936607751750826e-3 +
                   (-0.4849405284371905506e-2 +
                    (0.8823068059778393019e-2 +
                     (-0.4825395461288629075e-2 +
                      (-0.1001984320956564344e-8 +
                       0.4299919281586749374e-10 * t) * t) * t) * t) * t) /
                 (0.3322359141239411478e-2 +
                  (-0.3293525930397077675e-1 +
                   (0.1011351440424239210e0 +
                    (-0.1227083591622587079e0 +
                     0.5147099404383426080e-1 * t) * t) * t) * t);
             }
           GET_BITS_DP64(x, ux);
           log_kernel_amd64(x, ux, &xexp, &r1, &r2);
           r1 = ((xexp+1) * log2_lead + r1);
           r2 = ((xexp+1) * log2_tail + r2);
           /* Now (r1,r2) sum to log(2x). Subtract the term
              1/(2.2.x^2) = 0.25/t, and add poly/t, carefully
              to maintain precision. (Note that we add poly/t
              rather than poly because of the *x factor used
              when generating the minimax polynomial) */
           v2 = (poly-0.25)/t;
           r = v2 + r1;
           s = ((r1 - r) + v2) + r2;
           v1 = r + s;
           return v1 + ((r - v1) + s);
         }

       /* Here 1.0 <= x <= 1.5. It is hard to maintain accuracy here so
          we have to go to great lengths to do so. */

       /* We compute the value
            t = x - 1.0 + sqrt(2.0*(x - 1.0) + (x - 1.0)*(x - 1.0))
          using simulated quad precision. */
       t = x - 1.0;
       u1 = t * 2.0;

       /* dekker_mul12(t,t,&v1,&v2); */
       GET_BITS_DP64(t, ux);
       ux &= 0xfffffffff8000000;
       PUT_BITS_DP64(ux, hx);
       tx = t - hx;
       v1 = t * t;
       v2 = (((hx * hx - v1) + hx * tx) + tx * hx) + tx * tx;

       /* dekker_add2(u1,0.0,v1,v2,&w1,&w2); */
       r = u1 + v1;
       s = (((u1 - r) + v1) + v2);
       w1 = r + s;
       w2 = (r - w1) + s;

       /* dekker_sqrt2(w1,w2,&u1,&u2); */
       ASMSQRT(w1,p1);
       GET_BITS_DP64(p1, ux);
       ux &= 0xfffffffff8000000;
       PUT_BITS_DP64(ux, c1);
       c2 = p1 - c1;
       a1 = p1 * p1;
       a2 = (((c1 * c1 - a1) + c1 * c2) + c2 * c1) + c2 * c2;
       p2 = (((w1 - a1) - a2) + w2) * 0.5 / p1;
       u1 = p1 + p2;
       u2 = (p1 - u1) + p2;

       /* dekker_add2(u1,u2,t,0.0,&v1,&v2); */
       r = u1 + t;
       s = (((u1 - r) + t)) + u2;
       r1 = r + s;
       r2 = (r - r1) + s;
       t = r1 + r2;

       /* Check for x close to 1.0. */
       if (x < 1.13)
         {
           /* Here 1.0 <= x < 1.13 implies r <= 0.656. In this region
              we need to take extra care to maintain precision.
              We have t = r1 + r2 = (x - 1.0 + sqrt(x*x-1.0))
              to more than basic precision. We use the Taylor series
              for log(1+x), with terms after the O(x*x) term
              approximated by a [6,6] minimax polynomial. */
           double b1, b2, c1, c2, e1, e2, q1, q2, c, cc, hr1, tr1, hpoly, tpoly, hq1, tq1, hr2, tr2;
           poly =
             (0.30893760556597282162e-21 +
              (0.10513858797132174471e0 +
               (0.27834538302122012381e0 +
                (0.27223638654807468186e0 +
                 (0.12038958198848174570e0 +
                  (0.23357202004546870613e-1 +
                   (0.15208417992520237648e-2 +
                    0.72741030690878441996e-7 * t) * t) * t) * t) * t) * t) * t) /
             (0.31541576391396523486e0 +
              (0.10715979719991342022e1 +
               (0.14311581802952004012e1 +
                (0.94928647994421895988e0 +
                 (0.32396235926176348977e0 +
                  (0.52566134756985833588e-1 +
                   0.30477895574211444963e-2 * t) * t) * t) * t) * t) * t);

           /* Now we can compute the result r = acosh(x) = log1p(t)
              using the formula t - 0.5*t*t + poly*t*t. Since t is
              represented as r1+r2, the formula becomes
              r = r1+r2 - 0.5*(r1+r2)*(r1+r2) + poly*(r1+r2)*(r1+r2).
              Expanding out, we get
                r = r1 + r2 - (0.5 + poly)*(r1*r1 + 2*r1*r2 + r2*r2)
              and ignoring negligible quantities we get
                r = r1 + r2 - 0.5*r1*r1 + r1*r2 + poly*t*t
           */
           if (x < 1.06)
             {
               double b, c, e;
               b = r1*r2;
               c = 0.5*r1*r1;
               e = poly*t*t;
               /* N.B. the order of additions and subtractions is important */
               r = (((r2 - b) + e) - c) + r1;
               return r;
             }
           else
             {
               /* For 1.06 <= x <= 1.13 we must evaluate in extended precision
                  to reach about 1 ulp accuracy (in this range the simple code
                  above only manages about 1.5 ulp accuracy) */

               /* Split poly, r1 and r2 into head and tail sections */
               GET_BITS_DP64(poly, ux);
               ux &= 0xfffffffff8000000;
               PUT_BITS_DP64(ux,hpoly);
               tpoly = poly - hpoly;
               GET_BITS_DP64(r1,ux);
               ux &= 0xfffffffff8000000;
               PUT_BITS_DP64(ux,hr1);
               tr1 = r1 - hr1;
               GET_BITS_DP64(r2, ux);
               ux &= 0xfffffffff8000000;
               PUT_BITS_DP64(ux,hr2);
               tr2 = r2 - hr2;

               /* e = poly*t*t */
               c = poly * r1;
               cc = (((hpoly * hr1 - c) + hpoly * tr1) + tpoly * hr1) + tpoly * tr1;
               cc = poly * r2 + cc;
               q1 = c + cc;
               q2 = (c - q1) + cc;
               GET_BITS_DP64(q1, ux);
               ux &= 0xfffffffff8000000;
               PUT_BITS_DP64(ux,hq1);
               tq1 = q1 - hq1;
               c = q1 * r1;
               cc = (((hq1 * hr1 - c) + hq1 * tr1) + tq1 * hr1) + tq1 * tr1;
               cc = q1 * r2 + q2 * r1 + cc;
               e1 = c + cc;
               e2 = (c - e1) + cc;

               /* b = r1*r2 */
               b1 = r1 * r2;
               b2 = (((hr1 * hr2 - b1) + hr1 * tr2) + tr1 * hr2) + tr1 * tr2;

               /* c = 0.5*r1*r1 */
               c1 = (0.5*r1) * r1;
               c2 = (((0.5*hr1 * hr1 - c1) + 0.5*hr1 * tr1) + 0.5*tr1 * hr1) + 0.5*tr1 * tr1;

               /* v = a + d - b */
               r = r1 - b1;
               s = (((r1 - r) - b1) - b2) + r2;
               v1 = r + s;
               v2 = (r - v1) + s;

               /* w = (a + d - b) - c */
               r = v1 - c1;
               s = (((v1 - r) - c1) - c2) + v2;
               w1 = r + s;
               w2 = (r - w1) + s;

               /* u = ((a + d - b) - c) + e */
               r = w1 + e1;
               s = (((w1 - r) + e1) + e2) + w2;
               u1 = r + s;
               u2 = (r - u1) + s;

               /* The result r = acosh(x) */
               r = u1 + u2;

               return r;
             }
         }
       else
         {
           /* For arguments 1.13 <= x <= 1.5 the log1p function
              is good enough */
             return FN_PROTOTYPE(log1p)(t);
         }
     }
 }

 weak_alias (__acosh, acosh)

	/*
	* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
	*
	* This file is part of libacml_mv.
	*
	* libacml_mv is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* libacml_mv is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with libacml_mv. If not, see
	* <http://www.gnu.org/licenses/>.
	*
	*/


	#include "../inc/libm_amd.h"
	#include "../inc/libm_util_amd.h"

	#define USE_NAN_WITH_FLAGS
	#define USE_HANDLE_ERROR
	#define USE_LOG_KERNEL_AMD
	#include "../inc/libm_inlines_amd.h"
	#undef USE_NAN_WITH_FLAGS
	#undef USE_HANDLE_ERROR
	#undef USE_LOG_KERNEL_AMD

	#include "../inc/libm_errno_amd.h"

	#ifndef WINDOWS
	/* Deal with errno for out-of-range argument */
	static inline double retval_errno_edom(double x)
	{
	struct exception exc;
	exc.arg1 = x;
	exc.arg2 = x;
	exc.type = DOMAIN;
	exc.name = (char *)"acosh";
	if (_LIB_VERSION == _SVID_)
	exc.retval = -HUGE;
	else
	exc.retval = nan_with_flags(AMD_F_INVALID);
	if (_LIB_VERSION == _POSIX_)
	__set_errno(EDOM);
	else if (!matherr(&exc))
	{
	if(_LIB_VERSION == _SVID_)
	(void)fputs("acosh: DOMAIN error\n", stderr);
	__set_errno(EDOM);
	}
	return exc.retval;
	}
	#endif

	#undef _FUNCNAME
	#define _FUNCNAME "acosh"
	double FN_PROTOTYPE(acosh)(double x)
	{

	unsigned long long ux;
	double r, rarg, r1, r2;
	int xexp;

	static const unsigned long long
	recrteps = 0x4196a09e667f3bcd; /* 1/sqrt(eps) = 9.49062656242515593767e+07 */
	/* log2_lead and log2_tail sum to an extra-precise version
	of log(2) */

	static const double
	log2_lead = 6.93147122859954833984e-01, /* 0x3fe62e42e0000000 */
	log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */


	GET_BITS_DP64(x, ux);

	if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
	{
	/* x is either NaN or infinity */
	if (ux & MANTBITS_DP64)
	{
	/* x is NaN */
	#ifdef WINDOWS
	return handle_error(_FUNCNAME, ux\|0x0008000000000000, _DOMAIN,
	AMD_F_INVALID, EDOM, x, 0.0);
	#else
	return x + x; /* Raise invalid if it is a signalling NaN */
	#endif
	}
	else
	{
	/* x is infinity */
	if (ux & SIGNBIT_DP64)
	/* x is negative infinity. Return a NaN. */
	#ifdef WINDOWS
	return handle_error(_FUNCNAME, INDEFBITPATT_DP64, _DOMAIN,
	AMD_F_INVALID, EDOM, x, 0.0);
	#else
	return retval_errno_edom(x);
	#endif
	else
	/* Return positive infinity with no signal */
	return x;
	}
	}
	else if ((ux & SIGNBIT_DP64) \|\| (ux <= 0x3ff0000000000000))
	{
	/* x <= 1.0 */
	if (ux == 0x3ff0000000000000)
	{
	/* x = 1.0; return zero. */
	return 0.0;
	}
	else
	{
	/* x is less than 1.0. Return a NaN. */
	#ifdef WINDOWS
	return handle_error(_FUNCNAME, INDEFBITPATT_DP64, _DOMAIN,
	AMD_F_INVALID, EDOM, x, 0.0);
	#else
	return retval_errno_edom(x);
	#endif
	}
	}


	if (ux > recrteps)
	{
	/* Arguments greater than 1/sqrt(epsilon) in magnitude are
	approximated by acosh(x) = ln(2) + ln(x) */
	/* log_kernel_amd(x) returns xexp, r1, r2 such that
	log(x) = xexplog(2) + r1 + r2 /
	log_kernel_amd64(x, ux, &xexp, &r1, &r2);
	/* Add (xexp+1) * log(2) to z1,z2 to get the result acosh(x).
	The computed r1 is not subject to rounding error because
	(xexp+1) has at most 10 significant bits, log(2) has 24 significant
	bits, and r1 has up to 24 bits; and the exponents of r1
	and r2 differ by at most 6. */
	r1 = ((xexp+1) * log2_lead + r1);
	r2 = ((xexp+1) * log2_tail + r2);
	return r1 + r2;
	}
	else if (ux >= 0x4060000000000000)
	{
	/* 128.0 <= x <= 1/sqrt(epsilon) */
	/* acosh for these arguments is approximated by
	acosh(x) = ln(x + sqrt(xx-1)) /
	rarg = x*x-1.0;
	/* Use assembly instruction to compute r = sqrt(rarg); */
	ASMSQRT(rarg,r);
	r += x;
	GET_BITS_DP64(r, ux);
	log_kernel_amd64(r, ux, &xexp, &r1, &r2);
	r1 = (xexp * log2_lead + r1);
	r2 = (xexp * log2_tail + r2);
	return r1 + r2;
	}
	else
	{
	/* 1.0 < x <= 128.0 */
	double u1, u2, v1, v2, w1, w2, hx, tx, t, r, s, p1, p2, a1, a2, c1, c2,
	poly;
	if (ux >= 0x3ff8000000000000)
	{
	/* 1.5 <= x <= 128.0 */
	/* We use minimax polynomials,
	based on Abramowitz and Stegun 4.6.32 series
	expansion for acosh(x), with the log(2x) and 1/(2.2.x^2)
	terms removed. We compensate for these two terms later.
	*/
	t = x*x;
	if (ux >= 0x4040000000000000)
	{
	/* [3,2] for 32.0 <= x <= 128.0 */
	poly =
	(0.45995704464157438175e-9 +
	(-0.89080839823528631030e-9 +
	(-0.10370522395596168095e-27 +
	0.35255386405811106347e-32 * t) * t) * t) /
	(0.21941191335882074014e-8 +
	(-0.10185073058358334569e-7 +
	0.95019562478430648685e-8 * t) * t);
	}
	else if (ux >= 0x4020000000000000)
	{
	/* [3,3] for 8.0 <= x <= 32.0 */
	poly =
	(-0.54903656589072526589e-10 +
	(0.27646792387218569776e-9 +
	(-0.26912957240626571979e-9 -
	0.86712268396736384286e-29 * t) * t) * t) /
	(-0.24327683788655520643e-9 +
	(0.20633757212593175571e-8 +
	(-0.45438330985257552677e-8 +
	0.28707154390001678580e-8 * t) * t) * t);
	}
	else if (ux >= 0x4010000000000000)
	{
	/* [4,3] for 4.0 <= x <= 8.0 */
	poly =
	(-0.20827370596738166108e-6 +
	(0.10232136919220422622e-5 +
	(-0.98094503424623656701e-6 +
	(-0.11615338819596146799e-18 +
	0.44511847799282297160e-21 * t) * t) * t) * t) /
	(-0.92579451630913718588e-6 +
	(0.76997374707496606639e-5 +
	(-0.16727286999128481170e-4 +
	0.10463413698762590251e-4 * t) * t) * t);
	}
	else if (ux >= 0x4000000000000000)
	{
	/* [5,5] for 2.0 <= x <= 4.0 */
	poly =
	(-0.122195030526902362060e-7 +
	(0.157894522814328933143e-6 +
	(-0.579951798420930466109e-6 +
	(0.803568881125803647331e-6 +
	(-0.373906657221148667374e-6 -
	0.317856399083678204443e-21 * t) * t) * t) * t) * t) /
	(-0.516260096352477148831e-7 +
	(0.894662592315345689981e-6 +
	(-0.475662774453078218581e-5 +
	(0.107249291567405130310e-4 +
	(-0.107871445525891289759e-4 +
	0.398833767702587224253e-5 * t) * t) * t) * t) * t);
	}
	else if (ux >= 0x3ffc000000000000)
	{
	/* [5,4] for 1.75 <= x <= 2.0 */
	poly =
	(0.1437926821253825186e-3 +
	(-0.1034078230246627213e-2 +
	(0.2015310005461823437e-2 +
	(-0.1159685218876828075e-2 +
	(-0.9267353551307245327e-11 +
	0.2880267770324388034e-12 * t) * t) * t) * t) * t) /
	(0.6305521447028109891e-3 +
	(-0.6816525887775002944e-2 +
	(0.2228081831550003651e-1 +
	(-0.2836886105406603318e-1 +
	0.1236997707206036752e-1 * t) * t) * t) * t);
	}
	else
	{
	/* [5,4] for 1.5 <= x <= 1.75 */
	poly =
	( 0.7471936607751750826e-3 +
	(-0.4849405284371905506e-2 +
	(0.8823068059778393019e-2 +
	(-0.4825395461288629075e-2 +
	(-0.1001984320956564344e-8 +
	0.4299919281586749374e-10 * t) * t) * t) * t) * t) /
	(0.3322359141239411478e-2 +
	(-0.3293525930397077675e-1 +
	(0.1011351440424239210e0 +
	(-0.1227083591622587079e0 +
	0.5147099404383426080e-1 * t) * t) * t) * t);
	}
	GET_BITS_DP64(x, ux);
	log_kernel_amd64(x, ux, &xexp, &r1, &r2);
	r1 = ((xexp+1) * log2_lead + r1);
	r2 = ((xexp+1) * log2_tail + r2);
	/* Now (r1,r2) sum to log(2x). Subtract the term
	1/(2.2.x^2) = 0.25/t, and add poly/t, carefully
	to maintain precision. (Note that we add poly/t
	rather than poly because of the *x factor used
	when generating the minimax polynomial) */
	v2 = (poly-0.25)/t;
	r = v2 + r1;
	s = ((r1 - r) + v2) + r2;
	v1 = r + s;
	return v1 + ((r - v1) + s);
	}

	/* Here 1.0 <= x <= 1.5. It is hard to maintain accuracy here so
	we have to go to great lengths to do so. */

	/* We compute the value
	t = x - 1.0 + sqrt(2.0(x - 1.0) + (x - 1.0)(x - 1.0))
	using simulated quad precision. */
	t = x - 1.0;
	u1 = t * 2.0;

	/* dekker_mul12(t,t,&v1,&v2); */
	GET_BITS_DP64(t, ux);
	ux &= 0xfffffffff8000000;
	PUT_BITS_DP64(ux, hx);
	tx = t - hx;
	v1 = t * t;
	v2 = (((hx * hx - v1) + hx * tx) + tx * hx) + tx * tx;

	/* dekker_add2(u1,0.0,v1,v2,&w1,&w2); */
	r = u1 + v1;
	s = (((u1 - r) + v1) + v2);
	w1 = r + s;
	w2 = (r - w1) + s;

	/* dekker_sqrt2(w1,w2,&u1,&u2); */
	ASMSQRT(w1,p1);
	GET_BITS_DP64(p1, ux);
	ux &= 0xfffffffff8000000;
	PUT_BITS_DP64(ux, c1);
	c2 = p1 - c1;
	a1 = p1 * p1;
	a2 = (((c1 * c1 - a1) + c1 * c2) + c2 * c1) + c2 * c2;
	p2 = (((w1 - a1) - a2) + w2) * 0.5 / p1;
	u1 = p1 + p2;
	u2 = (p1 - u1) + p2;

	/* dekker_add2(u1,u2,t,0.0,&v1,&v2); */
	r = u1 + t;
	s = (((u1 - r) + t)) + u2;
	r1 = r + s;
	r2 = (r - r1) + s;
	t = r1 + r2;

	/* Check for x close to 1.0. */
	if (x < 1.13)
	{
	/* Here 1.0 <= x < 1.13 implies r <= 0.656. In this region
	we need to take extra care to maintain precision.
	We have t = r1 + r2 = (x - 1.0 + sqrt(x*x-1.0))
	to more than basic precision. We use the Taylor series
	for log(1+x), with terms after the O(x*x) term
	approximated by a [6,6] minimax polynomial. */
	double b1, b2, c1, c2, e1, e2, q1, q2, c, cc, hr1, tr1, hpoly, tpoly, hq1, tq1, hr2, tr2;
	poly =
	(0.30893760556597282162e-21 +
	(0.10513858797132174471e0 +
	(0.27834538302122012381e0 +
	(0.27223638654807468186e0 +
	(0.12038958198848174570e0 +
	(0.23357202004546870613e-1 +
	(0.15208417992520237648e-2 +
	0.72741030690878441996e-7 * t) * t) * t) * t) * t) * t) * t) /
	(0.31541576391396523486e0 +
	(0.10715979719991342022e1 +
	(0.14311581802952004012e1 +
	(0.94928647994421895988e0 +
	(0.32396235926176348977e0 +
	(0.52566134756985833588e-1 +
	0.30477895574211444963e-2 * t) * t) * t) * t) * t) * t);

	/* Now we can compute the result r = acosh(x) = log1p(t)
	using the formula t - 0.5tt + polytt. Since t is
	represented as r1+r2, the formula becomes
	r = r1+r2 - 0.5(r1+r2)(r1+r2) + poly(r1+r2)(r1+r2).
	Expanding out, we get
	r = r1 + r2 - (0.5 + poly)(r1r1 + 2r1r2 + r2*r2)
	and ignoring negligible quantities we get
	r = r1 + r2 - 0.5r1r1 + r1r2 + polyt*t
	*/
	if (x < 1.06)
	{
	double b, c, e;
	b = r1*r2;
	c = 0.5r1r1;
	e = polytt;
	/* N.B. the order of additions and subtractions is important */
	r = (((r2 - b) + e) - c) + r1;
	return r;
	}
	else
	{
	/* For 1.06 <= x <= 1.13 we must evaluate in extended precision
	to reach about 1 ulp accuracy (in this range the simple code
	above only manages about 1.5 ulp accuracy) */

	/* Split poly, r1 and r2 into head and tail sections */
	GET_BITS_DP64(poly, ux);
	ux &= 0xfffffffff8000000;
	PUT_BITS_DP64(ux,hpoly);
	tpoly = poly - hpoly;
	GET_BITS_DP64(r1,ux);
	ux &= 0xfffffffff8000000;
	PUT_BITS_DP64(ux,hr1);
	tr1 = r1 - hr1;
	GET_BITS_DP64(r2, ux);
	ux &= 0xfffffffff8000000;
	PUT_BITS_DP64(ux,hr2);
	tr2 = r2 - hr2;

	/* e = polytt */
	c = poly * r1;
	cc = (((hpoly * hr1 - c) + hpoly * tr1) + tpoly * hr1) + tpoly * tr1;
	cc = poly * r2 + cc;
	q1 = c + cc;
	q2 = (c - q1) + cc;
	GET_BITS_DP64(q1, ux);
	ux &= 0xfffffffff8000000;
	PUT_BITS_DP64(ux,hq1);
	tq1 = q1 - hq1;
	c = q1 * r1;
	cc = (((hq1 * hr1 - c) + hq1 * tr1) + tq1 * hr1) + tq1 * tr1;
	cc = q1 * r2 + q2 * r1 + cc;
	e1 = c + cc;
	e2 = (c - e1) + cc;

	/* b = r1r2 /
	b1 = r1 * r2;
	b2 = (((hr1 * hr2 - b1) + hr1 * tr2) + tr1 * hr2) + tr1 * tr2;

	/* c = 0.5r1r1 */
	c1 = (0.5r1) r1;
	c2 = (((0.5hr1 hr1 - c1) + 0.5hr1 tr1) + 0.5tr1 hr1) + 0.5tr1 tr1;

	/* v = a + d - b */
	r = r1 - b1;
	s = (((r1 - r) - b1) - b2) + r2;
	v1 = r + s;
	v2 = (r - v1) + s;

	/* w = (a + d - b) - c */
	r = v1 - c1;
	s = (((v1 - r) - c1) - c2) + v2;
	w1 = r + s;
	w2 = (r - w1) + s;

	/* u = ((a + d - b) - c) + e */
	r = w1 + e1;
	s = (((w1 - r) + e1) + e2) + w2;
	u1 = r + s;
	u2 = (r - u1) + s;

	/* The result r = acosh(x) */
	r = u1 + u2;

	return r;
	}
	}
	else
	{
	/* For arguments 1.13 <= x <= 1.5 the log1p function
	is good enough */
	return FN_PROTOTYPE(log1p)(t);
	}
	}
	}

	weak_alias (__acosh, acosh)