src/asinh.c - open64_libacml_mv - Git at Google


 /*
 *  Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 *  This file is part of libacml_mv.
 *
 *  libacml_mv is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  libacml_mv is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with libacml_mv.  If not, see
 *  <http://www.gnu.org/licenses/>.
 *
 */


 #include "../inc/libm_amd.h"
 #include "../inc/libm_util_amd.h"

 #define USE_HANDLE_ERROR
 #define USE_LOG_KERNEL_AMD
 #define USE_VAL_WITH_FLAGS
 #include "../inc/libm_inlines_amd.h"
 #undef USE_HANDLE_ERROR
 #undef USE_LOG_KERNEL_AMD
 #undef VAL_WITH_FLAGS

 #undef _FUNCNAME
 #define _FUNCNAME "asinh"
 double FN_PROTOTYPE(asinh)(double x)
 {

   unsigned long long ux, ax, xneg;
   double absx, r, rarg, t, r1, r2, poly, s, v1, v2;
   int xexp;

   static const unsigned long long
     rteps = 0x3e46a09e667f3bcd,    /* sqrt(eps) = 1.05367121277235086670e-08 */
     recrteps = 0x4196a09e667f3bcd; /* 1/rteps = 9.49062656242515593767e+07 */

   /* log2_lead and log2_tail sum to an extra-precise version
      of log(2) */
   static const double
     log2_lead = 6.93147122859954833984e-01,  /* 0x3fe62e42e0000000 */
     log2_tail = 5.76999904754328540596e-08;  /* 0x3e6efa39ef35793c */


   GET_BITS_DP64(x, ux);
   ax = ux & ~SIGNBIT_DP64;
   xneg = ux & SIGNBIT_DP64;
   PUT_BITS_DP64(ax, absx);

   if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
     {
       /* x is either NaN or infinity */
       if (ux & MANTBITS_DP64)
         {
           /* x is NaN */
 #ifdef WINDOWS
           return handle_error(_FUNCNAME, ux|0x0008000000000000, _DOMAIN,
                               AMD_F_INVALID, EDOM, x, 0.0);
 #else
           return x + x; /* Raise invalid if it is a signalling NaN */
 #endif
         }
       else
         {
           /* x is infinity. Return the same infinity. */
 #ifdef WINDOWS
           if (ux & SIGNBIT_DP64)
             return handle_error(_FUNCNAME, NINFBITPATT_DP64, _DOMAIN,
                                 AMD_F_INVALID, EDOM, x, 0.0);
           else
             return handle_error(_FUNCNAME, PINFBITPATT_DP64, _DOMAIN,
                                 AMD_F_INVALID, EDOM, x, 0.0);
 #else
           return x;
 #endif
         }
     }
   else if (ax < rteps) /* abs(x) < sqrt(epsilon) */
     {
       if (ax == 0x0000000000000000)
         {
           /* x is +/-zero. Return the same zero. */
           return x;
         }
       else
         {
           /* Tiny arguments approximated by asinh(x) = x
              - avoid slow operations on denormalized numbers */
           return val_with_flags(x,AMD_F_INEXACT);
         }
     }


   if (ax <= 0x3ff0000000000000) /* abs(x) <= 1.0 */
     {
       /* Arguments less than 1.0 in magnitude are
          approximated by [4,4] or [5,4] minimax polynomials
          fitted to asinh series 4.6.31 (x < 1) from Abramowitz and Stegun
       */
       t = x*x;
       if (ax < 0x3fd0000000000000)
         {
           /* [4,4] for 0 < abs(x) < 0.25 */
           poly =
             (-0.12845379283524906084997e0 +
              (-0.21060688498409799700819e0 +
               (-0.10188951822578188309186e0 +
                (-0.13891765817243625541799e-1 -
                 0.10324604871728082428024e-3 * t) * t) * t) * t) /
             (0.77072275701149440164511e0 +
              (0.16104665505597338100747e1 +
               (0.11296034614816689554875e1 +
                (0.30079351943799465092429e0 +
                 0.235224464765951442265117e-1 * t) * t) * t) * t);
         }
       else if (ax < 0x3fe0000000000000)
         {
           /* [4,4] for 0.25 <= abs(x) < 0.5 */
           poly =
             (-0.12186605129448852495563e0 +
              (-0.19777978436593069928318e0 +
               (-0.94379072395062374824320e-1 +
                (-0.12620141363821680162036e-1 -
                 0.903396794842691998748349e-4 * t) * t) * t) * t) /
             (0.73119630776696495279434e0 +
              (0.15157170446881616648338e1 +
               (0.10524909506981282725413e1 +
                (0.27663713103600182193817e0 +
                 0.21263492900663656707646e-1 * t) * t) * t) * t);
         }
       else if (ax < 0x3fe8000000000000)
         {
           /* [4,4] for 0.5 <= abs(x) < 0.75 */
           poly =
             (-0.81210026327726247622500e-1 +
              (-0.12327355080668808750232e0 +
               (-0.53704925162784720405664e-1 +
                (-0.63106739048128554465450e-2 -
                 0.35326896180771371053534e-4 * t) * t) * t) * t) /
             (0.48726015805581794231182e0 +
              (0.95890837357081041150936e0 +
               (0.62322223426940387752480e0 +
                (0.15028684818508081155141e0 +
                 0.10302171620320141529445e-1 * t) * t) * t) * t);
         }
       else
         {
           /* [5,4] for 0.75 <= abs(x) <= 1.0 */
           poly =
             (-0.4638179204422665073e-1 +
              (-0.7162729496035415183e-1 +
               (-0.3247795155696775148e-1 +
                (-0.4225785421291932164e-2 +
                 (-0.3808984717603160127e-4 +
                  0.8023464184964125826e-6 * t) * t) * t) * t) * t) /
             (0.2782907534642231184e0 +
              (0.5549945896829343308e0 +
               (0.3700732511330698879e0 +
                (0.9395783438240780722e-1 +
                 0.7200057974217143034e-2 * t) * t) * t) * t);
         }
       return x + x*t*poly;
     }
   else if (ax < 0x4040000000000000)
     {
       /* 1.0 <= abs(x) <= 32.0 */
       /* Arguments in this region are approximated by various
          minimax polynomials fitted to asinh series 4.6.31
          in Abramowitz and Stegun.
       */
       t = x*x;
       if (ax >= 0x4020000000000000)
         {
           /* [3,3] for 8.0 <= abs(x) <= 32.0 */
           poly =
             (-0.538003743384069117e-10 +
              (-0.273698654196756169e-9 +
               (-0.268129826956403568e-9 -
                0.804163374628432850e-29 * t) * t) * t) /
             (0.238083376363471960e-9 +
              (0.203579344621125934e-8 +
               (0.450836980450693209e-8 +
                0.286005148753497156e-8 * t) * t) * t);
         }
       else if (ax >= 0x4010000000000000)
         {
           /* [4,3] for 4.0 <= abs(x) <= 8.0 */
           poly =
             (-0.178284193496441400e-6 +
              (-0.928734186616614974e-6 +
               (-0.923318925566302615e-6 +
                (-0.776417026702577552e-19 +
                 0.290845644810826014e-21 * t) * t) * t) * t) /
             (0.786694697277890964e-6 +
              (0.685435665630965488e-5 +
               (0.153780175436788329e-4 +
                0.984873520613417917e-5 * t) * t) * t);

         }
       else if (ax >= 0x4000000000000000)
         {
           /* [5,4] for 2.0 <= abs(x) <= 4.0 */
           poly =
             (-0.209689451648100728e-6 +
              (-0.219252358028695992e-5 +
               (-0.551641756327550939e-5 +
                (-0.382300259826830258e-5 +
                 (-0.421182121910667329e-17 +
                  0.492236019998237684e-19 * t) * t) * t) * t) * t) /
             (0.889178444424237735e-6 +
              (0.131152171690011152e-4 +
               (0.537955850185616847e-4 +
                (0.814966175170941864e-4 +
                 0.407786943832260752e-4 * t) * t) * t) * t);
         }
       else if (ax >= 0x3ff8000000000000)
         {
           /* [5,4] for 1.5 <= abs(x) <= 2.0 */
           poly =
             (-0.195436610112717345e-4 +
              (-0.233315515113382977e-3 +
               (-0.645380957611087587e-3 +
                (-0.478948863920281252e-3 +
                 (-0.805234112224091742e-12 +
                  0.246428598194879283e-13 * t) * t) * t) * t) * t) /
             (0.822166621698664729e-4 +
              (0.135346265620413852e-2 +
               (0.602739242861830658e-2 +
                (0.972227795510722956e-2 +
                 0.510878800983771167e-2 * t) * t) * t) * t);
         }
       else
         {
           /* [5,5] for 1.0 <= abs(x) <= 1.5 */
           poly =
             (-0.121224194072430701e-4 +
              (-0.273145455834305218e-3 +
               (-0.152866982560895737e-2 +
                (-0.292231744584913045e-2 +
                 (-0.174670900236060220e-2 -
                  0.891754209521081538e-12 * t) * t) * t) * t) * t) /
             (0.499426632161317606e-4 +
              (0.139591210395547054e-2 +
               (0.107665231109108629e-1 +
                (0.325809818749873406e-1 +
                 (0.415222526655158363e-1 +
                  0.186315628774716763e-1 * t) * t) * t) * t) * t);
         }
       log_kernel_amd64(absx, ax, &xexp, &r1, &r2);
       r1 = ((xexp+1) * log2_lead + r1);
       r2 = ((xexp+1) * log2_tail + r2);
       /* Now (r1,r2) sum to log(2x). Add the term
          1/(2.2.x^2) = 0.25/t, and add poly/t, carefully
          to maintain precision. (Note that we add poly/t
          rather than poly because of the *x factor used
          when generating the minimax polynomial) */
       v2 = (poly+0.25)/t;
       r = v2 + r1;
       s = ((r1 - r) + v2) + r2;
       v1 = r + s;
       v2 = (r - v1) + s;
       r = v1 + v2;
       if (xneg)
         return -r;
       else
         return r;
     }
   else
     {
       /* abs(x) > 32.0 */
       if (ax > recrteps)
         {
           /* Arguments greater than 1/sqrt(epsilon) in magnitude are
              approximated by asinh(x) = ln(2) + ln(abs(x)), with sign of x */
           /* log_kernel_amd(x) returns xexp, r1, r2 such that
              log(x) = xexp*log(2) + r1 + r2 */
           log_kernel_amd64(absx, ax, &xexp, &r1, &r2);
           /* Add (xexp+1) * log(2) to z1,z2 to get the result asinh(x).
              The computed r1 is not subject to rounding error because
              (xexp+1) has at most 10 significant bits, log(2) has 24 significant
              bits, and r1 has up to 24 bits; and the exponents of r1
              and r2 differ by at most 6. */
           r1 = ((xexp+1) * log2_lead + r1);
           r2 = ((xexp+1) * log2_tail + r2);
           if (xneg)
             return -(r1 + r2);
           else
             return r1 + r2;
         }
       else
         {
           rarg = absx*absx+1.0;
           /* Arguments such that 32.0 <= abs(x) <= 1/sqrt(epsilon) are
              approximated by
                asinh(x) = ln(abs(x) + sqrt(x*x+1))
              with the sign of x (see Abramowitz and Stegun 4.6.20) */
           /* Use assembly instruction to compute r = sqrt(rarg); */
           ASMSQRT(rarg,r);
           r += absx;
           GET_BITS_DP64(r, ax);
           log_kernel_amd64(r, ax, &xexp, &r1, &r2);
           r1 = (xexp * log2_lead + r1);
           r2 = (xexp * log2_tail + r2);
           if (xneg)
             return -(r1 + r2);
           else
             return r1 + r2;
         }
     }
 }

 weak_alias (__asinh, asinh)

	/*
	* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
	*
	* This file is part of libacml_mv.
	*
	* libacml_mv is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* libacml_mv is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with libacml_mv. If not, see
	* <http://www.gnu.org/licenses/>.
	*
	*/


	#include "../inc/libm_amd.h"
	#include "../inc/libm_util_amd.h"

	#define USE_HANDLE_ERROR
	#define USE_LOG_KERNEL_AMD
	#define USE_VAL_WITH_FLAGS
	#include "../inc/libm_inlines_amd.h"
	#undef USE_HANDLE_ERROR
	#undef USE_LOG_KERNEL_AMD
	#undef VAL_WITH_FLAGS

	#undef _FUNCNAME
	#define _FUNCNAME "asinh"
	double FN_PROTOTYPE(asinh)(double x)
	{

	unsigned long long ux, ax, xneg;
	double absx, r, rarg, t, r1, r2, poly, s, v1, v2;
	int xexp;

	static const unsigned long long
	rteps = 0x3e46a09e667f3bcd, /* sqrt(eps) = 1.05367121277235086670e-08 */
	recrteps = 0x4196a09e667f3bcd; /* 1/rteps = 9.49062656242515593767e+07 */

	/* log2_lead and log2_tail sum to an extra-precise version
	of log(2) */
	static const double
	log2_lead = 6.93147122859954833984e-01, /* 0x3fe62e42e0000000 */
	log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */


	GET_BITS_DP64(x, ux);
	ax = ux & ~SIGNBIT_DP64;
	xneg = ux & SIGNBIT_DP64;
	PUT_BITS_DP64(ax, absx);

	if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
	{
	/* x is either NaN or infinity */
	if (ux & MANTBITS_DP64)
	{
	/* x is NaN */
	#ifdef WINDOWS
	return handle_error(_FUNCNAME, ux\|0x0008000000000000, _DOMAIN,
	AMD_F_INVALID, EDOM, x, 0.0);
	#else
	return x + x; /* Raise invalid if it is a signalling NaN */
	#endif
	}
	else
	{
	/* x is infinity. Return the same infinity. */
	#ifdef WINDOWS
	if (ux & SIGNBIT_DP64)
	return handle_error(_FUNCNAME, NINFBITPATT_DP64, _DOMAIN,
	AMD_F_INVALID, EDOM, x, 0.0);
	else
	return handle_error(_FUNCNAME, PINFBITPATT_DP64, _DOMAIN,
	AMD_F_INVALID, EDOM, x, 0.0);
	#else
	return x;
	#endif
	}
	}
	else if (ax < rteps) /* abs(x) < sqrt(epsilon) */
	{
	if (ax == 0x0000000000000000)
	{
	/* x is +/-zero. Return the same zero. */
	return x;
	}
	else
	{
	/* Tiny arguments approximated by asinh(x) = x
	- avoid slow operations on denormalized numbers */
	return val_with_flags(x,AMD_F_INEXACT);
	}
	}


	if (ax <= 0x3ff0000000000000) /* abs(x) <= 1.0 */
	{
	/* Arguments less than 1.0 in magnitude are
	approximated by [4,4] or [5,4] minimax polynomials
	fitted to asinh series 4.6.31 (x < 1) from Abramowitz and Stegun
	*/
	t = x*x;
	if (ax < 0x3fd0000000000000)
	{
	/* [4,4] for 0 < abs(x) < 0.25 */
	poly =
	(-0.12845379283524906084997e0 +
	(-0.21060688498409799700819e0 +
	(-0.10188951822578188309186e0 +
	(-0.13891765817243625541799e-1 -
	0.10324604871728082428024e-3 * t) * t) * t) * t) /
	(0.77072275701149440164511e0 +
	(0.16104665505597338100747e1 +
	(0.11296034614816689554875e1 +
	(0.30079351943799465092429e0 +
	0.235224464765951442265117e-1 * t) * t) * t) * t);
	}
	else if (ax < 0x3fe0000000000000)
	{
	/* [4,4] for 0.25 <= abs(x) < 0.5 */
	poly =
	(-0.12186605129448852495563e0 +
	(-0.19777978436593069928318e0 +
	(-0.94379072395062374824320e-1 +
	(-0.12620141363821680162036e-1 -
	0.903396794842691998748349e-4 * t) * t) * t) * t) /
	(0.73119630776696495279434e0 +
	(0.15157170446881616648338e1 +
	(0.10524909506981282725413e1 +
	(0.27663713103600182193817e0 +
	0.21263492900663656707646e-1 * t) * t) * t) * t);
	}
	else if (ax < 0x3fe8000000000000)
	{
	/* [4,4] for 0.5 <= abs(x) < 0.75 */
	poly =
	(-0.81210026327726247622500e-1 +
	(-0.12327355080668808750232e0 +
	(-0.53704925162784720405664e-1 +
	(-0.63106739048128554465450e-2 -
	0.35326896180771371053534e-4 * t) * t) * t) * t) /
	(0.48726015805581794231182e0 +
	(0.95890837357081041150936e0 +
	(0.62322223426940387752480e0 +
	(0.15028684818508081155141e0 +
	0.10302171620320141529445e-1 * t) * t) * t) * t);
	}
	else
	{
	/* [5,4] for 0.75 <= abs(x) <= 1.0 */
	poly =
	(-0.4638179204422665073e-1 +
	(-0.7162729496035415183e-1 +
	(-0.3247795155696775148e-1 +
	(-0.4225785421291932164e-2 +
	(-0.3808984717603160127e-4 +
	0.8023464184964125826e-6 * t) * t) * t) * t) * t) /
	(0.2782907534642231184e0 +
	(0.5549945896829343308e0 +
	(0.3700732511330698879e0 +
	(0.9395783438240780722e-1 +
	0.7200057974217143034e-2 * t) * t) * t) * t);
	}
	return x + xtpoly;
	}
	else if (ax < 0x4040000000000000)
	{
	/* 1.0 <= abs(x) <= 32.0 */
	/* Arguments in this region are approximated by various
	minimax polynomials fitted to asinh series 4.6.31
	in Abramowitz and Stegun.
	*/
	t = x*x;
	if (ax >= 0x4020000000000000)
	{
	/* [3,3] for 8.0 <= abs(x) <= 32.0 */
	poly =
	(-0.538003743384069117e-10 +
	(-0.273698654196756169e-9 +
	(-0.268129826956403568e-9 -
	0.804163374628432850e-29 * t) * t) * t) /
	(0.238083376363471960e-9 +
	(0.203579344621125934e-8 +
	(0.450836980450693209e-8 +
	0.286005148753497156e-8 * t) * t) * t);
	}
	else if (ax >= 0x4010000000000000)
	{
	/* [4,3] for 4.0 <= abs(x) <= 8.0 */
	poly =
	(-0.178284193496441400e-6 +
	(-0.928734186616614974e-6 +
	(-0.923318925566302615e-6 +
	(-0.776417026702577552e-19 +
	0.290845644810826014e-21 * t) * t) * t) * t) /
	(0.786694697277890964e-6 +
	(0.685435665630965488e-5 +
	(0.153780175436788329e-4 +
	0.984873520613417917e-5 * t) * t) * t);

	}
	else if (ax >= 0x4000000000000000)
	{
	/* [5,4] for 2.0 <= abs(x) <= 4.0 */
	poly =
	(-0.209689451648100728e-6 +
	(-0.219252358028695992e-5 +
	(-0.551641756327550939e-5 +
	(-0.382300259826830258e-5 +
	(-0.421182121910667329e-17 +
	0.492236019998237684e-19 * t) * t) * t) * t) * t) /
	(0.889178444424237735e-6 +
	(0.131152171690011152e-4 +
	(0.537955850185616847e-4 +
	(0.814966175170941864e-4 +
	0.407786943832260752e-4 * t) * t) * t) * t);
	}
	else if (ax >= 0x3ff8000000000000)
	{
	/* [5,4] for 1.5 <= abs(x) <= 2.0 */
	poly =
	(-0.195436610112717345e-4 +
	(-0.233315515113382977e-3 +
	(-0.645380957611087587e-3 +
	(-0.478948863920281252e-3 +
	(-0.805234112224091742e-12 +
	0.246428598194879283e-13 * t) * t) * t) * t) * t) /
	(0.822166621698664729e-4 +
	(0.135346265620413852e-2 +
	(0.602739242861830658e-2 +
	(0.972227795510722956e-2 +
	0.510878800983771167e-2 * t) * t) * t) * t);
	}
	else
	{
	/* [5,5] for 1.0 <= abs(x) <= 1.5 */
	poly =
	(-0.121224194072430701e-4 +
	(-0.273145455834305218e-3 +
	(-0.152866982560895737e-2 +
	(-0.292231744584913045e-2 +
	(-0.174670900236060220e-2 -
	0.891754209521081538e-12 * t) * t) * t) * t) * t) /
	(0.499426632161317606e-4 +
	(0.139591210395547054e-2 +
	(0.107665231109108629e-1 +
	(0.325809818749873406e-1 +
	(0.415222526655158363e-1 +
	0.186315628774716763e-1 * t) * t) * t) * t) * t);
	}
	log_kernel_amd64(absx, ax, &xexp, &r1, &r2);
	r1 = ((xexp+1) * log2_lead + r1);
	r2 = ((xexp+1) * log2_tail + r2);
	/* Now (r1,r2) sum to log(2x). Add the term
	1/(2.2.x^2) = 0.25/t, and add poly/t, carefully
	to maintain precision. (Note that we add poly/t
	rather than poly because of the *x factor used
	when generating the minimax polynomial) */
	v2 = (poly+0.25)/t;
	r = v2 + r1;
	s = ((r1 - r) + v2) + r2;
	v1 = r + s;
	v2 = (r - v1) + s;
	r = v1 + v2;
	if (xneg)
	return -r;
	else
	return r;
	}
	else
	{
	/* abs(x) > 32.0 */
	if (ax > recrteps)
	{
	/* Arguments greater than 1/sqrt(epsilon) in magnitude are
	approximated by asinh(x) = ln(2) + ln(abs(x)), with sign of x */
	/* log_kernel_amd(x) returns xexp, r1, r2 such that
	log(x) = xexplog(2) + r1 + r2 /
	log_kernel_amd64(absx, ax, &xexp, &r1, &r2);
	/* Add (xexp+1) * log(2) to z1,z2 to get the result asinh(x).
	The computed r1 is not subject to rounding error because
	(xexp+1) has at most 10 significant bits, log(2) has 24 significant
	bits, and r1 has up to 24 bits; and the exponents of r1
	and r2 differ by at most 6. */
	r1 = ((xexp+1) * log2_lead + r1);
	r2 = ((xexp+1) * log2_tail + r2);
	if (xneg)
	return -(r1 + r2);
	else
	return r1 + r2;
	}
	else
	{
	rarg = absx*absx+1.0;
	/* Arguments such that 32.0 <= abs(x) <= 1/sqrt(epsilon) are
	approximated by
	asinh(x) = ln(abs(x) + sqrt(x*x+1))
	with the sign of x (see Abramowitz and Stegun 4.6.20) */
	/* Use assembly instruction to compute r = sqrt(rarg); */
	ASMSQRT(rarg,r);
	r += absx;
	GET_BITS_DP64(r, ax);
	log_kernel_amd64(r, ax, &xexp, &r1, &r2);
	r1 = (xexp * log2_lead + r1);
	r2 = (xexp * log2_tail + r2);
	if (xneg)
	return -(r1 + r2);
	else
	return r1 + r2;
	}
	}
	}

	weak_alias (__asinh, asinh)