blob: 7ecde9c54968c974ed9e46c9d76931c9dbfe3b1e [file] [log] [blame]
/*
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
*
* This file is part of libacml_mv.
*
* libacml_mv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* libacml_mv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libacml_mv. If not, see
* <http://www.gnu.org/licenses/>.
*
*/
#include "../inc/libm_amd.h"
#include "../inc/libm_util_amd.h"
#define USE_HANDLE_ERROR
#define USE_LOG_KERNEL_AMD
#define USE_VAL_WITH_FLAGS
#include "../inc/libm_inlines_amd.h"
#undef USE_HANDLE_ERROR
#undef USE_LOG_KERNEL_AMD
#undef VAL_WITH_FLAGS
#undef _FUNCNAME
#define _FUNCNAME "asinh"
double FN_PROTOTYPE(asinh)(double x)
{
unsigned long long ux, ax, xneg;
double absx, r, rarg, t, r1, r2, poly, s, v1, v2;
int xexp;
static const unsigned long long
rteps = 0x3e46a09e667f3bcd, /* sqrt(eps) = 1.05367121277235086670e-08 */
recrteps = 0x4196a09e667f3bcd; /* 1/rteps = 9.49062656242515593767e+07 */
/* log2_lead and log2_tail sum to an extra-precise version
of log(2) */
static const double
log2_lead = 6.93147122859954833984e-01, /* 0x3fe62e42e0000000 */
log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */
GET_BITS_DP64(x, ux);
ax = ux & ~SIGNBIT_DP64;
xneg = ux & SIGNBIT_DP64;
PUT_BITS_DP64(ax, absx);
if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
{
/* x is either NaN or infinity */
if (ux & MANTBITS_DP64)
{
/* x is NaN */
#ifdef WINDOWS
return handle_error(_FUNCNAME, ux|0x0008000000000000, _DOMAIN,
AMD_F_INVALID, EDOM, x, 0.0);
#else
return x + x; /* Raise invalid if it is a signalling NaN */
#endif
}
else
{
/* x is infinity. Return the same infinity. */
#ifdef WINDOWS
if (ux & SIGNBIT_DP64)
return handle_error(_FUNCNAME, NINFBITPATT_DP64, _DOMAIN,
AMD_F_INVALID, EDOM, x, 0.0);
else
return handle_error(_FUNCNAME, PINFBITPATT_DP64, _DOMAIN,
AMD_F_INVALID, EDOM, x, 0.0);
#else
return x;
#endif
}
}
else if (ax < rteps) /* abs(x) < sqrt(epsilon) */
{
if (ax == 0x0000000000000000)
{
/* x is +/-zero. Return the same zero. */
return x;
}
else
{
/* Tiny arguments approximated by asinh(x) = x
- avoid slow operations on denormalized numbers */
return val_with_flags(x,AMD_F_INEXACT);
}
}
if (ax <= 0x3ff0000000000000) /* abs(x) <= 1.0 */
{
/* Arguments less than 1.0 in magnitude are
approximated by [4,4] or [5,4] minimax polynomials
fitted to asinh series 4.6.31 (x < 1) from Abramowitz and Stegun
*/
t = x*x;
if (ax < 0x3fd0000000000000)
{
/* [4,4] for 0 < abs(x) < 0.25 */
poly =
(-0.12845379283524906084997e0 +
(-0.21060688498409799700819e0 +
(-0.10188951822578188309186e0 +
(-0.13891765817243625541799e-1 -
0.10324604871728082428024e-3 * t) * t) * t) * t) /
(0.77072275701149440164511e0 +
(0.16104665505597338100747e1 +
(0.11296034614816689554875e1 +
(0.30079351943799465092429e0 +
0.235224464765951442265117e-1 * t) * t) * t) * t);
}
else if (ax < 0x3fe0000000000000)
{
/* [4,4] for 0.25 <= abs(x) < 0.5 */
poly =
(-0.12186605129448852495563e0 +
(-0.19777978436593069928318e0 +
(-0.94379072395062374824320e-1 +
(-0.12620141363821680162036e-1 -
0.903396794842691998748349e-4 * t) * t) * t) * t) /
(0.73119630776696495279434e0 +
(0.15157170446881616648338e1 +
(0.10524909506981282725413e1 +
(0.27663713103600182193817e0 +
0.21263492900663656707646e-1 * t) * t) * t) * t);
}
else if (ax < 0x3fe8000000000000)
{
/* [4,4] for 0.5 <= abs(x) < 0.75 */
poly =
(-0.81210026327726247622500e-1 +
(-0.12327355080668808750232e0 +
(-0.53704925162784720405664e-1 +
(-0.63106739048128554465450e-2 -
0.35326896180771371053534e-4 * t) * t) * t) * t) /
(0.48726015805581794231182e0 +
(0.95890837357081041150936e0 +
(0.62322223426940387752480e0 +
(0.15028684818508081155141e0 +
0.10302171620320141529445e-1 * t) * t) * t) * t);
}
else
{
/* [5,4] for 0.75 <= abs(x) <= 1.0 */
poly =
(-0.4638179204422665073e-1 +
(-0.7162729496035415183e-1 +
(-0.3247795155696775148e-1 +
(-0.4225785421291932164e-2 +
(-0.3808984717603160127e-4 +
0.8023464184964125826e-6 * t) * t) * t) * t) * t) /
(0.2782907534642231184e0 +
(0.5549945896829343308e0 +
(0.3700732511330698879e0 +
(0.9395783438240780722e-1 +
0.7200057974217143034e-2 * t) * t) * t) * t);
}
return x + x*t*poly;
}
else if (ax < 0x4040000000000000)
{
/* 1.0 <= abs(x) <= 32.0 */
/* Arguments in this region are approximated by various
minimax polynomials fitted to asinh series 4.6.31
in Abramowitz and Stegun.
*/
t = x*x;
if (ax >= 0x4020000000000000)
{
/* [3,3] for 8.0 <= abs(x) <= 32.0 */
poly =
(-0.538003743384069117e-10 +
(-0.273698654196756169e-9 +
(-0.268129826956403568e-9 -
0.804163374628432850e-29 * t) * t) * t) /
(0.238083376363471960e-9 +
(0.203579344621125934e-8 +
(0.450836980450693209e-8 +
0.286005148753497156e-8 * t) * t) * t);
}
else if (ax >= 0x4010000000000000)
{
/* [4,3] for 4.0 <= abs(x) <= 8.0 */
poly =
(-0.178284193496441400e-6 +
(-0.928734186616614974e-6 +
(-0.923318925566302615e-6 +
(-0.776417026702577552e-19 +
0.290845644810826014e-21 * t) * t) * t) * t) /
(0.786694697277890964e-6 +
(0.685435665630965488e-5 +
(0.153780175436788329e-4 +
0.984873520613417917e-5 * t) * t) * t);
}
else if (ax >= 0x4000000000000000)
{
/* [5,4] for 2.0 <= abs(x) <= 4.0 */
poly =
(-0.209689451648100728e-6 +
(-0.219252358028695992e-5 +
(-0.551641756327550939e-5 +
(-0.382300259826830258e-5 +
(-0.421182121910667329e-17 +
0.492236019998237684e-19 * t) * t) * t) * t) * t) /
(0.889178444424237735e-6 +
(0.131152171690011152e-4 +
(0.537955850185616847e-4 +
(0.814966175170941864e-4 +
0.407786943832260752e-4 * t) * t) * t) * t);
}
else if (ax >= 0x3ff8000000000000)
{
/* [5,4] for 1.5 <= abs(x) <= 2.0 */
poly =
(-0.195436610112717345e-4 +
(-0.233315515113382977e-3 +
(-0.645380957611087587e-3 +
(-0.478948863920281252e-3 +
(-0.805234112224091742e-12 +
0.246428598194879283e-13 * t) * t) * t) * t) * t) /
(0.822166621698664729e-4 +
(0.135346265620413852e-2 +
(0.602739242861830658e-2 +
(0.972227795510722956e-2 +
0.510878800983771167e-2 * t) * t) * t) * t);
}
else
{
/* [5,5] for 1.0 <= abs(x) <= 1.5 */
poly =
(-0.121224194072430701e-4 +
(-0.273145455834305218e-3 +
(-0.152866982560895737e-2 +
(-0.292231744584913045e-2 +
(-0.174670900236060220e-2 -
0.891754209521081538e-12 * t) * t) * t) * t) * t) /
(0.499426632161317606e-4 +
(0.139591210395547054e-2 +
(0.107665231109108629e-1 +
(0.325809818749873406e-1 +
(0.415222526655158363e-1 +
0.186315628774716763e-1 * t) * t) * t) * t) * t);
}
log_kernel_amd64(absx, ax, &xexp, &r1, &r2);
r1 = ((xexp+1) * log2_lead + r1);
r2 = ((xexp+1) * log2_tail + r2);
/* Now (r1,r2) sum to log(2x). Add the term
1/(2.2.x^2) = 0.25/t, and add poly/t, carefully
to maintain precision. (Note that we add poly/t
rather than poly because of the *x factor used
when generating the minimax polynomial) */
v2 = (poly+0.25)/t;
r = v2 + r1;
s = ((r1 - r) + v2) + r2;
v1 = r + s;
v2 = (r - v1) + s;
r = v1 + v2;
if (xneg)
return -r;
else
return r;
}
else
{
/* abs(x) > 32.0 */
if (ax > recrteps)
{
/* Arguments greater than 1/sqrt(epsilon) in magnitude are
approximated by asinh(x) = ln(2) + ln(abs(x)), with sign of x */
/* log_kernel_amd(x) returns xexp, r1, r2 such that
log(x) = xexp*log(2) + r1 + r2 */
log_kernel_amd64(absx, ax, &xexp, &r1, &r2);
/* Add (xexp+1) * log(2) to z1,z2 to get the result asinh(x).
The computed r1 is not subject to rounding error because
(xexp+1) has at most 10 significant bits, log(2) has 24 significant
bits, and r1 has up to 24 bits; and the exponents of r1
and r2 differ by at most 6. */
r1 = ((xexp+1) * log2_lead + r1);
r2 = ((xexp+1) * log2_tail + r2);
if (xneg)
return -(r1 + r2);
else
return r1 + r2;
}
else
{
rarg = absx*absx+1.0;
/* Arguments such that 32.0 <= abs(x) <= 1/sqrt(epsilon) are
approximated by
asinh(x) = ln(abs(x) + sqrt(x*x+1))
with the sign of x (see Abramowitz and Stegun 4.6.20) */
/* Use assembly instruction to compute r = sqrt(rarg); */
ASMSQRT(rarg,r);
r += absx;
GET_BITS_DP64(r, ax);
log_kernel_amd64(r, ax, &xexp, &r1, &r2);
r1 = (xexp * log2_lead + r1);
r2 = (xexp * log2_tail + r2);
if (xneg)
return -(r1 + r2);
else
return r1 + r2;
}
}
}
weak_alias (__asinh, asinh)