blob: ead758b78e2b36882002b0eb88fdeb37ba5ae07d [file] [log] [blame]
/*
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
*
* This file is part of libacml_mv.
*
* libacml_mv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* libacml_mv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libacml_mv. If not, see
* <http://www.gnu.org/licenses/>.
*
*/
#include "../inc/libm_amd.h"
#include "../inc/libm_util_amd.h"
#define USE_SPLITEXP
#define USE_SCALEDOUBLE_2
#define USE_VAL_WITH_FLAGS
#include "../inc/libm_inlines_amd.h"
#undef USE_SPLITEXP
#undef USE_SCALEDOUBLE_2
#undef USE_VAL_WITH_FLAGS
double FN_PROTOTYPE(tanh)(double x)
{
/*
The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
to the following three formulae:
1. (exp(x) - exp(-x))/(exp(x) + exp(-x))
2. (1 - (2/(exp(2*x) + 1 )))
3. (exp(2*x) - 1)/(exp(2*x) + 1)
but computationally, some formulae are better on some ranges.
*/
static const double
thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
large_threshold = 20.0; /* 0x4034000000000000 */
unsigned long long ux, aux, xneg;
double y, z, p, z1, z2;
int m;
/* Special cases */
GET_BITS_DP64(x, ux);
aux = ux & ~SIGNBIT_DP64;
if (aux < 0x3e30000000000000) /* |x| small enough that tanh(x) = x */
{
if (aux == 0)
return x; /* with no inexact */
else
return val_with_flags(x, AMD_F_INEXACT);
}
else if (aux > 0x7ff0000000000000) /* |x| is NaN */
return x + x;
xneg = (aux != ux);
y = x;
if (xneg) y = -x;
if (y > large_threshold)
{
/* If x is large then exp(-x) is negligible and
formula 1 reduces to plus or minus 1.0 */
z = 1.0;
}
else if (y <= 1.0)
{
double y2;
y2 = y*y;
if (y < 0.9)
{
/* Use a [3,3] Remez approximation on [0,0.9]. */
z = y + y*y2*
(-0.274030424656179760118928e0 +
(-0.176016349003044679402273e-1 +
(-0.200047621071909498730453e-3 -
0.142077926378834722618091e-7*y2)*y2)*y2)/
(0.822091273968539282568011e0 +
(0.381641414288328849317962e0 +
(0.201562166026937652780575e-1 +
0.2091140262529164482568557e-3*y2)*y2)*y2);
}
else
{
/* Use a [3,3] Remez approximation on [0.9,1]. */
z = y + y*y2*
(-0.227793870659088295252442e0 +
(-0.146173047288731678404066e-1 +
(-0.165597043903549960486816e-3 -
0.115475878996143396378318e-7*y2)*y2)*y2)/
(0.683381611977295894959554e0 +
(0.317204558977294374244770e0 +
(0.167358775461896562588695e-1 +
0.173076050126225961768710e-3*y2)*y2)*y2);
}
}
else
{
/* Compute p = exp(2*y) + 1. The code is basically inlined
from exp_amd. */
splitexp(2*y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
log2_by_32_tail, &m, &z1, &z2);
p = scaleDouble_2(z1 + z2, m) + 1.0;
/* Now reconstruct tanh from p. */
z = (1.0 - 2.0/p);
}
if (xneg) z = - z;
return z;
}
weak_alias (__tanh, tanh)