blob: 1cb14c46448905ebef4930388b4da4302cac6441 [file] [log] [blame]
/*
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
*
* This file is part of libacml_mv.
*
* libacml_mv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* libacml_mv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libacml_mv. If not, see
* <http://www.gnu.org/licenses/>.
*
*/
#include "../inc/libm_amd.h"
#include "../inc/libm_util_amd.h"
#define USE_SPLITEXPF
#define USE_SCALEFLOAT_2
#define USE_VALF_WITH_FLAGS
#include "../inc/libm_inlines_amd.h"
#undef USE_SPLITEXPF
#undef USE_SCALEFLOAT_2
#undef USE_VALF_WITH_FLAGS
#include "../inc/libm_errno_amd.h"
float FN_PROTOTYPE(tanhf)(float x)
{
/*
The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
to the following three formulae:
1. (exp(x) - exp(-x))/(exp(x) + exp(-x))
2. (1 - (2/(exp(2*x) + 1 )))
3. (exp(2*x) - 1)/(exp(2*x) + 1)
but computationally, some formulae are better on some ranges.
*/
static const float
thirtytwo_by_log2 = 4.6166240692e+01F, /* 0x4238aa3b */
log2_by_32_lead = 2.1659851074e-02F, /* 0x3cb17000 */
log2_by_32_tail = 9.9831822808e-07F, /* 0x3585fdf4 */
large_threshold = 10.0F; /* 0x41200000 */
unsigned int ux, aux;
float y, z, p, z1, z2, xneg;
int m;
/* Special cases */
GET_BITS_SP32(x, ux);
aux = ux & ~SIGNBIT_SP32;
if (aux < 0x39000000) /* |x| small enough that tanh(x) = x */
{
if (aux == 0)
return x; /* with no inexact */
else
return valf_with_flags(x, AMD_F_INEXACT);
}
else if (aux > 0x7f800000) /* |x| is NaN */
return x + x;
xneg = 1.0F - 2.0F * (aux != ux);
y = xneg * x;
if (y > large_threshold)
{
/* If x is large then exp(-x) is negligible and
formula 1 reduces to plus or minus 1.0 */
z = 1.0F;
}
else if (y <= 1.0F)
{
float y2;
y2 = y*y;
if (y < 0.9F)
{
/* Use a [2,1] Remez approximation on [0,0.9]. */
z = y + y*y2*
(-0.28192806108402678e0F +
(-0.14628356048797849e-2F +
0.4891631088530669873e-4F*y2)*y2)/
(0.845784192581041099e0F +
0.3427017942262751343e0F*y2);
}
else
{
/* Use a [2,1] Remez approximation on [0.9,1]. */
z = y + y*y2*
(-0.24069858695196524e0F +
(-0.12325644183611929e-2F +
0.3827534993599483396e-4F*y2)*y2)/
(0.72209738473684982e0F +
0.292529068698052819e0F*y2);
}
}
else
{
/* Compute p = exp(2*y) + 1. The code is basically inlined
from exp_amd. */
splitexpf(2*y, 1.0F, thirtytwo_by_log2, log2_by_32_lead,
log2_by_32_tail, &m, &z1, &z2);
p = scaleFloat_2(z1 + z2, m) + 1.0F;
/* Now reconstruct tanh from p. */
z = (1.0F - 2.0F/p);
}
return xneg * z;
}
weak_alias (__tanhf, tanhf)