src/tanh.c - open64_libacml_mv - Git at Google


 /*
 *  Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 *  This file is part of libacml_mv.
 *
 *  libacml_mv is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  libacml_mv is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with libacml_mv.  If not, see
 *  <http://www.gnu.org/licenses/>.
 *
 */


 #include "../inc/libm_amd.h"
 #include "../inc/libm_util_amd.h"


 #define USE_SPLITEXP
 #define USE_SCALEDOUBLE_2
 #define USE_VAL_WITH_FLAGS
 #include "../inc/libm_inlines_amd.h"
 #undef USE_SPLITEXP
 #undef USE_SCALEDOUBLE_2
 #undef USE_VAL_WITH_FLAGS

 double FN_PROTOTYPE(tanh)(double x)
 {
   /*
     The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
     to the following three formulae:
     1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
     2.  (1 - (2/(exp(2*x) + 1 )))
     3.  (exp(2*x) - 1)/(exp(2*x) + 1)
     but computationally, some formulae are better on some ranges.
   */
   static const double
     thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
     log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
     log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
     large_threshold = 20.0; /* 0x4034000000000000 */

   unsigned long long ux, aux, xneg;
   double y, z, p, z1, z2;
   int m;

   /* Special cases */

   GET_BITS_DP64(x, ux);
   aux = ux & ~SIGNBIT_DP64;
   if (aux < 0x3e30000000000000) /* |x| small enough that tanh(x) = x */
     {
       if (aux == 0)
         return x; /* with no inexact */
       else
         return val_with_flags(x, AMD_F_INEXACT);
     }
   else if (aux > 0x7ff0000000000000) /* |x| is NaN */
     return x + x;

   xneg = (aux != ux);

   y = x;
   if (xneg) y = -x;

   if (y > large_threshold)
     {
       /* If x is large then exp(-x) is negligible and
          formula 1 reduces to plus or minus 1.0 */
       z = 1.0;
     }
   else if (y <= 1.0)
     {
       double y2;
       y2 = y*y;
       if (y < 0.9)
         {
           /* Use a [3,3] Remez approximation on [0,0.9]. */
           z = y + y*y2*
             (-0.274030424656179760118928e0 +
              (-0.176016349003044679402273e-1 +
               (-0.200047621071909498730453e-3 -
                0.142077926378834722618091e-7*y2)*y2)*y2)/
             (0.822091273968539282568011e0 +
              (0.381641414288328849317962e0 +
               (0.201562166026937652780575e-1 +
                0.2091140262529164482568557e-3*y2)*y2)*y2);
         }
       else
         {
           /* Use a [3,3] Remez approximation on [0.9,1]. */
           z = y + y*y2*
             (-0.227793870659088295252442e0 +
              (-0.146173047288731678404066e-1 +
               (-0.165597043903549960486816e-3 -
                0.115475878996143396378318e-7*y2)*y2)*y2)/
             (0.683381611977295894959554e0 +
              (0.317204558977294374244770e0 +
               (0.167358775461896562588695e-1 +
                0.173076050126225961768710e-3*y2)*y2)*y2);
         }
     }
   else
     {
       /* Compute p = exp(2*y) + 1. The code is basically inlined
          from exp_amd. */

       splitexp(2*y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
 	       log2_by_32_tail, &m, &z1, &z2);
       p = scaleDouble_2(z1 + z2, m) + 1.0;

       /* Now reconstruct tanh from p. */
       z = (1.0 - 2.0/p);
     }

   if (xneg) z = - z;
   return z;
 }

 weak_alias (__tanh, tanh)

	/*
	* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
	*
	* This file is part of libacml_mv.
	*
	* libacml_mv is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* libacml_mv is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with libacml_mv. If not, see
	* <http://www.gnu.org/licenses/>.
	*
	*/


	#include "../inc/libm_amd.h"
	#include "../inc/libm_util_amd.h"


	#define USE_SPLITEXP
	#define USE_SCALEDOUBLE_2
	#define USE_VAL_WITH_FLAGS
	#include "../inc/libm_inlines_amd.h"
	#undef USE_SPLITEXP
	#undef USE_SCALEDOUBLE_2
	#undef USE_VAL_WITH_FLAGS

	double FN_PROTOTYPE(tanh)(double x)
	{
	/*
	The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
	to the following three formulae:
	1. (exp(x) - exp(-x))/(exp(x) + exp(-x))
	2. (1 - (2/(exp(2*x) + 1 )))
	3. (exp(2x) - 1)/(exp(2x) + 1)
	but computationally, some formulae are better on some ranges.
	*/
	static const double
	thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
	log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
	log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
	large_threshold = 20.0; /* 0x4034000000000000 */

	unsigned long long ux, aux, xneg;
	double y, z, p, z1, z2;
	int m;

	/* Special cases */

	GET_BITS_DP64(x, ux);
	aux = ux & ~SIGNBIT_DP64;
	if (aux < 0x3e30000000000000) /* \|x\| small enough that tanh(x) = x */
	{
	if (aux == 0)
	return x; /* with no inexact */
	else
	return val_with_flags(x, AMD_F_INEXACT);
	}
	else if (aux > 0x7ff0000000000000) /* \|x\| is NaN */
	return x + x;

	xneg = (aux != ux);

	y = x;
	if (xneg) y = -x;

	if (y > large_threshold)
	{
	/* If x is large then exp(-x) is negligible and
	formula 1 reduces to plus or minus 1.0 */
	z = 1.0;
	}
	else if (y <= 1.0)
	{
	double y2;
	y2 = y*y;
	if (y < 0.9)
	{
	/* Use a [3,3] Remez approximation on [0,0.9]. */
	z = y + yy2
	(-0.274030424656179760118928e0 +
	(-0.176016349003044679402273e-1 +
	(-0.200047621071909498730453e-3 -
	0.142077926378834722618091e-7y2)y2)*y2)/
	(0.822091273968539282568011e0 +
	(0.381641414288328849317962e0 +
	(0.201562166026937652780575e-1 +
	0.2091140262529164482568557e-3y2)y2)*y2);
	}
	else
	{
	/* Use a [3,3] Remez approximation on [0.9,1]. */
	z = y + yy2
	(-0.227793870659088295252442e0 +
	(-0.146173047288731678404066e-1 +
	(-0.165597043903549960486816e-3 -
	0.115475878996143396378318e-7y2)y2)*y2)/
	(0.683381611977295894959554e0 +
	(0.317204558977294374244770e0 +
	(0.167358775461896562588695e-1 +
	0.173076050126225961768710e-3y2)y2)*y2);
	}
	}
	else
	{
	/* Compute p = exp(2*y) + 1. The code is basically inlined
	from exp_amd. */

	splitexp(2*y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
	log2_by_32_tail, &m, &z1, &z2);
	p = scaleDouble_2(z1 + z2, m) + 1.0;

	/* Now reconstruct tanh from p. */
	z = (1.0 - 2.0/p);
	}

	if (xneg) z = - z;
	return z;
	}

	weak_alias (__tanh, tanh)