/* src/tanhf.c - open64_libacml_mv - Git at Google */


 /*
 *  Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 *  This file is part of libacml_mv.
 *
 *  libacml_mv is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  libacml_mv is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with libacml_mv.  If not, see
 *  <http://www.gnu.org/licenses/>.
 *
 */


 #include "../inc/libm_amd.h"
 #include "../inc/libm_util_amd.h"


 #define USE_SPLITEXPF
 #define USE_SCALEFLOAT_2
 #define USE_VALF_WITH_FLAGS
 #include "../inc/libm_inlines_amd.h"
 #undef USE_SPLITEXPF
 #undef USE_SCALEFLOAT_2
 #undef USE_VALF_WITH_FLAGS

 #include "../inc/libm_errno_amd.h"

 float FN_PROTOTYPE(tanhf)(float x)
 {
   /*
     The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
     to the following three formulae:
     1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
     2.  (1 - (2/(exp(2*x) + 1 )))
     3.  (exp(2*x) - 1)/(exp(2*x) + 1)
     but computationally, some formulae are better on some ranges.
   */
   static const float
     thirtytwo_by_log2 =  4.6166240692e+01F, /* 0x4238aa3b */
     log2_by_32_lead =  2.1659851074e-02F, /* 0x3cb17000 */
     log2_by_32_tail =  9.9831822808e-07F, /* 0x3585fdf4 */
     large_threshold = 10.0F; /* 0x41200000 */

   unsigned int ux, aux;
   float y, z, p, z1, z2, xneg;
   int m;

   /* Special cases */

   GET_BITS_SP32(x, ux);
   aux = ux & ~SIGNBIT_SP32;
   if (aux < 0x39000000) /* |x| small enough that tanh(x) = x */
     {
       if (aux == 0)
         return x; /* with no inexact */
       else
         return valf_with_flags(x, AMD_F_INEXACT);
     }
   else if (aux > 0x7f800000) /* |x| is NaN */
     return x + x;

   xneg = 1.0F - 2.0F * (aux != ux);

   y = xneg * x;

   if (y > large_threshold)
     {
       /* If x is large then exp(-x) is negligible and
          formula 1 reduces to plus or minus 1.0 */
       z = 1.0F;
     }
   else if (y <= 1.0F)
     {
       float y2;
       y2 = y*y;

       if (y < 0.9F)
         {
           /* Use a [2,1] Remez approximation on [0,0.9]. */
           z = y + y*y2*
             (-0.28192806108402678e0F +
              (-0.14628356048797849e-2F +
               0.4891631088530669873e-4F*y2)*y2)/
             (0.845784192581041099e0F +
              0.3427017942262751343e0F*y2);
         }
       else
         {
           /* Use a [2,1] Remez approximation on [0.9,1]. */
           z = y + y*y2*
             (-0.24069858695196524e0F +
              (-0.12325644183611929e-2F +
               0.3827534993599483396e-4F*y2)*y2)/
             (0.72209738473684982e0F +
              0.292529068698052819e0F*y2);
         }
     }
   else
     {
       /* Compute p = exp(2*y) + 1. The code is basically inlined
          from exp_amd. */

       splitexpf(2*y, 1.0F, thirtytwo_by_log2, log2_by_32_lead,
 	       log2_by_32_tail, &m, &z1, &z2);
       p = scaleFloat_2(z1 + z2, m) + 1.0F;
       /* Now reconstruct tanh from p. */
       z = (1.0F - 2.0F/p);
     }

   return xneg * z;
 }


 /* NOTE(review): weak_alias is a macro defined outside this file; it
    presumably publishes tanhf as a weak alias for __tanhf -- confirm
    against the project headers. */
 weak_alias (__tanhf, tanhf)

	/*
	* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
	*
	* This file is part of libacml_mv.
	*
	* libacml_mv is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* libacml_mv is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with libacml_mv. If not, see
	* <http://www.gnu.org/licenses/>.
	*
	*/



	#include "../inc/libm_amd.h"
	#include "../inc/libm_util_amd.h"



	#define USE_SPLITEXPF
	#define USE_SCALEFLOAT_2
	#define USE_VALF_WITH_FLAGS
	#include "../inc/libm_inlines_amd.h"
	#undef USE_SPLITEXPF
	#undef USE_SCALEFLOAT_2
	#undef USE_VALF_WITH_FLAGS

	#include "../inc/libm_errno_amd.h"

	/*
	 * Single-precision hyperbolic tangent.
	 *
	 * The computation is carried out on y = |x| and the sign of x is
	 * multiplied back in on return.  NaN inputs are returned as x + x,
	 * inputs whose magnitude bits are below 0x39000000 are returned as x,
	 * |x| <= 1 uses a rational approximation, 1 < |x| <= 10 uses
	 * 1 - 2/(exp(2*|x|) + 1), and |x| > 10 yields +/- 1.
	 */
	float FN_PROTOTYPE(tanhf)(float x)
	{
	  /*
	    The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
	    to the following three formulae:
	    1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
	    2.  (1 - (2/(exp(2*x) + 1 )))
	    3.  (exp(2*x) - 1)/(exp(2*x) + 1)
	    but computationally, some formulae are better on some ranges.
	  */
	  static const float
	    thirtytwo_by_log2 =  4.6166240692e+01F, /* 0x4238aa3b */
	    log2_by_32_lead =  2.1659851074e-02F, /* 0x3cb17000 */
	    log2_by_32_tail =  9.9831822808e-07F, /* 0x3585fdf4 */
	    large_threshold = 10.0F; /* 0x41200000 */

	  unsigned int ux, aux;
	  float y, z, p, z1, z2, xneg;
	  int m;

	  /* Special cases */

	  GET_BITS_SP32(x, ux);
	  aux = ux & ~SIGNBIT_SP32; /* bit pattern with the sign bit cleared */
	  if (aux < 0x39000000) /* |x| small enough that tanh(x) = x */
	    {
	      if (aux == 0)
	        return x; /* with no inexact */
	      else
	        return valf_with_flags(x, AMD_F_INEXACT);
	    }
	  else if (aux > 0x7f800000) /* |x| is NaN */
	    return x + x;

	  /* xneg is -1.0 when the sign bit of x was set, +1.0 otherwise. */
	  xneg = 1.0F - 2.0F * (aux != ux);

	  /* y = |x| */
	  y = xneg * x;

	  if (y > large_threshold)
	    {
	      /* If x is large then exp(-x) is negligible and
	         formula 1 reduces to plus or minus 1.0 */
	      z = 1.0F;
	    }
	  else if (y <= 1.0F)
	    {
	      float y2;
	      y2 = y*y;

	      if (y < 0.9F)
	        {
	          /* Use a [2,1] Remez approximation on [0,0.9]. */
	          z = y + y*y2*
	            (-0.28192806108402678e0F +
	             (-0.14628356048797849e-2F +
	              0.4891631088530669873e-4F*y2)*y2)/
	            (0.845784192581041099e0F +
	             0.3427017942262751343e0F*y2);
	        }
	      else
	        {
	          /* Use a [2,1] Remez approximation on [0.9,1]. */
	          z = y + y*y2*
	            (-0.24069858695196524e0F +
	             (-0.12325644183611929e-2F +
	              0.3827534993599483396e-4F*y2)*y2)/
	            (0.72209738473684982e0F +
	             0.292529068698052819e0F*y2);
	        }
	    }
	  else
	    {
	      /* Compute p = exp(2*y) + 1. The code is basically inlined
	         from exp_amd. */

	      splitexpf(2*y, 1.0F, thirtytwo_by_log2, log2_by_32_lead,
	                log2_by_32_tail, &m, &z1, &z2);
	      p = scaleFloat_2(z1 + z2, m) + 1.0F;
	      /* Now reconstruct tanh from p. */
	      z = (1.0F - 2.0F/p);
	    }

	  /* Re-apply the sign of the original argument. */
	  return xneg * z;
	}


	/* NOTE(review): weak_alias is a macro defined outside this file; it
	   presumably publishes tanhf as a weak alias for __tanhf -- confirm
	   against the project headers. */
	weak_alias (__tanhf, tanhf)