/* inc/libm_util_amd.h - open64_libacml_mv - Git at Google */


 /*
 *  Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 *  This file is part of libacml_mv.
 *
 *  libacml_mv is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  libacml_mv is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with libacml_mv.  If not, see
 *  <http://www.gnu.org/licenses/>.
 *
 */


 #ifndef LIBM_UTIL_AMD_H_INCLUDED
 #define LIBM_UTIL_AMD_H_INCLUDED 1


 /* Short scalar aliases.  The letter gives the kind (F = floating point,
    U = unsigned integer, S = signed integer) and the number the intended
    width in bits.  The integer aliases rely on `int` being 32 bits and
    `long long` being 64 bits on the target; nothing here checks that. */
 typedef float F32;
 typedef unsigned int U32;
 typedef int S32;

 typedef double F64;
 typedef unsigned long long  U64;
 typedef long long S64;

 /* Overlays a single-precision value and a 32-bit unsigned integer on the
    same storage, so the bit pattern of the float can be read or written.
    f32 is the first member, so a brace initialiser sets the float view. */
 union UT32_
 {
     F32 f32;   /* floating-point view */
     U32 u32;   /* raw-bits view */
 };

 /* Overlays a double, a 64-bit unsigned integer, two floats and two 32-bit
    unsigned integers on the same storage.  Which element of f32[] / u32[]
    aliases the low half of f64 / u64 depends on the byte order of the
    target (index 0 is the low half on little-endian targets). */
 union UT64_
 {
     F64 f64;   /* floating-point view */
     U64 u64;   /* raw-bits view */

     F32 f32[2];   /* the two 32-bit halves viewed as floats */
     U32 u32[2];   /* the two 32-bit halves viewed as raw bits */
 };

 /* Names that let the two overlay unions be used without the `union` keyword. */
 typedef union UT32_ UT32;
 typedef union UT64_ UT64;


 /* Single-bit masks: bit 22 of a 32-bit word and bit 51 of a 64-bit word,
    i.e. the most significant bit of the mantissa fields described by
    MANTBITS_SP32 / MANTBITS_DP64 in this header.  Going by the names, this
    is the bit that marks a NaN as quiet. */
 #define QNAN_MASK_32        0x00400000
 #define QNAN_MASK_64        0x0008000000000000


 /* MULTIPLIER_SP / MULTIPLIER_DP are exponents n; the VAL_* constants hold
    the matching powers of two:
      VAL_2PMULTIPLIER_xx  = 2^n   (2^24 for SP, 2^53 for DP)
      VAL_2PMMULTIPLIER_xx = 2^-n  (2^-24 for SP, 2^-53 for DP)
    NOTE(review): presumably used to scale values up and back down by an
    exact power of two -- confirm against the callers.
    The constants are `static const`, so every translation unit that
    includes this header gets its own copy. */
 #define MULTIPLIER_SP 24
 #define MULTIPLIER_DP 53

 static const double VAL_2PMULTIPLIER_DP =  9007199254740992.0;
 static const double VAL_2PMMULTIPLIER_DP = 1.1102230246251565404236316680908e-16;
 static const float VAL_2PMULTIPLIER_SP =  16777216.0F;
 static const float VAL_2PMMULTIPLIER_SP = 5.9604645e-8F;


 /* Definitions for double functions on 64 bit machines */
 /* Masks and bit patterns for a 64-bit double viewed as an unsigned
    64-bit integer:
      SIGNBIT_DP64       bit 63
      EXPBITS_DP64       bits 62..52 (11-bit biased exponent field)
      MANTBITS_DP64      bits 51..0  (stored mantissa)
      IMPBIT_DP64        bit 52, the position of the implicit leading 1
                         once the mantissa field has been isolated
      ONE/TWO/HALFEXPBITS_DP64  bit patterns of 1.0, 2.0 and 0.5
      QNANBITPATT_DP64   positive NaN with only the top mantissa bit set
      INDEFBITPATT_DP64  the same NaN with the sign bit set
      PINF/NINFBITPATT_DP64     +infinity / -infinity
    The remaining values describe the exponent: the bias, the shift that
    brings the exponent field down to bit 0, the smallest and largest
    biased and unbiased exponents of normal numbers, and the mantissa
    length counting the implicit bit.
    NOTE(review): LAMBDA_DP64 and BASEDIGITS_DP64 are not used in this
    header; check the callers for their intended meaning.
    NOTE(review): EMIN_DP64 expands to an unparenthesised negative literal. */
 #define SIGNBIT_DP64      0x8000000000000000
 #define EXPBITS_DP64      0x7ff0000000000000
 #define MANTBITS_DP64     0x000fffffffffffff
 #define ONEEXPBITS_DP64   0x3ff0000000000000
 #define TWOEXPBITS_DP64   0x4000000000000000
 #define HALFEXPBITS_DP64  0x3fe0000000000000
 #define IMPBIT_DP64       0x0010000000000000
 #define QNANBITPATT_DP64  0x7ff8000000000000
 #define INDEFBITPATT_DP64 0xfff8000000000000
 #define PINFBITPATT_DP64  0x7ff0000000000000
 #define NINFBITPATT_DP64  0xfff0000000000000
 #define EXPBIAS_DP64      1023
 #define EXPSHIFTBITS_DP64 52
 #define BIASEDEMIN_DP64   1
 #define EMIN_DP64         -1022
 #define BIASEDEMAX_DP64   2046
 #define EMAX_DP64         1023
 #define LAMBDA_DP64       1.0e300
 #define MANTLENGTH_DP64   53
 #define BASEDIGITS_DP64   15


 /* These definitions, used by float functions,
    are for both 32 and 64 bit machines */
 /* Masks and bit patterns for a 32-bit float viewed as an unsigned
    32-bit integer:
      SIGNBIT_SP32       bit 31
      EXPBITS_SP32       bits 30..23 (8-bit biased exponent field)
      MANTBITS_SP32      bits 22..0  (stored mantissa)
      IMPBIT_SP32        bit 23, the position of the implicit leading 1
                         once the mantissa field has been isolated
      ONE/TWO/HALFEXPBITS_SP32  bit patterns of 1.0f, 2.0f and 0.5f
      QNANBITPATT_SP32   positive NaN with only the top mantissa bit set
      INDEFBITPATT_SP32  the same NaN with the sign bit set
      PINF/NINFBITPATT_SP32     +infinity / -infinity
    The remaining values describe the exponent: the bias, the shift that
    brings the exponent field down to bit 0, the smallest and largest
    biased and unbiased exponents of normal numbers, and the mantissa
    length counting the implicit bit.
    NOTE(review): LAMBDA_SP32 is a double literal (no F suffix) and, like
    BASEDIGITS_SP32, is not used in this header; check the callers.
    NOTE(review): EMIN_SP32 expands to an unparenthesised negative literal. */
 #define SIGNBIT_SP32      0x80000000
 #define EXPBITS_SP32      0x7f800000
 #define MANTBITS_SP32     0x007fffff
 #define ONEEXPBITS_SP32   0x3f800000
 #define TWOEXPBITS_SP32   0x40000000
 #define HALFEXPBITS_SP32  0x3f000000
 #define IMPBIT_SP32       0x00800000
 #define QNANBITPATT_SP32  0x7fc00000
 #define INDEFBITPATT_SP32 0xffc00000
 #define PINFBITPATT_SP32  0x7f800000
 #define NINFBITPATT_SP32  0xff800000
 #define EXPBIAS_SP32      127
 #define EXPSHIFTBITS_SP32 23
 #define BIASEDEMIN_SP32   1
 #define EMIN_SP32         -126
 #define BIASEDEMAX_SP32   254
 #define EMAX_SP32         127
 #define LAMBDA_SP32       1.0e30
 #define MANTLENGTH_SP32   24
 #define BASEDIGITS_SP32   7

 /* Numeric codes 1..10 naming the classes a floating-point value can fall
    into: the two NaN kinds, then infinity, normal, denormal and zero for
    each sign.
    NOTE(review): the routines that produce these codes are not part of
    this header. */
 #define CLASS_SIGNALLING_NAN 1
 #define CLASS_QUIET_NAN 2
 #define CLASS_NEGATIVE_INFINITY 3
 #define CLASS_NEGATIVE_NORMAL_NONZERO 4
 #define CLASS_NEGATIVE_DENORMAL 5
 #define CLASS_NEGATIVE_ZERO 6
 #define CLASS_POSITIVE_ZERO 7
 #define CLASS_POSITIVE_DENORMAL 8
 #define CLASS_POSITIVE_NORMAL_NONZERO 9
 #define CLASS_POSITIVE_INFINITY 10

 /* Reinterpret the storage of a float / double lvalue `x` as an unsigned
    integer of the same width.  The result is itself an lvalue, so it can
    be read or assigned.
    `x` must be an lvalue.  The argument is parenthesised so that passing
    an expression such as `a + 1` is rejected by the compiler instead of
    silently turning into pointer arithmetic on `&a`.
    These macros access the object through a pointer of an incompatible
    type, which violates the strict-aliasing rule; prefer the
    GET_BITS_xx / PUT_BITS_xx macros. */
 #define OLD_BITS_SP32(x) (*((unsigned int *)&(x)))
 #define OLD_BITS_DP64(x) (*((unsigned long long *)&(x)))

 /* Alternatives to the above macros (OLD_BITS_SP32 / OLD_BITS_DP64) which
    don't have problems when using high optimization levels on gcc */
 /* GET_BITS_SP32(x, ux): store the bit pattern of the float expression `x`
    into the unsigned int lvalue `ux`.  The value travels through a
    volatile union, so both the store and the load are really performed.
    Expands to a brace-enclosed block, not an expression. */
 #define GET_BITS_SP32(x, ux)                          \
   {                                                   \
     volatile union {                                  \
       unsigned int raw;                               \
       float        val;                               \
     } sp32_view_;                                     \
     sp32_view_.val = (x);                             \
     ux = sp32_view_.raw;                              \
   }
 /* PUT_BITS_SP32(ux, x): store into the float lvalue `x` the value whose
    bit pattern is the unsigned int expression `ux`.  The value travels
    through a volatile union, so both the store and the load are really
    performed.  Expands to a brace-enclosed block, not an expression. */
 #define PUT_BITS_SP32(ux, x)                          \
   {                                                   \
     volatile union {                                  \
       unsigned int raw;                               \
       float        val;                               \
     } sp32_view_;                                     \
     sp32_view_.raw = (ux);                            \
     x = sp32_view_.val;                               \
   }

 /* GET_BITS_DP64(x, ux): store the bit pattern of the double expression
    `x` into the unsigned long long lvalue `ux`.  The value travels through
    a volatile union, so both the store and the load are really performed.
    Expands to a brace-enclosed block, not an expression. */
 #define GET_BITS_DP64(x, ux)                          \
   {                                                   \
     volatile union {                                  \
       unsigned long long raw;                         \
       double             val;                         \
     } dp64_view_;                                     \
     dp64_view_.val = (x);                             \
     ux = dp64_view_.raw;                              \
   }
 /* PUT_BITS_DP64(ux, x): store into the double lvalue `x` the value whose
    bit pattern is the unsigned long long expression `ux`.  The value
    travels through a volatile union, so both the store and the load are
    really performed.  Expands to a brace-enclosed block, not an expression. */
 #define PUT_BITS_DP64(ux, x)                          \
   {                                                   \
     volatile union {                                  \
       unsigned long long raw;                         \
       double             val;                         \
     } dp64_view_;                                     \
     dp64_view_.raw = (ux);                            \
     x = dp64_view_.val;                               \
   }


 /* Processor-dependent floating-point status flags */
 /* Each status flag is a single bit, so flags can be OR-ed together and
    tested with a mask.
    NOTE(review): the bit positions look like the x86 MXCSR exception-flag
    layout -- confirm against the code that reads the status register. */
 #define AMD_F_INEXACT 0x00000020
 #define AMD_F_UNDERFLOW 0x00000010
 #define AMD_F_OVERFLOW 0x00000008
 #define AMD_F_DIVBYZERO 0x00000004
 #define AMD_F_INVALID 0x00000001

 /* Processor-dependent floating-point precision-control flags */
 /* Values of a two-bit field occupying bits 8..9; AMD_F_EXTENDED doubles
    as the mask for the whole field.
    NOTE(review): presumably the x87 control-word precision field -- confirm. */
 #define AMD_F_EXTENDED 0x00000300
 #define AMD_F_DOUBLE   0x00000200
 #define AMD_F_SINGLE   0x00000000

 /* Processor-dependent floating-point rounding-control flags */
 /* Values of a two-bit field occupying bits 13..14; AMD_F_RC_ZERO doubles
    as the mask for the whole field.
    NOTE(review): presumably the MXCSR rounding-control field -- confirm. */
 #define AMD_F_RC_NEAREST 0x00000000
 #define AMD_F_RC_DOWN    0x00002000
 #define AMD_F_RC_UP      0x00004000
 #define AMD_F_RC_ZERO    0x00006000

 /* How to get hold of an assembly square root instruction:
  *   ASMSQRT(x,y) computes y = sqrt(x).
  *
  * `x` and `y` are double variables.  The expansion already ends in a
  * semicolon, so it forms a complete statement on its own.
  *
  * The WINDOWS branch uses SSE2 intrinsics; no intrinsics header is
  * included here, so the including file has to provide one.
  * The other branch uses GCC-style inline assembly.  It is spelled with
  * __asm__ / __volatile__ so that it also compiles in strict ISO modes
  * (-ansi, -std=c99, -std=c11), where plain `asm` is not a keyword.
  */
 #ifdef WINDOWS
 /* VC++ intrinsic call */
 #define ASMSQRT(x,y) _mm_store_sd(&y, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&x)));
 #else
 /* Hammer sqrt instruction */
 #define ASMSQRT(x,y) __asm__ __volatile__ ("sqrtsd %1, %0" : "=x" (y) : "x" (x));
 #endif

 #endif /* LIBM_UTIL_AMD_H_INCLUDED */

	/*
	* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
	*
	* This file is part of libacml_mv.
	*
	* libacml_mv is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* libacml_mv is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with libacml_mv. If not, see
	* <http://www.gnu.org/licenses/>.
	*
	*/


	#ifndef LIBM_UTIL_AMD_H_INCLUDED
	#define LIBM_UTIL_AMD_H_INCLUDED 1






	/* Short scalar aliases.  The letter gives the kind (F = floating point,
	   U = unsigned integer, S = signed integer) and the number the intended
	   width in bits.  The integer aliases rely on `int` being 32 bits and
	   `long long` being 64 bits on the target; nothing here checks that. */
	typedef float F32;
	typedef unsigned int U32;
	typedef int S32;

	typedef double F64;
	typedef unsigned long long U64;
	typedef long long S64;

	/* Overlays a single-precision value and a 32-bit unsigned integer on the
	   same storage, so the bit pattern of the float can be read or written.
	   f32 is the first member, so a brace initialiser sets the float view. */
	union UT32_
	{
	F32 f32;   /* floating-point view */
	U32 u32;   /* raw-bits view */
	};

	/* Overlays a double, a 64-bit unsigned integer, two floats and two 32-bit
	   unsigned integers on the same storage.  Which element of f32[] / u32[]
	   aliases the low half of f64 / u64 depends on the byte order of the
	   target (index 0 is the low half on little-endian targets). */
	union UT64_
	{
	F64 f64;   /* floating-point view */
	U64 u64;   /* raw-bits view */

	F32 f32[2];   /* the two 32-bit halves viewed as floats */
	U32 u32[2];   /* the two 32-bit halves viewed as raw bits */
	};

	/* Names that let the two overlay unions be used without the `union` keyword. */
	typedef union UT32_ UT32;
	typedef union UT64_ UT64;




	/* Single-bit masks: bit 22 of a 32-bit word and bit 51 of a 64-bit word,
	   i.e. the most significant bit of the mantissa fields described by
	   MANTBITS_SP32 / MANTBITS_DP64 in this header.  Going by the names, this
	   is the bit that marks a NaN as quiet. */
	#define QNAN_MASK_32 0x00400000
	#define QNAN_MASK_64 0x0008000000000000


	/* MULTIPLIER_SP / MULTIPLIER_DP are exponents n; the VAL_* constants hold
	   the matching powers of two:
	     VAL_2PMULTIPLIER_xx  = 2^n   (2^24 for SP, 2^53 for DP)
	     VAL_2PMMULTIPLIER_xx = 2^-n  (2^-24 for SP, 2^-53 for DP)
	   NOTE(review): presumably used to scale values up and back down by an
	   exact power of two -- confirm against the callers.
	   The constants are `static const`, so every translation unit that
	   includes this header gets its own copy. */
	#define MULTIPLIER_SP 24
	#define MULTIPLIER_DP 53

	static const double VAL_2PMULTIPLIER_DP = 9007199254740992.0;
	static const double VAL_2PMMULTIPLIER_DP = 1.1102230246251565404236316680908e-16;
	static const float VAL_2PMULTIPLIER_SP = 16777216.0F;
	static const float VAL_2PMMULTIPLIER_SP = 5.9604645e-8F;





	/* Definitions for double functions on 64 bit machines */
	/* Masks and bit patterns for a 64-bit double viewed as an unsigned
	   64-bit integer:
	     SIGNBIT_DP64       bit 63
	     EXPBITS_DP64       bits 62..52 (11-bit biased exponent field)
	     MANTBITS_DP64      bits 51..0  (stored mantissa)
	     IMPBIT_DP64        bit 52, the position of the implicit leading 1
	                        once the mantissa field has been isolated
	     ONE/TWO/HALFEXPBITS_DP64  bit patterns of 1.0, 2.0 and 0.5
	     QNANBITPATT_DP64   positive NaN with only the top mantissa bit set
	     INDEFBITPATT_DP64  the same NaN with the sign bit set
	     PINF/NINFBITPATT_DP64     +infinity / -infinity
	   The remaining values describe the exponent: the bias, the shift that
	   brings the exponent field down to bit 0, the smallest and largest
	   biased and unbiased exponents of normal numbers, and the mantissa
	   length counting the implicit bit.
	   NOTE(review): LAMBDA_DP64 and BASEDIGITS_DP64 are not used in this
	   header; check the callers for their intended meaning.
	   NOTE(review): EMIN_DP64 expands to an unparenthesised negative literal. */
	#define SIGNBIT_DP64 0x8000000000000000
	#define EXPBITS_DP64 0x7ff0000000000000
	#define MANTBITS_DP64 0x000fffffffffffff
	#define ONEEXPBITS_DP64 0x3ff0000000000000
	#define TWOEXPBITS_DP64 0x4000000000000000
	#define HALFEXPBITS_DP64 0x3fe0000000000000
	#define IMPBIT_DP64 0x0010000000000000
	#define QNANBITPATT_DP64 0x7ff8000000000000
	#define INDEFBITPATT_DP64 0xfff8000000000000
	#define PINFBITPATT_DP64 0x7ff0000000000000
	#define NINFBITPATT_DP64 0xfff0000000000000
	#define EXPBIAS_DP64 1023
	#define EXPSHIFTBITS_DP64 52
	#define BIASEDEMIN_DP64 1
	#define EMIN_DP64 -1022
	#define BIASEDEMAX_DP64 2046
	#define EMAX_DP64 1023
	#define LAMBDA_DP64 1.0e300
	#define MANTLENGTH_DP64 53
	#define BASEDIGITS_DP64 15


	/* These definitions, used by float functions,
	are for both 32 and 64 bit machines */
	/* Masks and bit patterns for a 32-bit float viewed as an unsigned
	   32-bit integer:
	     SIGNBIT_SP32       bit 31
	     EXPBITS_SP32       bits 30..23 (8-bit biased exponent field)
	     MANTBITS_SP32      bits 22..0  (stored mantissa)
	     IMPBIT_SP32        bit 23, the position of the implicit leading 1
	                        once the mantissa field has been isolated
	     ONE/TWO/HALFEXPBITS_SP32  bit patterns of 1.0f, 2.0f and 0.5f
	     QNANBITPATT_SP32   positive NaN with only the top mantissa bit set
	     INDEFBITPATT_SP32  the same NaN with the sign bit set
	     PINF/NINFBITPATT_SP32     +infinity / -infinity
	   The remaining values describe the exponent: the bias, the shift that
	   brings the exponent field down to bit 0, the smallest and largest
	   biased and unbiased exponents of normal numbers, and the mantissa
	   length counting the implicit bit.
	   NOTE(review): LAMBDA_SP32 is a double literal (no F suffix) and, like
	   BASEDIGITS_SP32, is not used in this header; check the callers.
	   NOTE(review): EMIN_SP32 expands to an unparenthesised negative literal. */
	#define SIGNBIT_SP32 0x80000000
	#define EXPBITS_SP32 0x7f800000
	#define MANTBITS_SP32 0x007fffff
	#define ONEEXPBITS_SP32 0x3f800000
	#define TWOEXPBITS_SP32 0x40000000
	#define HALFEXPBITS_SP32 0x3f000000
	#define IMPBIT_SP32 0x00800000
	#define QNANBITPATT_SP32 0x7fc00000
	#define INDEFBITPATT_SP32 0xffc00000
	#define PINFBITPATT_SP32 0x7f800000
	#define NINFBITPATT_SP32 0xff800000
	#define EXPBIAS_SP32 127
	#define EXPSHIFTBITS_SP32 23
	#define BIASEDEMIN_SP32 1
	#define EMIN_SP32 -126
	#define BIASEDEMAX_SP32 254
	#define EMAX_SP32 127
	#define LAMBDA_SP32 1.0e30
	#define MANTLENGTH_SP32 24
	#define BASEDIGITS_SP32 7

	/* Numeric codes 1..10 naming the classes a floating-point value can fall
	   into: the two NaN kinds, then infinity, normal, denormal and zero for
	   each sign.
	   NOTE(review): the routines that produce these codes are not part of
	   this header. */
	#define CLASS_SIGNALLING_NAN 1
	#define CLASS_QUIET_NAN 2
	#define CLASS_NEGATIVE_INFINITY 3
	#define CLASS_NEGATIVE_NORMAL_NONZERO 4
	#define CLASS_NEGATIVE_DENORMAL 5
	#define CLASS_NEGATIVE_ZERO 6
	#define CLASS_POSITIVE_ZERO 7
	#define CLASS_POSITIVE_DENORMAL 8
	#define CLASS_POSITIVE_NORMAL_NONZERO 9
	#define CLASS_POSITIVE_INFINITY 10

	/* Reinterpret the storage of a float / double lvalue `x` as an unsigned
	   integer of the same width.  The result is itself an lvalue, so it can
	   be read or assigned.
	   The dereference and the pointer cast are both required: without them
	   the macro would yield the (truncated) address of `x` rather than its
	   bit pattern.
	   `x` must be an lvalue.  The argument is parenthesised so that passing
	   an expression such as `a + 1` is rejected by the compiler instead of
	   silently turning into pointer arithmetic on `&a`.
	   These macros access the object through a pointer of an incompatible
	   type, which violates the strict-aliasing rule; prefer the
	   GET_BITS_xx / PUT_BITS_xx macros. */
	#define OLD_BITS_SP32(x) (*((unsigned int *)&(x)))
	#define OLD_BITS_DP64(x) (*((unsigned long long *)&(x)))

	/* Alternatives to the above macros (OLD_BITS_SP32 / OLD_BITS_DP64) which
	don't have problems when using high optimization levels on gcc */
	/* GET_BITS_SP32(x, ux): store the bit pattern of the float expression `x`
	   into the unsigned int lvalue `ux`.  The value travels through a
	   volatile union, so both the store and the load are really performed.
	   Expands to a brace-enclosed block, not an expression. */
	#define GET_BITS_SP32(x, ux)                          \
	  {                                                   \
	    volatile union {                                  \
	      unsigned int raw;                               \
	      float        val;                               \
	    } sp32_view_;                                     \
	    sp32_view_.val = (x);                             \
	    ux = sp32_view_.raw;                              \
	  }
	/* PUT_BITS_SP32(ux, x): store into the float lvalue `x` the value whose
	   bit pattern is the unsigned int expression `ux`.  The value travels
	   through a volatile union, so both the store and the load are really
	   performed.  Expands to a brace-enclosed block, not an expression. */
	#define PUT_BITS_SP32(ux, x)                          \
	  {                                                   \
	    volatile union {                                  \
	      unsigned int raw;                               \
	      float        val;                               \
	    } sp32_view_;                                     \
	    sp32_view_.raw = (ux);                            \
	    x = sp32_view_.val;                               \
	  }

	/* GET_BITS_DP64(x, ux): store the bit pattern of the double expression
	   `x` into the unsigned long long lvalue `ux`.  The value travels through
	   a volatile union, so both the store and the load are really performed.
	   Expands to a brace-enclosed block, not an expression. */
	#define GET_BITS_DP64(x, ux)                          \
	  {                                                   \
	    volatile union {                                  \
	      unsigned long long raw;                         \
	      double             val;                         \
	    } dp64_view_;                                     \
	    dp64_view_.val = (x);                             \
	    ux = dp64_view_.raw;                              \
	  }
	/* PUT_BITS_DP64(ux, x): store into the double lvalue `x` the value whose
	   bit pattern is the unsigned long long expression `ux`.  The value
	   travels through a volatile union, so both the store and the load are
	   really performed.  Expands to a brace-enclosed block, not an expression. */
	#define PUT_BITS_DP64(ux, x)                          \
	  {                                                   \
	    volatile union {                                  \
	      unsigned long long raw;                         \
	      double             val;                         \
	    } dp64_view_;                                     \
	    dp64_view_.raw = (ux);                            \
	    x = dp64_view_.val;                               \
	  }


	/* Processor-dependent floating-point status flags */
	/* Each status flag is a single bit, so flags can be OR-ed together and
	   tested with a mask.
	   NOTE(review): the bit positions look like the x86 MXCSR exception-flag
	   layout -- confirm against the code that reads the status register. */
	#define AMD_F_INEXACT 0x00000020
	#define AMD_F_UNDERFLOW 0x00000010
	#define AMD_F_OVERFLOW 0x00000008
	#define AMD_F_DIVBYZERO 0x00000004
	#define AMD_F_INVALID 0x00000001

	/* Processor-dependent floating-point precision-control flags */
	/* Values of a two-bit field occupying bits 8..9; AMD_F_EXTENDED doubles
	   as the mask for the whole field.
	   NOTE(review): presumably the x87 control-word precision field -- confirm. */
	#define AMD_F_EXTENDED 0x00000300
	#define AMD_F_DOUBLE 0x00000200
	#define AMD_F_SINGLE 0x00000000

	/* Processor-dependent floating-point rounding-control flags */
	/* Values of a two-bit field occupying bits 13..14; AMD_F_RC_ZERO doubles
	   as the mask for the whole field.
	   NOTE(review): presumably the MXCSR rounding-control field -- confirm. */
	#define AMD_F_RC_NEAREST 0x00000000
	#define AMD_F_RC_DOWN 0x00002000
	#define AMD_F_RC_UP 0x00004000
	#define AMD_F_RC_ZERO 0x00006000

	/* How to get hold of an assembly square root instruction:
	* ASMSQRT(x,y) computes y = sqrt(x).
	*
	* `x` and `y` are double variables.  The expansion already ends in a
	* semicolon, so it forms a complete statement on its own.
	*
	* The WINDOWS branch uses SSE2 intrinsics; no intrinsics header is
	* included here, so the including file has to provide one.
	* The other branch uses GCC-style inline assembly.  It is spelled with
	* __asm__ / __volatile__ so that it also compiles in strict ISO modes
	* (-ansi, -std=c99, -std=c11), where plain `asm` is not a keyword.
	*/
	#ifdef WINDOWS
	/* VC++ intrinsic call */
	#define ASMSQRT(x,y) _mm_store_sd(&y, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&x)));
	#else
	/* Hammer sqrt instruction */
	#define ASMSQRT(x,y) __asm__ __volatile__ ("sqrtsd %1, %0" : "=x" (y) : "x" (x));
	#endif

	#endif /* LIBM_UTIL_AMD_H_INCLUDED */