blob: 063d526694a054608b2e248dd5b8cdbabe4020ab [file] [log] [blame]
/*
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved.
*
* This file is part of libacml_mv.
*
* libacml_mv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* libacml_mv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libacml_mv. If not, see
* <http://www.gnu.org/licenses/>.
*
*/
#include "../inc/libm_amd.h"
#include "../inc/libm_util_amd.h"
#define USE_SCALEDOUBLE_1
#define USE_INFINITY_WITH_FLAGS
#define USE_HANDLE_ERROR
#include "../inc/libm_inlines_amd.h"
#undef USE_SCALEDOUBLE_1
#undef USE_INFINITY_WITH_FLAGS
#undef USE_HANDLE_ERROR
#include "../inc/libm_errno_amd.h"
#ifndef WINDOWS
/* Deal with errno for out-of-range result */
static inline double retval_errno_erange_overflow(double x, double y)
{
struct exception exc;
exc.arg1 = x;
exc.arg2 = y;
exc.type = OVERFLOW;
exc.name = (char *)"hypot";
if (_LIB_VERSION == _SVID_)
exc.retval = HUGE;
else
exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
if (_LIB_VERSION == _POSIX_)
__set_errno(ERANGE);
else if (!matherr(&exc))
__set_errno(ERANGE);
return exc.retval;
}
#endif
#ifdef WINDOWS
double FN_PROTOTYPE(hypot)(double x, double y)
#else
double FN_PROTOTYPE(hypot)(double x, double y)
#endif
{
/* Returns sqrt(x*x + y*y) with no overflow or underflow unless
the result warrants it */
const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */
double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts;
unsigned long long xexp, yexp, ux, uy, ut;
int dexp, expadjust;
GET_BITS_DP64(x, ux);
ux &= ~SIGNBIT_DP64;
GET_BITS_DP64(y, uy);
uy &= ~SIGNBIT_DP64;
xexp = (ux >> EXPSHIFTBITS_DP64);
yexp = (uy >> EXPSHIFTBITS_DP64);
if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1)
{
/* One or both of the arguments are NaN or infinity. The
result will also be NaN or infinity. */
retval = x*x + y*y;
if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) ||
((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64)))
/* x or y is infinity. ISO C99 defines that we must
return +infinity, even if the other argument is NaN.
Note that the computation of x*x + y*y above will already
have raised invalid if either x or y is a signalling NaN. */
return infinity_with_flags(0);
else
/* One or both of x or y is NaN, and neither is infinity.
Raise invalid if it's a signalling NaN */
return retval;
}
/* Set x = abs(x) and y = abs(y) */
PUT_BITS_DP64(ux, x);
PUT_BITS_DP64(uy, y);
/* The difference in exponents between x and y */
dexp = (int)(xexp - yexp);
expadjust = 0;
if (ux == 0)
/* x is zero */
return y;
else if (uy == 0)
/* y is zero */
return x;
else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1)
/* One of x and y is insignificant compared to the other */
return x + y; /* Raise inexact */
else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500)
{
/* Danger of overflow; scale down by 2**600. */
expadjust = 600;
ux -= 0x2580000000000000;
PUT_BITS_DP64(ux, x);
uy -= 0x2580000000000000;
PUT_BITS_DP64(uy, y);
}
else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500)
{
/* Danger of underflow; scale up by 2**600. */
expadjust = -600;
if (xexp == 0)
{
/* x is denormal - handle by adding 601 to the exponent
and then subtracting a correction for the implicit bit */
PUT_BITS_DP64(ux + 0x2590000000000000, x);
x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
GET_BITS_DP64(x, ux);
}
else
{
/* x is normal - just increase the exponent by 600 */
ux += 0x2580000000000000;
PUT_BITS_DP64(ux, x);
}
if (yexp == 0)
{
PUT_BITS_DP64(uy + 0x2590000000000000, y);
y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
GET_BITS_DP64(y, uy);
}
else
{
uy += 0x2580000000000000;
PUT_BITS_DP64(uy, y);
}
}
#ifdef FAST_BUT_GREATER_THAN_ONE_ULP
/* Not awful, but results in accuracy loss larger than 1 ulp */
r = x*x + y*y
#else
/* Slower but more accurate */
/* Sort so that x is greater than y */
if (x < y)
{
u = y;
y = x;
x = u;
ut = ux;
ux = uy;
uy = ut;
}
/* Split x into hx and tx, head and tail */
PUT_BITS_DP64(ux & 0xfffffffff8000000, hx);
tx = x - hx;
PUT_BITS_DP64(uy & 0xfffffffff8000000, hy);
ty = y - hy;
/* Compute r = x*x + y*y with extra precision */
x2 = x*x;
y2 = y*y;
hs = x2 + y2;
if (dexp == 0)
/* We take most care when x and y have equal exponents,
i.e. are almost the same size */
ts = (((x2 - hs) + y2) +
((hx * hx - x2) + 2 * hx * tx) + tx * tx) +
((hy * hy - y2) + 2 * hy * ty) + ty * ty;
else
ts = (((x2 - hs) + y2) +
((hx * hx - x2) + 2 * hx * tx) + tx * tx);
r = hs + ts;
#endif
/* The sqrt can introduce another half ulp error. */
#ifdef WINDOWS
/* VC++ intrinsic call */
_mm_store_sd(&retval, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r)));
#else
/* Hammer sqrt instruction */
asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (r));
#endif
/* If necessary scale the result back. This may lead to
overflow but if so that's the correct result. */
retval = scaleDouble_1(retval, expadjust);
if (retval > large)
/* The result overflowed. Deal with errno. */
#ifdef WINDOWS
return handle_error("hypot", PINFBITPATT_DP64, _OVERFLOW,
AMD_F_OVERFLOW | AMD_F_INEXACT, ERANGE, x, y);
#else
return retval_errno_erange_overflow(x, y);
#endif
return retval;
}
weak_alias (__hypot, hypot)