| |
| /* |
| * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. All Rights Reserved. |
| * |
| * This file is part of libacml_mv. |
| * |
| * libacml_mv is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * libacml_mv is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with libacml_mv. If not, see |
| * <http://www.gnu.org/licenses/>. |
| * |
| */ |
| |
| |
| |
| #include "../inc/libm_amd.h" |
| #include "../inc/libm_util_amd.h" |
| |
| #define USE_SPLITEXP |
| #define USE_SCALEDOUBLE_1 |
| #define USE_SCALEDOUBLE_2 |
| #define USE_INFINITYF_WITH_FLAGS |
| #define USE_VALF_WITH_FLAGS |
| #include "../inc/libm_inlines_amd.h" |
| #undef USE_SPLITEXP |
| #undef USE_SCALEDOUBLE_1 |
| #undef USE_SCALEDOUBLE_2 |
| #undef USE_INFINITYF_WITH_FLAGS |
| #undef USE_VALF_WITH_FLAGS |
| |
| #include "../inc/libm_errno_amd.h" |
| |
| #ifndef WINDOWS |
| /* Deal with errno for out-of-range result */ |
| static inline float retval_errno_erange(float x) |
| { |
| struct exception exc; |
| exc.arg1 = (double)x; |
| exc.arg2 = (double)x; |
| exc.type = OVERFLOW; |
| exc.name = (char *)"coshf"; |
| if (_LIB_VERSION == _SVID_) |
| { |
| exc.retval = HUGE; |
| } |
| else |
| { |
| exc.retval = infinityf_with_flags(AMD_F_OVERFLOW); |
| } |
| if (_LIB_VERSION == _POSIX_) |
| __set_errno(ERANGE); |
| else if (!matherr(&exc)) |
| __set_errno(ERANGE); |
| return exc.retval; |
| } |
| |
| #endif |
| float FN_PROTOTYPE(coshf)(float fx) |
| { |
| /* |
| After dealing with special cases the computation is split into |
| regions as follows: |
| |
| abs(x) >= max_cosh_arg: |
| cosh(x) = sign(x)*Inf |
| |
| abs(x) >= small_threshold: |
| cosh(x) = sign(x)*exp(abs(x))/2 computed using the |
| splitexp and scaleDouble functions as for exp_amd(). |
| |
| abs(x) < small_threshold: |
| compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) |
| cosh(x) is then sign(x)*z. */ |
| |
| static const double |
| /* The max argument of coshf, but stored as a double */ |
| max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */ |
| thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ |
| log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ |
| log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ |
| |
| small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; |
| // small_threshold = 20.0; |
| /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ |
| |
| /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */ |
| |
| static const double sinh_lead[ 37] = { |
| 0.00000000000000000000e+00, /* 0x0000000000000000 */ |
| 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */ |
| 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */ |
| 1.00178749274099008204e+01, /* 0x40240926e70949ad */ |
| 2.72899171971277496596e+01, /* 0x403b4a3803703630 */ |
| 7.42032105777887522891e+01, /* 0x40528d0166f07374 */ |
| 2.01713157370279219549e+02, /* 0x406936d22f67c805 */ |
| 5.48316123273246489589e+02, /* 0x408122876ba380c9 */ |
| 1.49047882578955000099e+03, /* 0x409749ea514eca65 */ |
| 4.05154190208278987484e+03, /* 0x40afa7157430966f */ |
| 1.10132328747033916443e+04, /* 0x40c5829dced69991 */ |
| 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */ |
| 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */ |
| 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */ |
| 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */ |
| 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */ |
| 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */ |
| 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */ |
| 3.28299845686652474105e+07, /* 0x417f4f22091940bb */ |
| 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ |
| 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */ |
| 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ |
| 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ |
| 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ |
| 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ |
| 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ |
| 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ |
| 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ |
| 7.23128532145737548828e+11, /* 0x42650bba3796379a */ |
| 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ |
| 5.34323729076223046875e+12, /* 0x429370470aec28ec */ |
| 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ |
| 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ |
| 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ |
| 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ |
| 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ |
| 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ |
| |
| static const double cosh_lead[ 37] = { |
| 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ |
| 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */ |
| 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */ |
| 1.00676619957777653269e+01, /* 0x402422a497d6185e */ |
| 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */ |
| 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */ |
| 2.01715636122455890700e+02, /* 0x406936e67db9b919 */ |
| 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */ |
| 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */ |
| 4.05154202549259389343e+03, /* 0x40afa715845d8894 */ |
| 1.10132329201033226127e+04, /* 0x40c5829dd053712d */ |
| 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */ |
| 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */ |
| 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */ |
| 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */ |
| 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */ |
| 4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */ |
| 1.20774763767876680940e+07, /* 0x416709348c0ea503 */ |
| 3.28299845686652623117e+07, /* 0x417f4f22091940bf */ |
| 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ |
| 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */ |
| 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ |
| 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ |
| 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ |
| 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ |
| 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ |
| 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ |
| 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ |
| 7.23128532145737548828e+11, /* 0x42650bba3796379a */ |
| 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ |
| 5.34323729076223046875e+12, /* 0x429370470aec28ec */ |
| 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ |
| 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ |
| 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ |
| 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ |
| 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ |
| 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ |
| |
| unsigned long long ux, aux, xneg; |
| double x = fx, y, z, z1, z2; |
| int m; |
| |
| /* Special cases */ |
| |
| GET_BITS_DP64(x, ux); |
| aux = ux & ~SIGNBIT_DP64; |
| if (aux < 0x3f10000000000000) /* |x| small enough that cosh(x) = 1 */ |
| { |
| if (aux == 0) return (float)1.0; /* with no inexact */ |
| if (LAMBDA_DP64 + x > 1.0) return valf_with_flags((float)1.0, AMD_F_INEXACT); /* with inexact */ |
| } |
| else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */ |
| { |
| if (aux > PINFBITPATT_DP64) /* |x| is a NaN? */ |
| return fx + fx; |
| else /* x is infinity */ |
| return infinityf_with_flags(0); |
| } |
| |
| xneg = (aux != ux); |
| |
| y = x; |
| if (xneg) y = -x; |
| |
| if (y >= max_cosh_arg) |
| { |
| /* Return infinity with overflow flag. */ |
| /* This handles POSIX behaviour */ |
| __set_errno(ERANGE); |
| z = infinityf_with_flags(AMD_F_OVERFLOW); |
| } |
| else if (y >= small_threshold) |
| { |
| /* In this range y is large enough so that |
| the negative exponential is negligible, |
| so cosh(y) is approximated by sign(x)*exp(y)/2. The |
| code below is an inlined version of that from |
| exp() with two changes (it operates on |
| y instead of x, and the division by 2 is |
| done by reducing m by 1). */ |
| |
| splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, |
| log2_by_32_tail, &m, &z1, &z2); |
| m -= 1; |
| |
| /* scaleDouble_1 is always safe because the argument x was |
| float, rather than double */ |
| |
| z = scaleDouble_1((z1+z2),m); |
| } |
| else |
| { |
| /* In this range we find the integer part y0 of y |
| and the increment dy = y - y0. We then compute |
| |
| z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) |
| z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy) |
| |
| where sinh(y0) and cosh(y0) are tabulated above. */ |
| |
| int ind; |
| double dy, dy2, sdy, cdy; |
| |
| ind = (int)y; |
| dy = y - ind; |
| |
| dy2 = dy*dy; |
| |
| sdy = dy + dy*dy2*(0.166666666666666667013899e0 + |
| (0.833333333333329931873097e-2 + |
| (0.198412698413242405162014e-3 + |
| (0.275573191913636406057211e-5 + |
| (0.250521176994133472333666e-7 + |
| (0.160576793121939886190847e-9 + |
| 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); |
| |
| cdy = 1 + dy2*(0.500000000000000005911074e0 + |
| (0.416666666666660876512776e-1 + |
| (0.138888888889814854814536e-2 + |
| (0.248015872460622433115785e-4 + |
| (0.275573350756016588011357e-6 + |
| (0.208744349831471353536305e-8 + |
| 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); |
| |
| z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy; |
| } |
| |
| // if (xneg) z = - z; |
| return (float)z; |
| } |
| |
| weak_alias (__coshf, coshf) |