| /** |
| * This file has no copyright assigned and is placed in the Public Domain. |
| * This file is part of the mingw-w64 runtime package. |
| * No warranty is given; refer to the file DISCLAIMER.PD within this package. |
| */ |
| double fma(double x, double y, double z); |
| |
| #if defined(_ARM_) || defined(__arm__) |
| |
| /* Use hardware FMA on ARM. */ |
| double fma(double x, double y, double z){ |
| __asm__ ( |
| "fmacd %0, %1, %2 \n" |
| : "+w"(z) |
| : "w"(x), "w"(y) |
| ); |
| return z; |
| } |
| |
| #elif defined(_ARM64_) || defined(__aarch64__) |
| |
| /* Use hardware FMA on ARM64. */ |
| double fma(double x, double y, double z){ |
| __asm__ ( |
| "fmadd %d0, %d1, %d2, %d0 \n" |
| : "+w"(z) |
| : "w"(x), "w"(y) |
| ); |
| return z; |
| } |
| |
| #elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__) |
| |
| #include <math.h> |
| #include <stdint.h> |
| |
| /* This is in accordance with the IEC 559 double-precision format. |
| * Be advised that due to the hidden bit, the higher half actually has 26 bits. |
| * Multiplying two 27-bit numbers will cause a 1-ULP error, which we cannot |
| * avoid. It is kept in the very last position. |
| */ |
| typedef union iec559_double_ { |
| struct __attribute__((__packed__)) { |
| uint64_t mlo : 27; |
| uint64_t mhi : 25; |
| uint64_t exp : 11; |
| uint64_t sgn : 1; |
| }; |
| double f; |
| } iec559_double; |
| |
| static inline void break_down(iec559_double *restrict lo, iec559_double *restrict hi, double x) { |
| hi->f = x; |
| /* Erase low-order significant bits. `hi->f` now has only 26 significant bits. */ |
| hi->mlo = 0; |
| /* Store the low-order half. It will be normalized by the hardware. */ |
| lo->f = x - hi->f; |
| /* Preserve signness in case of zero. */ |
| lo->sgn = hi->sgn; |
| } |
| |
| double fma(double x, double y, double z) { |
| /* |
| POSIX-2013: |
| 1. If x or y are NaN, a NaN shall be returned. |
| 2. If x multiplied by y is an exact infinity and z is also an infinity |
| but with the opposite sign, a domain error shall occur, and either a NaN |
| (if supported), or an implementation-defined value shall be returned. |
| 3. If one of x and y is infinite, the other is zero, and z is not a NaN, |
| a domain error shall occur, and either a NaN (if supported), or an |
| implementation-defined value shall be returned. |
| 4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN |
| shall be returned and a domain error may occur. |
| 5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned. |
| */ |
| /* Check whether the result is finite. */ |
| double ret = x * y + z; |
| if(!isfinite(ret)) { |
| return ret; /* If this naive check doesn't yield a finite value, the FMA isn't |
| likely to return one either. Forward the value as is. */ |
| } |
| iec559_double xlo, xhi, ylo, yhi; |
| break_down(&xlo, &xhi, x); |
| break_down(&ylo, &yhi, y); |
| /* The order of these four statements is essential. Don't move them around. */ |
| ret = z; |
| ret += xhi.f * yhi.f; /* The most significant item comes first. */ |
| ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */ |
| ret += xlo.f * ylo.f; /* The least significant item comes last. */ |
| return ret; |
| } |
| |
| #else |
| |
| #error Please add FMA implementation for this platform. |
| |
| #endif |