| // This file is part of Eigen, a lightweight C++ template library |
| // for linear algebra. |
| // |
| // Copyright (C) 2018-2025 Rasmus Munk Larsen <rmlarsen@gmail.com> |
| // |
| // This Source Code Form is subject to the terms of the Mozilla |
| // Public License v. 2.0. If a copy of the MPL was not distributed |
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| |
| #ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_DOUBLE_WORD_H |
| #define EIGEN_ARCH_GENERIC_PACKET_MATH_DOUBLE_WORD_H |
| |
| // IWYU pragma: private |
| #include "../../InternalHeaderCheck.h" |
| |
| namespace Eigen { |
| namespace internal { |
| |
| // This function splits x into the nearest integer n and fractional part r, |
| // such that x = n + r holds exactly. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) { |
| n = pround(x); |
| r = psub(x, n); |
| } |
| |
| // This function computes the sum {s, r}, such that x + y = s_hi + s_lo |
| // holds exactly, and s_hi = fl(x+y), if |x| >= |y|. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) { |
| s_hi = padd(x, y); |
| const Packet t = psub(s_hi, x); |
| s_lo = psub(y, t); |
| } |
| |
| #ifdef EIGEN_VECTORIZE_FMA |
| // This function implements the extended precision product of |
| // a pair of floating point numbers. Given {x, y}, it computes the pair |
| // {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and |
| // p_hi = fl(x * y). |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { |
| p_hi = pmul(x, y); |
| p_lo = pmsub(x, y, p_hi); |
| } |
| |
| // A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that |
| // x * y = xy + p_lo holds exactly. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) { |
| return pmsub(x, y, xy); |
| } |
| |
| #else |
| |
| // This function implements the Veltkamp splitting. Given a floating point |
| // number x it returns the pair {x_hi, x_lo} such that x_hi + x_lo = x holds |
| // exactly and that half of the significant of x fits in x_hi. |
| // This is Algorithm 3 from Jean-Michel Muller, "Elementary Functions", |
| // 3rd edition, Birkh\"auser, 2016. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) { |
| typedef typename unpacket_traits<Packet>::type Scalar; |
| constexpr int shift = (NumTraits<Scalar>::digits() + 1) / 2; |
| const Scalar shift_scale = Scalar(uint64_t(1) << shift); // Scalar constructor not necessarily constexpr. |
| const Packet gamma = pmul(pset1<Packet>(shift_scale + Scalar(1)), x); |
| Packet rho = psub(x, gamma); |
| x_hi = padd(rho, gamma); |
| x_lo = psub(x, x_hi); |
| } |
| |
| // This function implements Dekker's algorithm for products x * y. |
| // Given floating point numbers {x, y} computes the pair |
| // {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and |
| // p_hi = fl(x * y). |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { |
| Packet x_hi, x_lo, y_hi, y_lo; |
| veltkamp_splitting(x, x_hi, x_lo); |
| veltkamp_splitting(y, y_hi, y_lo); |
| |
| p_hi = pmul(x, y); |
| p_lo = pmadd(x_hi, y_hi, pnegate(p_hi)); |
| p_lo = pmadd(x_hi, y_lo, p_lo); |
| p_lo = pmadd(x_lo, y_hi, p_lo); |
| p_lo = pmadd(x_lo, y_lo, p_lo); |
| } |
| |
| // A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that |
| // x * y = xy + p_lo holds exactly. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) { |
| Packet x_hi, x_lo, y_hi, y_lo; |
| veltkamp_splitting(x, x_hi, x_lo); |
| veltkamp_splitting(y, y_hi, y_lo); |
| |
| Packet p_lo = pmadd(x_hi, y_hi, pnegate(xy)); |
| p_lo = pmadd(x_hi, y_lo, p_lo); |
| p_lo = pmadd(x_lo, y_hi, p_lo); |
| p_lo = pmadd(x_lo, y_lo, p_lo); |
| return p_lo; |
| } |
| |
| #endif // EIGEN_VECTORIZE_FMA |
| |
| // This function implements Dekker's algorithm for the addition |
| // of two double word numbers represented by {x_hi, x_lo} and {y_hi, y_lo}. |
| // It returns the result as a pair {s_hi, s_lo} such that |
| // x_hi + x_lo + y_hi + y_lo = s_hi + s_lo holds exactly. |
| // This is Algorithm 5 from Jean-Michel Muller, "Elementary Functions", |
| // 3rd edition, Birkh\"auser, 2016. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, |
| const Packet& y_lo, Packet& s_hi, Packet& s_lo) { |
| const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi)); |
| Packet r_hi_1, r_lo_1; |
| fast_twosum(x_hi, y_hi, r_hi_1, r_lo_1); |
| Packet r_hi_2, r_lo_2; |
| fast_twosum(y_hi, x_hi, r_hi_2, r_lo_2); |
| const Packet r_hi = pselect(x_greater_mask, r_hi_1, r_hi_2); |
| |
| const Packet s1 = padd(padd(y_lo, r_lo_1), x_lo); |
| const Packet s2 = padd(padd(x_lo, r_lo_2), y_lo); |
| const Packet s = pselect(x_greater_mask, s1, s2); |
| |
| fast_twosum(r_hi, s, s_hi, s_lo); |
| } |
| |
| // This is a version of twosum for double word numbers, |
| // which assumes that |x_hi| >= |y_hi|. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, |
| const Packet& y_lo, Packet& s_hi, Packet& s_lo) { |
| Packet r_hi, r_lo; |
| fast_twosum(x_hi, y_hi, r_hi, r_lo); |
| const Packet s = padd(padd(y_lo, r_lo), x_lo); |
| fast_twosum(r_hi, s, s_hi, s_lo); |
| } |
| |
| // This is a version of twosum for adding a floating point number x to |
| // double word number {y_hi, y_lo} number, with the assumption |
| // that |x| >= |y_hi|. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo, |
| Packet& s_hi, Packet& s_lo) { |
| Packet r_hi, r_lo; |
| fast_twosum(x, y_hi, r_hi, r_lo); |
| const Packet s = padd(y_lo, r_lo); |
| fast_twosum(r_hi, s, s_hi, s_lo); |
| } |
| |
| // This function implements the multiplication of a double word |
| // number represented by {x_hi, x_lo} by a floating point number y. |
| // It returns the result as a pair {p_hi, p_lo} such that |
| // (x_hi + x_lo) * y = p_hi + p_lo hold with a relative error |
| // of less than 2*2^{-2p}, where p is the number of significand bit |
| // in the floating point type. |
| // This is Algorithm 7 from Jean-Michel Muller, "Elementary Functions", |
| // 3rd edition, Birkh\"auser, 2016. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y, |
| Packet& p_hi, Packet& p_lo) { |
| Packet c_hi, c_lo1; |
| twoprod(x_hi, y, c_hi, c_lo1); |
| const Packet c_lo2 = pmul(x_lo, y); |
| Packet t_hi, t_lo1; |
| fast_twosum(c_hi, c_lo2, t_hi, t_lo1); |
| const Packet t_lo2 = padd(t_lo1, c_lo1); |
| fast_twosum(t_hi, t_lo2, p_hi, p_lo); |
| } |
| |
| // This function implements the multiplication of two double word |
| // numbers represented by {x_hi, x_lo} and {y_hi, y_lo}. |
| // It returns the result as a pair {p_hi, p_lo} such that |
| // (x_hi + x_lo) * (y_hi + y_lo) = p_hi + p_lo holds with a relative error |
| // of less than 2*2^{-2p}, where p is the number of significand bit |
| // in the floating point type. |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, |
| const Packet& y_lo, Packet& p_hi, Packet& p_lo) { |
| Packet p_hi_hi, p_hi_lo; |
| twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo); |
| Packet p_lo_hi, p_lo_lo; |
| twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo); |
| fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo); |
| } |
| |
| // This function implements the division of double word {x_hi, x_lo} |
| // by float y. This is Algorithm 15 from "Tight and rigorous error bounds |
| // for basic building blocks of double-word arithmetic", Joldes, Muller, & Popescu, |
| // 2017. https://hal.archives-ouvertes.fr/hal-01351529 |
| template <typename Packet> |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, |
| Packet& z_hi, Packet& z_lo) { |
| const Packet t_hi = pdiv(x_hi, y); |
| Packet pi_hi, pi_lo; |
| twoprod(t_hi, y, pi_hi, pi_lo); |
| const Packet delta_hi = psub(x_hi, pi_hi); |
| const Packet delta_t = psub(delta_hi, pi_lo); |
| const Packet delta = padd(delta_t, x_lo); |
| const Packet t_lo = pdiv(delta, y); |
| fast_twosum(t_hi, t_lo, z_hi, z_lo); |
| } |
| |
| } // end namespace internal |
| } // end namespace Eigen |
| |
| #endif // EIGEN_ARCH_GENERIC_PACKET_MATH_DOUBLE_WORD_H |