blob: 689c6d82764310649728e9fb3bfd1bb20d0f0cdd [file] [log] [blame]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_MATHFUNCTIONSIMPL_H
#define EIGEN_MATHFUNCTIONSIMPL_H
// IWYU pragma: private
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
/** \internal Fast reciprocal using Newton-Raphson's method.
Preconditions:
1. The starting guess provided in approx_a_recip must have at least half
the leading mantissa bits in the correct result, such that a single
Newton-Raphson step is sufficient to get within 1-2 ulps of the currect
result.
2. If a is zero, approx_a_recip must be infinite with the same sign as a.
3. If a is infinite, approx_a_recip must be zero with the same sign as a.
If the preconditions are satisfied, which they are for for the _*_rcp_ps
instructions on x86, the result has a maximum relative error of 2 ulps,
and correctly handles reciprocals of zero, infinity, and NaN.
*/
template <typename Packet, int Steps>
struct generic_reciprocal_newton_step {
static_assert(Steps > 0, "Steps must be at least 1.");
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_a_recip) {
using Scalar = typename unpacket_traits<Packet>::type;
const Packet two = pset1<Packet>(Scalar(2));
// Refine the approximation using one Newton-Raphson step:
// x_{i} = x_{i-1} * (2 - a * x_{i-1})
const Packet x = generic_reciprocal_newton_step<Packet, Steps - 1>::run(a, approx_a_recip);
const Packet tmp = pnmadd(a, x, two);
// If tmp is NaN, it means that a is either +/-0 or +/-Inf.
// In this case return the approximation directly.
const Packet is_not_nan = pcmp_eq(tmp, tmp);
return pselect(is_not_nan, pmul(x, tmp), x);
}
};
template <typename Packet>
struct generic_reciprocal_newton_step<Packet, 0> {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& /*unused*/, const Packet& approx_rsqrt) {
return approx_rsqrt;
}
};
/** \internal Fast reciprocal sqrt using Newton-Raphson's method.
Preconditions:
1. The starting guess provided in approx_a_recip must have at least half
the leading mantissa bits in the correct result, such that a single
Newton-Raphson step is sufficient to get within 1-2 ulps of the currect
result.
2. If a is zero, approx_a_recip must be infinite with the same sign as a.
3. If a is infinite, approx_a_recip must be zero with the same sign as a.
If the preconditions are satisfied, which they are for for the _*_rcp_ps
instructions on x86, the result has a maximum relative error of 2 ulps,
and correctly handles zero, infinity, and NaN. Positive denormals are
treated as zero.
*/
template <typename Packet, int Steps>
struct generic_rsqrt_newton_step {
static_assert(Steps > 0, "Steps must be at least 1.");
using Scalar = typename unpacket_traits<Packet>::type;
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_rsqrt) {
constexpr Scalar kMinusHalf = Scalar(-1) / Scalar(2);
const Packet cst_minus_half = pset1<Packet>(kMinusHalf);
const Packet cst_minus_one = pset1<Packet>(Scalar(-1));
Packet inv_sqrt = approx_rsqrt;
for (int step = 0; step < Steps; ++step) {
// Refine the approximation using one Newton-Raphson step:
// h_n = (x * inv_sqrt) * inv_sqrt - 1 (so that h_n is nearly 0).
// inv_sqrt = inv_sqrt - 0.5 * inv_sqrt * h_n
Packet r2 = pmul(a, inv_sqrt);
Packet half_r = pmul(inv_sqrt, cst_minus_half);
Packet h_n = pmadd(r2, inv_sqrt, cst_minus_one);
inv_sqrt = pmadd(half_r, h_n, inv_sqrt);
}
// If x is NaN, then either:
// 1) the input is NaN
// 2) zero and infinity were multiplied
// In either of these cases, return approx_rsqrt
return pselect(pisnan(inv_sqrt), approx_rsqrt, inv_sqrt);
}
};
template <typename Packet>
struct generic_rsqrt_newton_step<Packet, 0> {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& /*unused*/, const Packet& approx_rsqrt) {
return approx_rsqrt;
}
};
/** \internal Fast sqrt using Newton-Raphson's method.
Preconditions:
1. The starting guess for the reciprocal sqrt provided in approx_rsqrt must
have at least half the leading mantissa bits in the correct result, such
that a single Newton-Raphson step is sufficient to get within 1-2 ulps of
the currect result.
2. If a is zero, approx_rsqrt must be infinite.
3. If a is infinite, approx_rsqrt must be zero.
If the preconditions are satisfied, which they are for for the _*_rsqrt_ps
instructions on x86, the result has a maximum relative error of 2 ulps,
and correctly handles zero and infinity, and NaN. Positive denormal inputs
are treated as zero.
*/
template <typename Packet, int Steps = 1>
struct generic_sqrt_newton_step {
static_assert(Steps > 0, "Steps must be at least 1.");
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_rsqrt) {
using Scalar = typename unpacket_traits<Packet>::type;
const Packet one_point_five = pset1<Packet>(Scalar(1.5));
const Packet minus_half = pset1<Packet>(Scalar(-0.5));
// If a is inf or zero, return a directly.
const Packet inf_mask = pcmp_eq(a, pset1<Packet>(NumTraits<Scalar>::infinity()));
const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask);
// Do a single step of Newton's iteration for reciprocal square root:
// x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n))).
// The Newton's step is computed this way to avoid over/under-flows.
Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
for (int step = 1; step < Steps; ++step) {
rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five));
}
// Return sqrt(x) = x * rsqrt(x) for non-zero finite positive arguments.
// Return a itself for 0 or +inf, NaN for negative arguments.
return pselect(return_a, a, pmul(a, rsqrt));
}
};
template <typename RealScalar>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) {
// IEEE IEC 6059 special cases.
if ((numext::isinf)(x) || (numext::isinf)(y)) return NumTraits<RealScalar>::infinity();
if ((numext::isnan)(x) || (numext::isnan)(y)) return NumTraits<RealScalar>::quiet_NaN();
EIGEN_USING_STD(sqrt);
RealScalar p, qp;
p = numext::maxi(x, y);
if (numext::is_exactly_zero(p)) return RealScalar(0);
qp = numext::mini(y, x) / p;
return p * sqrt(RealScalar(1) + qp * qp);
}
template <typename Scalar>
struct hypot_impl {
typedef typename NumTraits<Scalar>::Real RealScalar;
static EIGEN_DEVICE_FUNC inline RealScalar run(const Scalar& x, const Scalar& y) {
EIGEN_USING_STD(abs);
return positive_real_hypot<RealScalar>(abs(x), abs(y));
}
};
// Generic complex sqrt implementation that correctly handles corner cases
// according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt
template <typename T>
EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) {
// Computes the principal sqrt of the input.
//
// For a complex square root of the number x + i*y. We want to find real
// numbers u and v such that
// (u + i*v)^2 = x + i*y <=>
// u^2 - v^2 + i*2*u*v = x + i*v.
// By equating the real and imaginary parts we get:
// u^2 - v^2 = x
// 2*u*v = y.
//
// For x >= 0, this has the numerically stable solution
// u = sqrt(0.5 * (x + sqrt(x^2 + y^2)))
// v = y / (2 * u)
// and for x < 0,
// v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))
// u = y / (2 * v)
//
// Letting w = sqrt(0.5 * (|x| + |z|)),
// if x == 0: u = w, v = sign(y) * w
// if x > 0: u = w, v = y / (2 * w)
// if x < 0: u = |y| / (2 * w), v = sign(y) * w
const T x = numext::real(z);
const T y = numext::imag(z);
const T zero = T(0);
const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y)));
return (numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y)
: numext::is_exactly_zero(x) ? std::complex<T>(w, y < zero ? -w : w)
: x > zero ? std::complex<T>(w, y / (2 * w))
: std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w);
}
// Generic complex rsqrt implementation.
template <typename T>
EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) {
// Computes the principal reciprocal sqrt of the input.
//
// For a complex reciprocal square root of the number z = x + i*y. We want to
// find real numbers u and v such that
// (u + i*v)^2 = 1 / (x + i*y) <=>
// u^2 - v^2 + i*2*u*v = x/|z|^2 - i*v/|z|^2.
// By equating the real and imaginary parts we get:
// u^2 - v^2 = x/|z|^2
// 2*u*v = y/|z|^2.
//
// For x >= 0, this has the numerically stable solution
// u = sqrt(0.5 * (x + |z|)) / |z|
// v = -y / (2 * u * |z|)
// and for x < 0,
// v = -sign(y) * sqrt(0.5 * (-x + |z|)) / |z|
// u = -y / (2 * v * |z|)
//
// Letting w = sqrt(0.5 * (|x| + |z|)),
// if x == 0: u = w / |z|, v = -sign(y) * w / |z|
// if x > 0: u = w / |z|, v = -y / (2 * w * |z|)
// if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z|
const T x = numext::real(z);
const T y = numext::imag(z);
const T zero = T(0);
const T abs_z = numext::hypot(x, y);
const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z));
const T woz = w / abs_z;
// Corner cases consistent with 1/sqrt(z) on gcc/clang.
return numext::is_exactly_zero(abs_z) ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN())
: ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero)
: numext::is_exactly_zero(x) ? std::complex<T>(woz, y < zero ? woz : -woz)
: x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z))
: std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz);
}
template <typename T>
EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) {
// Computes complex log.
T a = numext::abs(z);
EIGEN_USING_STD(atan2);
T b = atan2(z.imag(), z.real());
return std::complex<T>(numext::log(a), b);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_MATHFUNCTIONSIMPL_H