Update Eigen to commit:f19a6803c87cb8486c3147cd180edb02c9ff9a68 CHANGELOG ========= f19a6803c - Refactor special case handling in pow(x,y) and revert to repeated squaring for <float,int> 5064cb7d5 - Add test for using pcast on scalars. 1ea61a5d2 - Improve pow(x,y): 25% speedup, increase accuracy for integer exponents. 8ad4344ca - optimize setConstant, setZero PiperOrigin-RevId: 700731159 Change-Id: I762ac07c755af7d2e5d030e3a09291fedc604d1a
diff --git a/Eigen/Core b/Eigen/Core index e6dbe3a..f87f3d2 100644 --- a/Eigen/Core +++ b/Eigen/Core
@@ -318,6 +318,7 @@ #include "src/Core/PlainObjectBase.h" #include "src/Core/Matrix.h" #include "src/Core/Array.h" +#include "src/Core/Fill.h" #include "src/Core/CwiseTernaryOp.h" #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h"
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f40b2f4..e70d555 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h
@@ -737,20 +737,6 @@ dense_assignment_loop<Kernel>::run(kernel); } -// Specialization for filling the destination with a constant value. -#if !EIGEN_COMP_MSVC -#ifndef EIGEN_GPU_COMPILE_PHASE -template <typename DstXprType> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop( - DstXprType& dst, - const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, - const internal::assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>& func) { - resize_if_allowed(dst, src, func); - std::fill_n(dst.data(), dst.size(), src.functor()()); -} -#endif -#endif - template <typename DstXprType, typename SrcXprType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) { call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>());
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 39c33cf..86ddd5e 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -343,7 +343,8 @@ */ template <typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) { - return derived() = Constant(rows(), cols(), val); + internal::eigen_fill_impl<Derived>::run(derived(), val); + return derived(); } /** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val. @@ -547,7 +548,8 @@ */ template <typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero() { - return setConstant(Scalar(0)); + internal::eigen_zero_impl<Derived>::run(derived()); + return derived(); } /** Resizes to the given \a size, and sets all coefficients in this expression to zero. @@ -562,7 +564,7 @@ template <typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setZero(Index newSize) { resize(newSize); - return setConstant(Scalar(0)); + return setZero(); } /** Resizes to the given size, and sets all coefficients in this expression to zero. @@ -578,7 +580,7 @@ template <typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setZero(Index rows, Index cols) { resize(rows, cols); - return setConstant(Scalar(0)); + return setZero(); } /** Resizes to the given size, changing only the number of columns, and sets all
diff --git a/Eigen/src/Core/Fill.h b/Eigen/src/Core/Fill.h new file mode 100644 index 0000000..048b51d --- /dev/null +++ b/Eigen/src/Core/Fill.h
@@ -0,0 +1,94 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charles Schlosser <cs.schlosser@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FILL_H +#define EIGEN_FILL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template <typename Xpr> +struct eigen_fill_helper : std::false_type {}; + +template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> +struct eigen_fill_helper<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> : std::true_type {}; + +template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> +struct eigen_fill_helper<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> : std::true_type {}; + +template <typename Xpr, int BlockRows, int BlockCols> +struct eigen_fill_helper<Block<Xpr, BlockRows, BlockCols, /*InnerPanel*/ true>> : eigen_fill_helper<Xpr> {}; + +template <typename Xpr, int BlockRows, int BlockCols> +struct eigen_fill_helper<Block<Xpr, BlockRows, BlockCols, /*InnerPanel*/ false>> + : std::integral_constant<bool, eigen_fill_helper<Xpr>::value && + (Xpr::IsRowMajor ? (BlockRows == 1) : (BlockCols == 1))> {}; + +template <typename Xpr, int Options, typename StrideType> +struct eigen_fill_helper<Map<Xpr, Options, StrideType>> + : std::integral_constant<bool, eigen_fill_helper<Xpr>::value && + (evaluator<Map<Xpr, Options, StrideType>>::Flags & LinearAccessBit)> {}; + +template <typename Xpr, bool use_fill = eigen_fill_helper<Xpr>::value> +struct eigen_fill_impl { + using Scalar = typename Xpr::Scalar; + using Func = scalar_constant_op<Scalar>; + using PlainObject = typename Xpr::PlainObject; + using Constant = CwiseNullaryOp<Func, PlainObject>; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) { + dst = Constant(dst.rows(), dst.cols(), Func(val)); + } +}; + +#if !EIGEN_COMP_MSVC +#ifndef EIGEN_GPU_COMPILE_PHASE +template <typename Xpr> +struct eigen_fill_impl<Xpr, /*use_fill*/ true> { + using Scalar = typename Xpr::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) { + EIGEN_USING_STD(fill_n); + fill_n(dst.data(), dst.size(), val); + } +}; +#endif +#endif + +template <typename Xpr> +struct eigen_memset_helper { + static constexpr bool value = std::is_trivial<typename Xpr::Scalar>::value && eigen_fill_helper<Xpr>::value; +}; + +template <typename Xpr, bool use_memset = eigen_memset_helper<Xpr>::value> +struct eigen_zero_impl { + using Scalar = typename Xpr::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { eigen_fill_impl<Xpr, false>::run(dst, Scalar(0)); } +}; + +template <typename Xpr> +struct eigen_zero_impl<Xpr, /*use_memset*/ true> { + using Scalar = typename Xpr::Scalar; + static constexpr size_t max_bytes = (std::numeric_limits<std::ptrdiff_t>::max)(); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { + const size_t num_bytes = dst.size() * sizeof(Scalar); +#ifndef EIGEN_NO_DEBUG + if (num_bytes > max_bytes) throw_std_bad_alloc(); +#endif + EIGEN_USING_STD(memset); + memset(dst.data(), 0, num_bytes); + } +}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_FILL_H
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index c7a9846..26a4634 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h
@@ -194,7 +194,7 @@ static constexpr int Size = unpacket_traits<Packet>::size; using DefaultPacket = typename packet_traits<Scalar>::type; static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size; - static constexpr bool value = Size < DefaultSize; + static constexpr bool value = Size != 1 && Size < DefaultSize; }; template <typename Src, typename Tgt>
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 652892e..174eb57 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -1837,81 +1837,53 @@ }; // This specialization uses a more accurate algorithm to compute log2(x) for -// floats in [1/sqrt(2);sqrt(2)] with a relative accuracy of ~6.42e-10. +// floats in [1/sqrt(2);sqrt(2)] with a relative accuracy of ~6.56508e-10. // This additional accuracy is needed to counter the error-magnification // inherent in multiplying by a potentially large exponent in pow(x,y). -// The minimax polynomial used was calculated using the Sollya tool. -// See sollya.org. +// The minimax polynomial used was calculated using the Rminimax tool, +// see https://gitlab.inria.fr/sfilip/rminimax. +// Command line: +// $ ratapprox --function="log2(1+x)/x" --dom='[-0.2929,0.41422]' +// --type=[10,0] +// --numF="[D,D,SG]" --denF="[SG]" --log --dispCoeff="dec" +// +// The resulting implementation of pow(x,y) is accurate to 3 ulps. template <> struct accurate_log2<float> { template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) { - // The function log(1+x)/x is approximated in the interval - // [1/sqrt(2)-1;sqrt(2)-1] by a degree 10 polynomial of the form - // Q(x) = (C0 + x * (C1 + x * (C2 + x * (C3 + x * P(x))))), - // where the degree 6 polynomial P(x) is evaluated in single precision, - // while the remaining 4 terms of Q(x), as well as the final multiplication by x - // to reconstruct log(1+x) are evaluated in extra precision using - // double word arithmetic. C0 through C3 are extra precise constants - // stored as double words. - // - // The polynomial coefficients were calculated using Sollya commands: - // > n = 10; - // > f = log2(1+x)/x; - // > interval = [sqrt(0.5)-1;sqrt(2)-1]; - // > p = fpminimax(f,n,[|double,double,double,double,single...|],interval,relative,floating); + // Split the two lowest order constant coefficient into double-word representation. + constexpr double kC0 = 1.442695041742110273474963832995854318141937255859375e+00; + constexpr float kC0_hi = static_cast<float>(kC0); + constexpr float kC0_lo = static_cast<float>(kC0 - static_cast<double>(kC0_hi)); + const Packet c0_hi = pset1<Packet>(kC0_hi); + const Packet c0_lo = pset1<Packet>(kC0_lo); - const Packet p6 = pset1<Packet>(9.703654795885e-2f); - const Packet p5 = pset1<Packet>(-0.1690667718648f); - const Packet p4 = pset1<Packet>(0.1720575392246f); - const Packet p3 = pset1<Packet>(-0.1789081543684f); - const Packet p2 = pset1<Packet>(0.2050433009862f); - const Packet p1 = pset1<Packet>(-0.2404672354459f); - const Packet p0 = pset1<Packet>(0.2885761857032f); + constexpr double kC1 = -7.2134751588268664068692714863573201000690460205078125e-01; + constexpr float kC1_hi = static_cast<float>(kC1); + constexpr float kC1_lo = static_cast<float>(kC1 - static_cast<double>(kC1_hi)); + const Packet c1_hi = pset1<Packet>(kC1_hi); + const Packet c1_lo = pset1<Packet>(kC1_lo); - const Packet C3_hi = pset1<Packet>(-0.360674142838f); - const Packet C3_lo = pset1<Packet>(-6.13283912543e-09f); - const Packet C2_hi = pset1<Packet>(0.480897903442f); - const Packet C2_lo = pset1<Packet>(-1.44861207474e-08f); - const Packet C1_hi = pset1<Packet>(-0.721347510815f); - const Packet C1_lo = pset1<Packet>(-4.84483164698e-09f); - const Packet C0_hi = pset1<Packet>(1.44269502163f); - const Packet C0_lo = pset1<Packet>(2.01711713999e-08f); + constexpr float c[] = { + 9.7010828554630279541015625e-02, -1.6896486282348632812500000e-01, 1.7200836539268493652343750e-01, + -1.7892272770404815673828125e-01, 2.0505344867706298828125000e-01, -2.4046677350997924804687500e-01, + 2.8857553005218505859375000e-01, -3.6067414283752441406250000e-01, 4.8089790344238281250000000e-01}; + + // Evaluate the higher order terms in the polynomial using + // standard arithmetic. const Packet one = pset1<Packet>(1.0f); - const Packet x = psub(z, one); - // Evaluate P(x) in working precision. - // We evaluate it in multiple parts to improve instruction level - // parallelism. - Packet x2 = pmul(x, x); - Packet p_even = pmadd(p6, x2, p4); - p_even = pmadd(p_even, x2, p2); - p_even = pmadd(p_even, x2, p0); - Packet p_odd = pmadd(p5, x2, p3); - p_odd = pmadd(p_odd, x2, p1); - Packet p = pmadd(p_odd, x, p_even); - - // Now evaluate the low-order tems of Q(x) in double word precision. - // In the following, due to the alternating signs and the fact that - // |x| < sqrt(2)-1, we can assume that |C*_hi| >= q_i, and use - // fast_twosum instead of the slower twosum. - Packet q_hi, q_lo; - Packet t_hi, t_lo; - // C3 + x * p(x) - twoprod(p, x, t_hi, t_lo); - fast_twosum(C3_hi, C3_lo, t_hi, t_lo, q_hi, q_lo); - // C2 + x * p(x) - twoprod(q_hi, q_lo, x, t_hi, t_lo); - fast_twosum(C2_hi, C2_lo, t_hi, t_lo, q_hi, q_lo); - // C1 + x * p(x) - twoprod(q_hi, q_lo, x, t_hi, t_lo); - fast_twosum(C1_hi, C1_lo, t_hi, t_lo, q_hi, q_lo); - // C0 + x * p(x) - twoprod(q_hi, q_lo, x, t_hi, t_lo); - fast_twosum(C0_hi, C0_lo, t_hi, t_lo, q_hi, q_lo); - - // log(z) ~= x * Q(x) - twoprod(q_hi, q_lo, x, log2_x_hi, log2_x_lo); + Packet p = ppolevl<Packet, 8>::run(x, c); + // Evaluate the final two step in Horner's rule using double-word + // arithmetic. + Packet p_hi, p_lo; + twoprod(x, p, p_hi, p_lo); + fast_twosum(c1_hi, c1_lo, p_hi, p_lo, p_hi, p_lo); + twoprod(p_hi, p_lo, x, p_hi, p_lo); + fast_twosum(c0_hi, c0_lo, p_hi, p_lo, p_hi, p_lo); + // Multiply by x to recover log2(z). + twoprod(p_hi, p_lo, x, log2_x_hi, log2_x_lo); } }; @@ -2006,16 +1978,6 @@ } }; -// This function accurately computes exp2(x) for x in [-0.5:0.5], which is -// needed in pow(x,y). -template <typename Scalar> -struct fast_accurate_exp2 { - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { - return generic_exp2(x); - } -}; - // This function implements the non-trivial case of pow(x,y) where x is // positive and y is (possibly) non-integer. // Formally, pow(x,y) = exp2(y * log2(x)), where exp2(x) is shorthand for 2^x. @@ -2081,69 +2043,91 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(const Packet& x, const Packet& y) { typedef typename unpacket_traits<Packet>::type Scalar; - const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity()); - const Packet cst_neg_inf = pset1<Packet>(-NumTraits<Scalar>::infinity()); + const Packet cst_inf = pset1<Packet>(NumTraits<Scalar>::infinity()); const Packet cst_zero = pset1<Packet>(Scalar(0)); const Packet cst_one = pset1<Packet>(Scalar(1)); const Packet cst_nan = pset1<Packet>(NumTraits<Scalar>::quiet_NaN()); - const Packet abs_x = pabs(x); + const Packet x_abs = pabs(x); + Packet pow = generic_pow_impl(x_abs, y); + + // In the following we enforce the special case handling prescribed in + // https://en.cppreference.com/w/cpp/numeric/math/pow. + // Predicates for sign and magnitude of x. - const Packet abs_x_is_zero = pcmp_eq(abs_x, cst_zero); + const Packet x_is_negative = pcmp_lt(x, cst_zero); + const Packet x_is_zero = pcmp_eq(x, cst_zero); + const Packet x_is_one = pcmp_eq(x, cst_one); const Packet x_has_signbit = psignbit(x); - const Packet x_is_neg = pandnot(x_has_signbit, abs_x_is_zero); - const Packet x_is_neg_zero = pand(x_has_signbit, abs_x_is_zero); - const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf); - const Packet abs_x_is_one = pcmp_eq(abs_x, cst_one); - const Packet abs_x_is_gt_one = pcmp_lt(cst_one, abs_x); - const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_one); - const Packet x_is_one = pandnot(abs_x_is_one, x_is_neg); - const Packet x_is_neg_one = pand(abs_x_is_one, x_is_neg); - const Packet x_is_nan = pisnan(x); + const Packet x_abs_gt_one = pcmp_lt(cst_one, x_abs); + const Packet x_abs_is_inf = pcmp_eq(x_abs, cst_inf); // Predicates for sign and magnitude of y. - const Packet abs_y = pabs(y); + const Packet y_abs = pabs(y); + const Packet y_abs_is_inf = pcmp_eq(y_abs, cst_inf); + const Packet y_is_negative = pcmp_lt(y, cst_zero); + const Packet y_is_zero = pcmp_eq(y, cst_zero); const Packet y_is_one = pcmp_eq(y, cst_one); - const Packet abs_y_is_zero = pcmp_eq(abs_y, cst_zero); - const Packet y_is_neg = pcmp_lt(y, cst_zero); - const Packet y_is_pos = pandnot(ptrue(y), por(abs_y_is_zero, y_is_neg)); - const Packet y_is_nan = pisnan(y); - const Packet abs_y_is_inf = pcmp_eq(abs_y, cst_pos_inf); - EIGEN_CONSTEXPR Scalar huge_exponent = - (NumTraits<Scalar>::max_exponent() * Scalar(EIGEN_LN2)) / NumTraits<Scalar>::epsilon(); - const Packet abs_y_is_huge = pcmp_le(pset1<Packet>(huge_exponent), pabs(y)); - - // Predicates for whether y is integer and/or even. - const Packet y_is_int = pcmp_eq(pfloor(y), y); + // Predicates for whether y is integer and odd/even. + const Packet y_is_int = pandnot(pcmp_eq(pfloor(y), y), y_abs_is_inf); const Packet y_div_2 = pmul(y, pset1<Packet>(Scalar(0.5))); const Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2); + const Packet y_is_odd_int = pandnot(y_is_int, y_is_even); + // Smallest exponent for which (1 + epsilon) overflows to infinity. + EIGEN_CONSTEXPR Scalar huge_exponent = + (NumTraits<Scalar>::max_exponent() * Scalar(EIGEN_LN2)) / NumTraits<Scalar>::epsilon(); + const Packet y_abs_is_huge = pcmp_le(pset1<Packet>(huge_exponent), y_abs); - // Predicates encoding special cases for the value of pow(x,y) - const Packet invalid_negative_x = pandnot(pandnot(pandnot(x_is_neg, abs_x_is_inf), y_is_int), abs_y_is_inf); - const Packet pow_is_nan = por(invalid_negative_x, por(x_is_nan, y_is_nan)); - const Packet pow_is_one = - por(por(x_is_one, abs_y_is_zero), pand(x_is_neg_one, por(abs_y_is_inf, pandnot(y_is_even, invalid_negative_x)))); - const Packet pow_is_zero = por(por(por(pand(abs_x_is_zero, y_is_pos), pand(abs_x_is_inf, y_is_neg)), - pand(pand(abs_x_is_lt_one, abs_y_is_huge), y_is_pos)), - pand(pand(abs_x_is_gt_one, abs_y_is_huge), y_is_neg)); - const Packet pow_is_inf = por(por(por(pand(abs_x_is_zero, y_is_neg), pand(abs_x_is_inf, y_is_pos)), - pand(pand(abs_x_is_lt_one, abs_y_is_huge), y_is_neg)), - pand(pand(abs_x_is_gt_one, abs_y_is_huge), y_is_pos)); - const Packet pow_is_neg_zero = pand(pandnot(y_is_int, y_is_even), - por(pand(y_is_neg, pand(abs_x_is_inf, x_is_neg)), pand(y_is_pos, x_is_neg_zero))); - const Packet inf_val = - pselect(pandnot(pand(por(pand(abs_x_is_inf, x_is_neg), pand(x_is_neg_zero, y_is_neg)), y_is_int), y_is_even), - cst_neg_inf, cst_pos_inf); - // General computation of pow(x,y) for positive x or negative x and integer y. - const Packet negate_pow_abs = pandnot(x_is_neg, y_is_even); - const Packet pow_abs = generic_pow_impl(abs_x, y); - return pselect(y_is_one, x, - pselect(pow_is_one, cst_one, - pselect(pow_is_nan, cst_nan, - pselect(pow_is_inf, inf_val, - pselect(pow_is_neg_zero, pnegate(cst_zero), - pselect(pow_is_zero, cst_zero, - pselect(negate_pow_abs, pnegate(pow_abs), pow_abs))))))); + // * pow(base, exp) returns NaN if base is finite and negative + // and exp is finite and non-integer. + pow = pselect(pandnot(x_is_negative, y_is_int), cst_nan, pow); + + // * pow(±0, exp), where exp is negative, finite, and is an even integer or + // a non-integer, returns +∞ + // * pow(±0, exp), where exp is positive non-integer or a positive even + // integer, returns +0 + // * pow(+0, exp), where exp is a negative odd integer, returns +∞ + // * pow(-0, exp), where exp is a negative odd integer, returns -∞ + // * pow(+0, exp), where exp is a positive odd integer, returns +0 + // * pow(-0, exp), where exp is a positive odd integer, returns -0 + // Sign is flipped by the rule below. + pow = pselect(x_is_zero, pselect(y_is_negative, cst_inf, cst_zero), pow); + + // pow(base, exp) returns -pow(abs(base), exp) if base has the sign bit set, + // and exp is an odd integer exponent. + pow = pselect(pand(x_has_signbit, y_is_odd_int), pnegate(pow), pow); + + // * pow(base, -∞) returns +∞ for any |base|<1 + // * pow(base, -∞) returns +0 for any |base|>1 + // * pow(base, +∞) returns +0 for any |base|<1 + // * pow(base, +∞) returns +∞ for any |base|>1 + // * pow(±0, -∞) returns +∞ + // * pow(-1, +-∞) = 1 + Packet inf_y_val = pselect(por(pand(y_is_negative, x_is_zero), pxor(y_is_negative, x_abs_gt_one)), cst_inf, cst_zero); + inf_y_val = pselect(pcmp_eq(x, pset1<Packet>(Scalar(-1.0))), cst_one, inf_y_val); + pow = pselect(y_abs_is_huge, inf_y_val, pow); + + // * pow(+∞, exp) returns +0 for any negative exp + // * pow(+∞, exp) returns +∞ for any positive exp + // * pow(-∞, exp) returns -0 if exp is a negative odd integer. + // * pow(-∞, exp) returns +0 if exp is a negative non-integer or negative + // even integer. + // * pow(-∞, exp) returns -∞ if exp is a positive odd integer. + // * pow(-∞, exp) returns +∞ if exp is a positive non-integer or positive + // even integer. + auto x_pos_inf_value = pselect(y_is_negative, cst_zero, cst_inf); + auto x_neg_inf_value = pselect(y_is_odd_int, pnegate(x_pos_inf_value), x_pos_inf_value); + pow = pselect(x_abs_is_inf, pselect(x_is_negative, x_neg_inf_value, x_pos_inf_value), pow); + + // All cases of NaN inputs return NaN, except the two below. + pow = pselect(por(pisnan(x), pisnan(y)), cst_nan, pow); + + // * pow(base, 1) returns base. + // * pow(base, +/-0) returns 1, regardless of base, even NaN. + // * pow(+1, exp) returns 1, regardless of exponent, even NaN. + pow = pselect(y_is_one, x, pselect(por(x_is_one, y_is_zero), cst_one, pow)); + + return pow; } namespace unary_pow { @@ -2343,7 +2327,12 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const ScalarExponent& exponent) { const bool exponent_is_integer = (numext::isfinite)(exponent) && numext::round(exponent) == exponent; if (exponent_is_integer) { - return unary_pow::int_pow(x, exponent); + // The simple recursive doubling implementation is only accurate to 3 ulps + // for integer exponents in [-3:7]. Since this is a common case, we + // specialize it here. + bool use_repeated_squaring = + (exponent <= ScalarExponent(7) && (!ExponentIsSigned || exponent >= ScalarExponent(-3))); + return use_repeated_squaring ? unary_pow::int_pow(x, exponent) : generic_pow(x, pset1<Packet>(exponent)); } else { Packet result = unary_pow::gen_pow(x, exponent); result = unary_pow::handle_nonint_nonint_errors(x, result, exponent);
diff --git a/test/block.cpp b/test/block.cpp index c67dd22..8f7c8de 100644 --- a/test/block.cpp +++ b/test/block.cpp
@@ -73,6 +73,21 @@ block_real_only(m1, r1, r2, c1, c1, s1); + // test fill logic with innerpanel and non-innerpanel blocks + m1.row(r1).setConstant(s1); + VERIFY_IS_CWISE_EQUAL(m1.row(r1), DynamicVectorType::Constant(cols, s1).transpose()); + m1 = m1_copy; + m1.col(c1).setConstant(s1); + VERIFY_IS_CWISE_EQUAL(m1.col(c1), DynamicVectorType::Constant(rows, s1)); + m1 = m1_copy; + // test setZero logic with innerpanel and non-innerpanel blocks + m1.row(r1).setZero(); + VERIFY_IS_CWISE_EQUAL(m1.row(r1), DynamicVectorType::Zero(cols).transpose()); + m1 = m1_copy; + m1.col(c1).setZero(); + VERIFY_IS_CWISE_EQUAL(m1.col(c1), DynamicVectorType::Zero(rows)); + m1 = m1_copy; + // check row() and col() VERIFY_IS_EQUAL(m1.col(c1).transpose(), m1.transpose().row(c1)); // check operator(), both constant and non-constant, on row() and col()
diff --git a/test/mapped_matrix.cpp b/test/mapped_matrix.cpp index a8d17d9..78ae09c 100644 --- a/test/mapped_matrix.cpp +++ b/test/mapped_matrix.cpp
@@ -75,8 +75,23 @@ Map<MatrixType> map4(array4, rows, cols); VERIFY_IS_EQUAL(map1, MatrixType::Ones(rows, cols)); + map1.setConstant(s1); + VERIFY_IS_EQUAL(map1, MatrixType::Constant(rows, cols, s1)); + map1.setZero(); + VERIFY_IS_EQUAL(map1, MatrixType::Zero(rows, cols)); + VERIFY_IS_EQUAL(map2, MatrixType::Ones(rows, cols)); + map2.setConstant(s1); + VERIFY_IS_EQUAL(map2, MatrixType::Constant(rows, cols, s1)); + map2.setZero(); + VERIFY_IS_EQUAL(map2, MatrixType::Zero(rows, cols)); + VERIFY_IS_EQUAL(map3, MatrixType::Ones(rows, cols)); + map3.setConstant(s1); + VERIFY_IS_EQUAL(map3, MatrixType::Constant(rows, cols, s1)); + map3.setZero(); + VERIFY_IS_EQUAL(map3, MatrixType::Zero(rows, cols)); + map1 = MatrixType::Random(rows, cols); map2 = map1; map3 = map1;
diff --git a/test/mapstride.cpp b/test/mapstride.cpp index 224316f..1a9f50c 100644 --- a/test/mapstride.cpp +++ b/test/mapstride.cpp
@@ -94,6 +94,8 @@ VERIFY_IS_APPROX(s1 * map, s1 * m); map *= s1; VERIFY_IS_APPROX(map, s1 * m); + map.setZero(); + VERIFY_IS_CWISE_EQUAL(map, MatrixType::Zero(rows, cols)); } // test no inner stride and an outer stride of +4. This is quite important as for fixed-size matrices, @@ -118,6 +120,8 @@ VERIFY_IS_APPROX(s1 * map, s1 * m); map *= s1; VERIFY_IS_APPROX(map, s1 * m); + map.setZero(); + VERIFY_IS_CWISE_EQUAL(map, MatrixType::Zero(rows, cols)); } // test both inner stride and outer stride @@ -138,6 +142,8 @@ VERIFY_IS_APPROX(s1 * map, s1 * m); map *= s1; VERIFY_IS_APPROX(map, s1 * m); + map.setZero(); + VERIFY_IS_CWISE_EQUAL(map, MatrixType::Zero(rows, cols)); } // test inner stride and no outer stride @@ -156,6 +162,8 @@ VERIFY_IS_APPROX(s1 * map, s1 * m); map *= s1; VERIFY_IS_APPROX(map, s1 * m); + map.setZero(); + VERIFY_IS_CWISE_EQUAL(map, MatrixType::Zero(rows, cols)); } // test negative strides
diff --git a/test/packetmath.cpp b/test/packetmath.cpp index e4d1b8c..cdbaad6 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp
@@ -199,6 +199,12 @@ pcast_array<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio>::cast(data1, DataSize, data2); VERIFY(test::areApprox(ref, data2, DataSize) && "internal::pcast<>"); + + // Test that pcast<SrcScalar, TgtScalar> generates the same result. + for (int i = 0; i < DataSize; ++i) { + data2[i] = internal::pcast<SrcScalar, TgtScalar>(data1[i]); + } + VERIFY(test::areApprox(ref, data2, DataSize) && "internal::pcast<>"); } }; @@ -1496,7 +1502,7 @@ template <typename Scalar, typename Packet> struct exp_complex_test_impl<Scalar, Packet, false> { typedef typename Scalar::value_type RealScalar; - static void run(Scalar*, Scalar*, Scalar*, int){}; + static void run(Scalar*, Scalar*, Scalar*, int) {}; }; template <typename Scalar, typename Packet>