Update Eigen to commit:2d4c9b400cca33d2f5cf316efc7151236244edb1
CHANGELOG
=========
2d4c9b400 - make fixed size matrices and arrays trivially_copy_constructible and trivially_move_constructible
132f281f5 - Fix generic ceil for SSE2.
84282c42f - optimize new dot product
fb477b8be - Better dot products
134b526d6 - Update NonBlockingThreadPool.h plain asserts to use eigen_plain_assert
072ec9d95 - Fix a bug for pcmp_lt_or_nan and Add sqrt support for SVE
931538979 - Fix bug in bug fix for atanh.
PiperOrigin-RevId: 680623801
Change-Id: I0493caa2a8dd2b403773c440cbe312fbade0b50e
diff --git a/Eigen/Core b/Eigen/Core
index 29dda39..e6dbe3a 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -324,6 +324,7 @@
#include "src/Core/CwiseNullaryOp.h"
#include "src/Core/CwiseUnaryView.h"
#include "src/Core/SelfCwiseBinaryOp.h"
+#include "src/Core/InnerProduct.h"
#include "src/Core/Dot.h"
#include "src/Core/StableNorm.h"
#include "src/Core/Stride.h"
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 29c9682..c808b63 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -126,9 +126,7 @@
: Base(internal::constructor_without_unaligned_array_assert()){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
#endif
- EIGEN_DEVICE_FUNC Array(Array && other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
- : Base(std::move(other)) {
- }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(Array && other) = default;
EIGEN_DEVICE_FUNC Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value) {
Base::operator=(std::move(other));
return *this;
@@ -232,7 +230,7 @@
}
/** Copy constructor */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Array& other) : Base(other) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(const Array& other) = default;
private:
struct PrivateType {};
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index dd4a2c4..059527c 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -17,30 +17,6 @@
namespace internal {
-// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
-// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
-// looking at the static assertions. Thus this is a trick to get better compile errors.
-template <typename T, typename U,
- bool NeedToTranspose = T::IsVectorAtCompileTime && U::IsVectorAtCompileTime &&
- ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1) ||
- (int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))>
-struct dot_nocheck {
- typedef scalar_conj_product_op<typename traits<T>::Scalar, typename traits<U>::Scalar> conj_prod;
- typedef typename conj_prod::result_type ResScalar;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) {
- return a.template binaryExpr<conj_prod>(b).sum();
- }
-};
-
-template <typename T, typename U>
-struct dot_nocheck<T, U, true> {
- typedef scalar_conj_product_op<typename traits<T>::Scalar, typename traits<U>::Scalar> conj_prod;
- typedef typename conj_prod::result_type ResScalar;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) {
- return a.transpose().template binaryExpr<conj_prod>(b).sum();
- }
-};
-
template <typename Derived, typename Scalar = typename traits<Derived>::Scalar>
struct squared_norm_impl {
using Real = typename NumTraits<Scalar>::Real;
@@ -74,18 +50,7 @@
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,
typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const {
- EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
- EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived)
-#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
- EIGEN_CHECK_BINARY_COMPATIBILIY(
- Eigen::internal::scalar_conj_product_op<Scalar EIGEN_COMMA typename OtherDerived::Scalar>, Scalar,
- typename OtherDerived::Scalar);
-#endif
-
- eigen_assert(size() == other.size());
-
- return internal::dot_nocheck<Derived, OtherDerived>::run(*this, other);
+ return internal::dot_impl<Derived, OtherDerived>::run(derived(), other.derived());
}
//---------- implementation of L2 norm and related functions ----------
diff --git a/Eigen/src/Core/InnerProduct.h b/Eigen/src/Core/InnerProduct.h
new file mode 100644
index 0000000..38689da
--- /dev/null
+++ b/Eigen/src/Core/InnerProduct.h
@@ -0,0 +1,250 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2024 Charlie Schlosser <cs.schlosser@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_INNER_PRODUCT_EVAL_H
+#define EIGEN_INNER_PRODUCT_EVAL_H
+
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+// Recursively halves the packet type: stops at the first packet whose size is <= Size, or at the packet whose `half` type is itself when no packet fits.
+template <typename Scalar, int Size, typename Packet = typename packet_traits<Scalar>::type,
+ bool Stop =
+ (unpacket_traits<Packet>::size <= Size) || is_same<Packet, typename unpacket_traits<Packet>::half>::value>
+struct find_inner_product_packet_helper;
+
+template <typename Scalar, int Size, typename Packet>
+struct find_inner_product_packet_helper<Scalar, Size, Packet, false> {  // Stop == false: Packet is larger than Size and has a distinct half, so recurse on the half
+ using type = typename find_inner_product_packet_helper<Scalar, Size, typename unpacket_traits<Packet>::half>::type;
+};
+
+template <typename Scalar, int Size, typename Packet>
+struct find_inner_product_packet_helper<Scalar, Size, Packet, true> {  // Stop == true: Packet fits within Size or cannot be halved further
+ using type = Packet;
+};
+
+template <typename Scalar, int Size>
+struct find_inner_product_packet : find_inner_product_packet_helper<Scalar, Size> {};  // fixed Size: the packet type is chosen by the recursive helper
+
+template <typename Scalar>
+struct find_inner_product_packet<Scalar, Dynamic> {  // Dynamic size: take packet_traits<Scalar>::type without searching
+ using type = typename packet_traits<Scalar>::type;
+};
+
+template <typename Lhs, typename Rhs>
+struct inner_product_assert {  // bundles the compile-time vector checks with a runtime size check for the two operands
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Lhs)
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Rhs)
+ EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Lhs, Rhs)
+#ifndef EIGEN_NO_DEBUG
+ static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, const Rhs& rhs) {  // EIGEN_NO_DEBUG not defined: assert that both operands have the same runtime size
+ eigen_assert((lhs.size() == rhs.size()) && "Inner product: lhs and rhs vectors must have same size");
+ }
+#else
+ static EIGEN_DEVICE_FUNC void run(const Lhs&, const Rhs&) {}  // EIGEN_NO_DEBUG defined: no-op with the same signature
+#endif
+};
+
+template <typename Func, typename Lhs, typename Rhs>
+struct inner_product_evaluator {  // holds the evaluators of both operands plus the functor Func that combines their coefficients
+ static constexpr int LhsFlags = evaluator<Lhs>::Flags, RhsFlags = evaluator<Rhs>::Flags,
+ SizeAtCompileTime = min_size_prefer_fixed(Lhs::SizeAtCompileTime, Rhs::SizeAtCompileTime),
+ LhsAlignment = evaluator<Lhs>::Alignment, RhsAlignment = evaluator<Rhs>::Alignment;
+
+ using Scalar = typename Func::result_type;
+ using Packet = typename find_inner_product_packet<Scalar, SizeAtCompileTime>::type;
+
+ static constexpr bool Vectorize =  // needs PacketAccessBit on both operands, Func::PacketAccess, and (for fixed sizes) a packet no larger than the size
+ bool(LhsFlags & RhsFlags & PacketAccessBit) && Func::PacketAccess &&
+ ((SizeAtCompileTime == Dynamic) || (unpacket_traits<Packet>::size <= SizeAtCompileTime));
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit inner_product_evaluator(const Lhs& lhs, const Rhs& rhs,
+ Func func = Func())
+ : m_func(func), m_lhs(lhs), m_rhs(rhs), m_size(lhs.size()) {  // the stored size comes from lhs only
+ inner_product_assert<Lhs, Rhs>::run(lhs, rhs);  // checks that rhs has the same size (a no-op when EIGEN_NO_DEBUG is defined)
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_size.value(); }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const {  // applies Func to the coefficient pair at `index`
+ return m_func.coeff(m_lhs.coeff(index), m_rhs.coeff(index));
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& value, Index index) const {  // same, with `value` passed to Func as the running accumulator
+ return m_func.coeff(value, m_lhs.coeff(index), m_rhs.coeff(index));
+ }
+
+ template <typename PacketType, int LhsMode = LhsAlignment, int RhsMode = RhsAlignment>
+ EIGEN_STRONG_INLINE PacketType packet(Index index) const {  // packet counterpart of coeff(index); load modes default to each operand's alignment
+ return m_func.packet(m_lhs.template packet<LhsMode, PacketType>(index),
+ m_rhs.template packet<RhsMode, PacketType>(index));
+ }
+
+ template <typename PacketType, int LhsMode = LhsAlignment, int RhsMode = RhsAlignment>
+ EIGEN_STRONG_INLINE PacketType packet(const PacketType& value, Index index) const {  // packet counterpart of coeff(value, index)
+ return m_func.packet(value, m_lhs.template packet<LhsMode, PacketType>(index),
+ m_rhs.template packet<RhsMode, PacketType>(index));
+ }
+
+ const Func m_func;
+ const evaluator<Lhs> m_lhs;
+ const evaluator<Rhs> m_rhs;
+ const variable_if_dynamic<Index, SizeAtCompileTime> m_size;  // value taken from lhs.size() in the constructor
+};
+
+template <typename Evaluator, bool Vectorize = Evaluator::Vectorize>
+struct inner_product_impl;  // reduction kernel, specialized on Vectorize (defaults to Evaluator::Vectorize)
+
+// scalar loop: returns Scalar(0) for empty input; otherwise seeds the accumulator with coefficient 0 and folds in the remaining coefficients in index order
+template <typename Evaluator>
+struct inner_product_impl<Evaluator, false> {
+ using Scalar = typename Evaluator::Scalar;
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) {
+ const Index size = eval.size();
+ if (size == 0) return Scalar(0);  // empty operands: nothing to accumulate
+
+ Scalar result = eval.coeff(0);  // first term initializes the accumulator
+ for (Index k = 1; k < size; k++) {
+ result = eval.coeff(result, k);  // accumulate term k into result
+ }
+
+ return result;
+ }
+};
+
+// vector loop: up to four independent packet accumulators over the whole packets, followed by a scalar tail for the remaining coefficients
+template <typename Evaluator>
+struct inner_product_impl<Evaluator, true> {
+ using UnsignedIndex = std::make_unsigned_t<Index>;
+ using Scalar = typename Evaluator::Scalar;
+ using Packet = typename Evaluator::Packet;
+ static constexpr int PacketSize = unpacket_traits<Packet>::size;
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) {
+ const UnsignedIndex size = static_cast<UnsignedIndex>(eval.size());
+ if (size < PacketSize) return inner_product_impl<Evaluator, false>::run(eval);  // not even one whole packet: use the scalar loop
+
+ const UnsignedIndex packetEnd = numext::round_down(size, PacketSize);  // presumably size rounded down to a multiple of PacketSize (per the helper's name)
+ const UnsignedIndex quadEnd = numext::round_down(size, 4 * PacketSize);  // presumably size rounded down to a multiple of four packets
+ const UnsignedIndex numPackets = size / PacketSize;  // number of whole packets in the input
+ const UnsignedIndex numRemPackets = (packetEnd - quadEnd) / PacketSize;  // whole packets between quadEnd and packetEnd
+
+ Packet presult0, presult1, presult2, presult3;  // presult1..3 are assigned, and later read, only under matching numPackets guards
+
+ presult0 = eval.template packet<Packet>(0 * PacketSize);  // each accumulator is seeded with its first packet term
+ if (numPackets >= 2) presult1 = eval.template packet<Packet>(1 * PacketSize);
+ if (numPackets >= 3) presult2 = eval.template packet<Packet>(2 * PacketSize);
+ if (numPackets >= 4) {
+ presult3 = eval.template packet<Packet>(3 * PacketSize);
+
+ for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) {  // remaining full groups of four packets
+ presult0 = eval.packet(presult0, k + 0 * PacketSize);
+ presult1 = eval.packet(presult1, k + 1 * PacketSize);
+ presult2 = eval.packet(presult2, k + 2 * PacketSize);
+ presult3 = eval.packet(presult3, k + 3 * PacketSize);
+ }
+
+ if (numRemPackets >= 1) presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize);  // leftover whole packets go to accumulators 0..2
+ if (numRemPackets >= 2) presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize);
+ if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize);
+
+ presult2 = padd(presult2, presult3);  // fold accumulator 3 into 2
+ }
+
+ if (numPackets >= 3) presult1 = padd(presult1, presult2);  // fold accumulator 2 into 1
+ if (numPackets >= 2) presult0 = padd(presult0, presult1);  // fold accumulator 1 into 0
+
+ Scalar result = predux(presult0);  // reduce the final packet accumulator to a scalar
+ for (UnsignedIndex k = packetEnd; k < size; k++) {  // scalar tail: coefficients past the last whole packet
+ result = eval.coeff(result, k);
+ }
+
+ return result;
+ }
+};
+
+template <typename Scalar, bool Conj>
+struct conditional_conj;  // conjugates scalars and packets only when Conj is true
+
+template <typename Scalar>
+struct conditional_conj<Scalar, true> {  // Conj == true: forward to numext::conj for scalars and pconj for packets
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return numext::conj(a); }
+ template <typename Packet>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) {
+ return pconj(a);
+ }
+};
+
+template <typename Scalar>
+struct conditional_conj<Scalar, false> {  // Conj == false: return the argument unchanged
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return a; }
+ template <typename Packet>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) {
+ return a;
+ }
+};
+
+template <typename LhsScalar, typename RhsScalar, bool Conj>
+struct scalar_inner_product_op {  // general functor for possibly different operand scalar types; scalar path only
+ using result_type = typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType;
+ using conj_helper = conditional_conj<LhsScalar, Conj>;  // conjugation, when enabled, is applied to the lhs operand only
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const LhsScalar& a, const RhsScalar& b) const {  // single term: conj_helper(a) * b
+ return (conj_helper::coeff(a) * b);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const result_type& accum, const LhsScalar& a,
+ const RhsScalar& b) const {
+ return (conj_helper::coeff(a) * b) + accum;  // term added to the running accumulator
+ }
+ static constexpr bool PacketAccess = false;  // no packet() overloads here; inner_product_evaluator::Vectorize is therefore false for this functor
+};
+
+template <typename Scalar, bool Conj>
+struct scalar_inner_product_op<Scalar, Scalar, Conj> {  // both operands share one scalar type: adds packet overloads next to the scalar ones
+ using result_type = Scalar;
+ using conj_helper = conditional_conj<Scalar, Conj>;  // conjugation, when enabled, is applied to the first operand only
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a, const Scalar& b) const {  // single term, via pmul
+ return pmul(conj_helper::coeff(a), b);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& accum, const Scalar& a, const Scalar& b) const {  // accumulating counterpart, via pmadd
+ return pmadd(conj_helper::coeff(a), b, accum);
+ }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a, const Packet& b) const {  // packet version of coeff(a, b)
+ return pmul(conj_helper::packet(a), b);
+ }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& accum, const Packet& a, const Packet& b) const {  // packet version of coeff(accum, a, b)
+ return pmadd(conj_helper::packet(a), b, accum);
+ }
+ static constexpr bool PacketAccess = packet_traits<Scalar>::HasMul && packet_traits<Scalar>::HasAdd;  // packet path offered only when packet_traits reports both HasMul and HasAdd
+};
+
+template <typename Lhs, typename Rhs, bool Conj>
+struct default_inner_product_impl {  // entry point: builds the functor and the evaluator, then runs the reduction kernel
+ using LhsScalar = typename traits<Lhs>::Scalar;
+ using RhsScalar = typename traits<Rhs>::Scalar;
+ using Op = scalar_inner_product_op<LhsScalar, RhsScalar, Conj>;
+ using Evaluator = inner_product_evaluator<Op, Lhs, Rhs>;
+ using result_type = typename Evaluator::Scalar;
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type run(const MatrixBase<Lhs>& a, const MatrixBase<Rhs>& b) {
+ Evaluator eval(a.derived(), b.derived(), Op());  // the evaluator constructor also runs the operand size check
+ return inner_product_impl<Evaluator>::run(eval);  // scalar or vector kernel is selected by Evaluator::Vectorize
+ }
+};
+
+template <typename Lhs, typename Rhs>
+struct dot_impl : default_inner_product_impl<Lhs, Rhs, true> {};  // used by MatrixBase::dot(): inner product with Conj == true, i.e. the lhs operand is conjugated
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // EIGEN_INNER_PRODUCT_EVAL_H
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 0d8691e..2c64936 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -258,9 +258,7 @@
internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert()){EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix && other)
- EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
- : Base(std::move(other)) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix && other) = default;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other)
EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value) {
Base::operator=(std::move(other));
@@ -379,7 +377,7 @@
}
/** \brief Copy constructor */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(const Matrix& other) = default;
/** \brief Copy constructor for generic expressions.
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 5f846a0..df90318 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -485,8 +485,7 @@
}
#endif
- EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
- : m_storage(std::move(other.m_storage)) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(PlainObjectBase&& other) = default;
EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT {
m_storage = std::move(other.m_storage);
@@ -494,8 +493,7 @@
}
/** Copy constructor */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase& other)
- : Base(), m_storage(other.m_storage) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase& other) = default;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
: m_storage(size, rows, cols) {
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index fa4d038..77a658a 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -235,19 +235,20 @@
template <typename Lhs, typename Rhs>
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, InnerProduct> {
+ using impl = default_inner_product_impl<Lhs, Rhs, false>;
template <typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
- dst.coeffRef(0, 0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+ dst.coeffRef(0, 0) = impl::run(lhs, rhs);
}
template <typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
- dst.coeffRef(0, 0) += (lhs.transpose().cwiseProduct(rhs)).sum();
+ dst.coeffRef(0, 0) += impl::run(lhs, rhs);
}
template <typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
- dst.coeffRef(0, 0) -= (lhs.transpose().cwiseProduct(rhs)).sum();
+ dst.coeffRef(0, 0) -= impl::run(lhs, rhs);
}
};
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 57bdbe3..5bce194 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -1222,7 +1222,7 @@
const Packet x_gt_half = pcmp_le(half, pabs(x));
const Packet x_eq_one = pcmp_eq(one, pabs(x));
const Packet x_gt_one = pcmp_lt(one, pabs(x));
- const Packet sign_mask = pset1<Packet>(-1.0f);
+ const Packet sign_mask = pset1<Packet>(-0.0f);
const Packet x_sign = pand(sign_mask, x);
const Packet inf = pset1<Packet>(std::numeric_limits<float>::infinity());
return por(x_gt_one, pselect(x_eq_one, por(x_sign, inf), pselect(x_gt_half, r, p)));
@@ -1269,7 +1269,7 @@
const Packet x_gt_half = pcmp_le(half, pabs(x));
const Packet x_eq_one = pcmp_eq(one, pabs(x));
const Packet x_gt_one = pcmp_lt(one, pabs(x));
- const Packet sign_mask = pset1<Packet>(-1.0);
+ const Packet sign_mask = pset1<Packet>(-0.0);
const Packet x_sign = pand(sign_mask, x);
const Packet inf = pset1<Packet>(std::numeric_limits<double>::infinity());
return por(x_gt_one, pselect(x_eq_one, por(x_sign, inf), pselect(x_gt_half, y_large, y_small)));
@@ -2598,11 +2598,14 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a) {
using Scalar = typename unpacket_traits<Packet>::type;
const Packet cst_1 = pset1<Packet>(Scalar(1));
+ const Packet sign_mask = pset1<Packet>(static_cast<Scalar>(-0.0));
Packet rint_a = generic_rint(a);
// if rint(a) < a, then rint(a) == floor(a)
Packet mask = pcmp_lt(rint_a, a);
Packet offset = pand(cst_1, mask);
Packet result = padd(rint_a, offset);
+ // Signed zero must remain signed (e.g. ceil(-0.02) == -0).
+ result = por(result, pand(sign_mask, a));
return result;
}
diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h
index 924f897..51bbfe0 100644
--- a/Eigen/src/Core/arch/SVE/PacketMath.h
+++ b/Eigen/src/Core/arch/SVE/PacketMath.h
@@ -358,7 +358,7 @@
HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
- HasSqrt = 0,
+ HasSqrt = 1,
HasTanh = EIGEN_FAST_MATH,
HasErf = EIGEN_FAST_MATH
};
@@ -478,12 +478,12 @@
return svreinterpret_f32_u32(svdup_n_u32_z(svcmpeq_f32(svptrue_b32(), a, b), 0xffffffffu));
}
-// Do a predicate inverse (svnot_b_x) on the predicate resulted from the
+// Do a predicate inverse (svnot_b_z) on the predicate resulting from the
// greater/equal comparison (svcmpge_f32). Then fill a float vector with the
// active elements.
template <>
EIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b) {
- return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_x(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
+ return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_z(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
}
template <>
@@ -660,6 +660,11 @@
return pldexp_generic(a, exponent);
}
+template <>
+EIGEN_STRONG_INLINE PacketXf psqrt<PacketXf>(const PacketXf& a) {  // PacketXf specialization of psqrt, matching the HasSqrt = 1 trait enabled in this patch
+ return svsqrt_f32_x(svptrue_b32(), a);  // svptrue_b32() presumably yields an all-true predicate so every lane is computed — confirm against ACLE
+}
+
} // namespace internal
} // namespace Eigen
diff --git a/Eigen/src/ThreadPool/NonBlockingThreadPool.h b/Eigen/src/ThreadPool/NonBlockingThreadPool.h
index 7caeecf..b6abc3c 100644
--- a/Eigen/src/ThreadPool/NonBlockingThreadPool.h
+++ b/Eigen/src/ThreadPool/NonBlockingThreadPool.h
@@ -263,13 +263,13 @@
uint64_t num_spinning = (state & kNumSpinningMask);
uint64_t num_no_notification = (state & kNumNoNotifyMask) >> kNumNoNotifyShift;
- assert(num_no_notification <= num_spinning);
+ eigen_plain_assert(num_no_notification <= num_spinning);
return {num_spinning, num_no_notification};
}
// Encodes as `spinning_state_` value.
uint64_t Encode() const {
- assert(num_no_notification <= num_spinning);
+ eigen_plain_assert(num_no_notification <= num_spinning);
return (num_no_notification << kNumNoNotifyShift) | num_spinning;
}
};