Update Eigen to commit: 122befe54cc0c31273d9e1caef80b49ad834bf4c
CHANGELOG
=========
122befe54 - Fix "unary minus operator applied to unsigned type, result still unsigned" on MSVC and other stupid warnings
dcdb0233c - Refactor indexed view to appease MSVC 14.16.
5226566a1 - Speed up pldexp_generic.
3c6521ed9 - Add constexpr to accessors in DenseBase, Quaternions and Translations
3c9109238 - Add support for Packet8l to AVX512.
PiperOrigin-RevId: 625378611
Change-Id: Ib21f5d585439de99cba53737d4ceaebe323f5cbe
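
For context: the MSVC warning addressed by the first commit is C4146, "unary
minus operator applied to unsigned type, result still unsigned", which fires
because negating an unsigned value wraps modulo 2^N instead of producing a
negative number. A minimal illustration, not part of the patch:

    #include <cstdint>

    int main() {
      uint32_t a = 5;
      uint32_t b = -a;               // C4146 on MSVC; wraps to 2^32 - 5
      uint32_t c = uint32_t(0) - a;  // same value, no warning
      return b == c;                 // always 1
    }

The patch below routes scalar negation through numext::negate, which uses
Scalar(0) - a for integer types; see the MathFunctions.h hunk.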
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 48c6d73..30e0aa3 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -89,12 +89,13 @@
*
* \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
*/
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeff(Index row, Index col) const {
eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return internal::evaluator<Derived>(derived()).coeff(row, col);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeffByOuterInner(Index outer,
+ Index inner) const {
return coeff(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner));
}
@@ -102,7 +103,7 @@
*
* \sa operator()(Index,Index), operator[](Index)
*/
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator()(Index row, Index col) const {
eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return coeff(row, col);
}
@@ -122,7 +123,7 @@
* \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
*/
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeff(Index index) const {
EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
eigen_internal_assert(index >= 0 && index < size());
@@ -137,7 +138,7 @@
* z() const, w() const
*/
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator[](Index index) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator[](Index index) const {
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
eigen_assert(index >= 0 && index < size());
@@ -154,32 +155,32 @@
* z() const, w() const
*/
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index index) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator()(Index index) const {
eigen_assert(index >= 0 && index < size());
return coeff(index);
}
/** equivalent to operator[](0). */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType x() const { return (*this)[0]; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType x() const { return (*this)[0]; }
/** equivalent to operator[](1). */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType y() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType y() const {
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS);
return (*this)[1];
}
/** equivalent to operator[](2). */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType z() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType z() const {
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS);
return (*this)[2];
}
/** equivalent to operator[](3). */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType w() const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType w() const {
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS);
return (*this)[3];
}
@@ -361,32 +362,32 @@
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
*/
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& operator()(Index index) {
eigen_assert(index >= 0 && index < size());
return coeffRef(index);
}
/** equivalent to operator[](0). */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& x() { return (*this)[0]; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& x() { return (*this)[0]; }
/** equivalent to operator[](1). */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& y() {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& y() {
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS);
return (*this)[1];
}
/** equivalent to operator[](2). */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& z() {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& z() {
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS);
return (*this)[2];
}
/** equivalent to operator[](3). */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& w() {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& w() {
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS);
return (*this)[3];
}
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 58a197f..61f0eb9 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -335,12 +335,9 @@
/** \internal \returns -a (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
- return -a;
-}
-
-template <>
-EIGEN_DEVICE_FUNC inline bool pnegate(const bool& a) {
- return !a;
+ EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
+ NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
+ return numext::negate(a);
}
/** \internal \returns conj(a) (coeff-wise) */
@@ -1117,8 +1114,9 @@
/** \internal \returns the log2 of \a a (coeff-wise) */
template <typename Packet>
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
- typedef typename internal::unpacket_traits<Packet>::type Scalar;
- return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
+ using Scalar = typename internal::unpacket_traits<Packet>::type;
+ using RealScalar = typename NumTraits<Scalar>::Real;
+ return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
}
/** \internal \returns the square-root of \a a (coeff-wise) */
@@ -1293,13 +1291,13 @@
/** \internal \returns -(a * b) + c (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
- return padd(pnegate(pmul(a, b)), c);
+ return psub(c, pmul(a, b));
}
-/** \internal \returns -(a * b) - c (coeff-wise) */
+/** \internal \returns -((a * b) + c) (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
- return psub(pnegate(pmul(a, b)), c);
+ return pnegate(pmadd(a, b, c));
}
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned
@@ -1403,6 +1401,12 @@
template <size_t N>
struct Selector {
bool select[N];
+ template <typename MaskType = int>
+ EIGEN_DEVICE_FUNC inline MaskType mask(size_t begin = 0, size_t end = N) const {
+ MaskType res = 0;
+ for (size_t i = begin; i < end; i++) res |= (static_cast<MaskType>(select[i]) << i);
+ return res;
+ }
};
template <typename Packet>
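
Note: the new Selector::mask() helper folds the select[] booleans into an
integer bitmask, bit i holding select[i]; the AVX512 pblend specializations
further down use it to build an __mmask8/__mmask16 in a single call. A
standalone sketch mirroring the default-range behaviour, assuming a plain
bool array:

    #include <cstddef>
    #include <cstdint>

    // Bit i of the result is select[i], as in Selector<N>::mask().
    template <size_t N>
    uint32_t selector_mask(const bool (&select)[N]) {
      uint32_t res = 0;
      for (size_t i = 0; i < N; i++) res |= static_cast<uint32_t>(select[i]) << i;
      return res;
    }

    // e.g. {true, false, true, true} -> 0b1101 == 13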
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index f907d1e..2a42b18 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -861,6 +861,25 @@
typedef Scalar type;
};
+// suppress "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC
+// note: `0 - a` is distinct from `-a` when Scalar is a floating point type and `a` is zero
+
+template <typename Scalar, bool IsInteger = NumTraits<Scalar>::IsInteger>
+struct negate_impl {
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return -a; }
+};
+
+template <typename Scalar>
+struct negate_impl<Scalar, true> {
+ EIGEN_STATIC_ASSERT((!is_same<Scalar, bool>::value), NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
+ static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return Scalar(0) - a; }
+};
+
+template <typename Scalar>
+struct negate_retval {
+ typedef Scalar type;
+};
+
template <typename Scalar, bool IsInteger = NumTraits<typename unpacket_traits<Scalar>::type>::IsInteger>
struct nearest_integer_impl {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_floor(const Scalar& x) {
@@ -1067,6 +1086,11 @@
}
template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(negate, Scalar) negate(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(negate, Scalar)::run(x);
+}
+
+template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
}
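
Note: the integer specialization of negate_impl exists because Scalar(0) - a
avoids MSVC C4146 for unsigned types while computing the same modular result
as -a, whereas the generic branch must keep plain -a: for floating point the
two differ at zero, as the comment above notes. A small sketch of that
difference, assuming IEEE-754 doubles:

    #include <cassert>
    #include <cmath>

    int main() {
      double z = 0.0;
      assert(std::signbit(-z));        // -0.0 has its sign bit set
      assert(!std::signbit(0.0 - z));  // 0.0 - 0.0 == +0.0
      return 0;
    }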
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 2383e46..a53c38d 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -1844,7 +1844,7 @@
#endif
}
template <>
-EIGEN_STRONG_INLINE Packet8ui psignbit(const Packet8ui& a) {
+EIGEN_STRONG_INLINE Packet8ui psignbit(const Packet8ui& /*unused*/) {
return _mm256_setzero_si256();
}
#ifdef EIGEN_VECTORIZE_AVX2
@@ -1853,7 +1853,7 @@
return _mm256_castsi256_pd(_mm256_cmpgt_epi64(_mm256_setzero_si256(), _mm256_castpd_si256(a)));
}
template <>
-EIGEN_STRONG_INLINE Packet4ul psignbit(const Packet4ul& a) {
+EIGEN_STRONG_INLINE Packet4ul psignbit(const Packet4ul& /*unused*/) {
return _mm256_setzero_si256();
}
#endif
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index ed2f189..5c53556 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -34,7 +34,7 @@
typedef __m512 Packet16f;
typedef __m512i Packet16i;
typedef __m512d Packet8d;
-// TODO(rmlarsen): Add support for Packet8l.
+typedef eigen_packet_wrapper<__m512i, 1> Packet8l;
#ifndef EIGEN_VECTORIZE_AVX512FP16
typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
#endif
@@ -52,6 +52,10 @@
struct is_arithmetic<__m512d> {
enum { value = true };
};
+template <>
+struct is_arithmetic<Packet8l> {
+ enum { value = true };
+};
#ifndef EIGEN_VECTORIZE_AVX512FP16
template <>
@@ -172,6 +176,13 @@
};
template <>
+struct packet_traits<int64_t> : default_packet_traits {
+ typedef Packet8l type;
+ typedef Packet4l half;
+ enum { Vectorizable = 1, AlignedOnScalar = 1, HasCmp = 1, size = 8 };
+};
+
+template <>
struct unpacket_traits<Packet16f> {
typedef float type;
typedef Packet8f half;
@@ -190,6 +201,7 @@
struct unpacket_traits<Packet8d> {
typedef double type;
typedef Packet4d half;
+ typedef Packet8l integer_packet;
typedef uint8_t mask_t;
enum {
size = 8,
@@ -213,6 +225,19 @@
};
};
+template <>
+struct unpacket_traits<Packet8l> {
+ typedef int64_t type;
+ typedef Packet4l half;
+ enum {
+ size = 8,
+ alignment = Aligned64,
+ vectorizable = true,
+ masked_load_available = false,
+ masked_store_available = false
+ };
+};
+
#ifndef EIGEN_VECTORIZE_AVX512FP16
template <>
struct unpacket_traits<Packet16h> {
@@ -240,6 +265,10 @@
EIGEN_STRONG_INLINE Packet16i pset1<Packet16i>(const int& from) {
return _mm512_set1_epi32(from);
}
+template <>
+EIGEN_STRONG_INLINE Packet8l pset1<Packet8l>(const int64_t& from) {
+ return _mm512_set1_epi64(from);
+}
template <>
EIGEN_STRONG_INLINE Packet16f pset1frombits<Packet16f>(unsigned int from) {
@@ -265,6 +294,11 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8l pzero(const Packet8l& /*a*/) {
+ return _mm512_setzero_si512();
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f peven_mask(const Packet16f& /*a*/) {
return _mm512_castsi512_ps(_mm512_set_epi32(0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
}
@@ -276,6 +310,10 @@
EIGEN_STRONG_INLINE Packet8d peven_mask(const Packet8d& /*a*/) {
return _mm512_castsi512_pd(_mm512_set_epi32(0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1));
}
+template <>
+EIGEN_STRONG_INLINE Packet8l peven_mask(const Packet8l& /*a*/) {
+ return _mm512_set_epi32(0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1);
+}
template <>
EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
@@ -313,6 +351,10 @@
EIGEN_STRONG_INLINE Packet16i plset<Packet16i>(const int& a) {
return _mm512_add_epi32(_mm512_set1_epi32(a), _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
}
+template <>
+EIGEN_STRONG_INLINE Packet8l plset<Packet8l>(const int64_t& a) {
+ return _mm512_add_epi64(_mm512_set1_epi64(a), _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0));
+}
template <>
EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a, const Packet16f& b) {
@@ -326,6 +368,10 @@
EIGEN_STRONG_INLINE Packet16i padd<Packet16i>(const Packet16i& a, const Packet16i& b) {
return _mm512_add_epi32(a, b);
}
+template <>
+EIGEN_STRONG_INLINE Packet8l padd<Packet8l>(const Packet8l& a, const Packet8l& b) {
+ return _mm512_add_epi64(a, b);
+}
template <>
EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a, const Packet16f& b, uint16_t umask) {
@@ -350,6 +396,10 @@
EIGEN_STRONG_INLINE Packet16i psub<Packet16i>(const Packet16i& a, const Packet16i& b) {
return _mm512_sub_epi32(a, b);
}
+template <>
+EIGEN_STRONG_INLINE Packet8l psub<Packet8l>(const Packet8l& a, const Packet8l& b) {
+ return _mm512_sub_epi64(a, b);
+}
template <>
EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
@@ -372,6 +422,10 @@
EIGEN_STRONG_INLINE Packet16i pnegate(const Packet16i& a) {
return _mm512_sub_epi32(_mm512_setzero_si512(), a);
}
+template <>
+EIGEN_STRONG_INLINE Packet8l pnegate(const Packet8l& a) {
+ return _mm512_sub_epi64(_mm512_setzero_si512(), a);
+}
template <>
EIGEN_STRONG_INLINE Packet16f pconj(const Packet16f& a) {
@@ -385,6 +439,10 @@
EIGEN_STRONG_INLINE Packet16i pconj(const Packet16i& a) {
return a;
}
+template <>
+EIGEN_STRONG_INLINE Packet8l pconj(const Packet8l& a) {
+ return a;
+}
template <>
EIGEN_STRONG_INLINE Packet16f pmul<Packet16f>(const Packet16f& a, const Packet16f& b) {
@@ -398,6 +456,14 @@
EIGEN_STRONG_INLINE Packet16i pmul<Packet16i>(const Packet16i& a, const Packet16i& b) {
return _mm512_mullo_epi32(a, b);
}
+template <>
+EIGEN_STRONG_INLINE Packet8l pmul<Packet8l>(const Packet8l& a, const Packet8l& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_mullo_epi64(a, b);
+#else
+ return _mm512_mullox_epi64(a, b);
+#endif
+}
template <>
EIGEN_STRONG_INLINE Packet16f pdiv<Packet16f>(const Packet16f& a, const Packet16f& b) {
@@ -467,6 +533,12 @@
}
template <>
+EIGEN_DEVICE_FUNC inline Packet8l pselect(const Packet8l& mask, const Packet8l& a, const Packet8l& b) {
+ __mmask8 mask8 = _mm512_cmpeq_epi64_mask(mask, _mm512_setzero_si512());
+ return _mm512_mask_blend_epi64(mask8, a, b);
+}
+
+template <>
EIGEN_DEVICE_FUNC inline Packet8d pselect(const Packet8d& mask, const Packet8d& a, const Packet8d& b) {
__mmask8 mask8 = _mm512_cmp_epi64_mask(_mm512_castpd_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
return _mm512_mask_blend_pd(mask8, a, b);
@@ -486,6 +558,10 @@
EIGEN_STRONG_INLINE Packet16i pmin<Packet16i>(const Packet16i& a, const Packet16i& b) {
return _mm512_min_epi32(b, a);
}
+template <>
+EIGEN_STRONG_INLINE Packet8l pmin<Packet8l>(const Packet8l& a, const Packet8l& b) {
+ return _mm512_min_epi64(b, a);
+}
template <>
EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a, const Packet16f& b) {
@@ -501,6 +577,10 @@
EIGEN_STRONG_INLINE Packet16i pmax<Packet16i>(const Packet16i& a, const Packet16i& b) {
return _mm512_max_epi32(b, a);
}
+template <>
+EIGEN_STRONG_INLINE Packet8l pmax<Packet8l>(const Packet8l& a, const Packet8l& b) {
+ return _mm512_max_epi64(b, a);
+}
// Add specializations for min/max with prescribed NaN propagation.
template <>
@@ -593,46 +673,62 @@
template <>
EIGEN_STRONG_INLINE Packet16f pisnan(const Packet16f& a) {
__mmask16 mask = _mm512_cmp_ps_mask(a, a, _CMP_UNORD_Q);
- return _mm512_castsi512_ps(_mm512_maskz_set1_epi32(mask, 0xffffffffu));
+ return _mm512_castsi512_ps(_mm512_maskz_set1_epi32(mask, int32_t(-1)));
}
template <>
EIGEN_STRONG_INLINE Packet16f pcmp_eq(const Packet16f& a, const Packet16f& b) {
__mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
- return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu));
+ return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1)));
}
template <>
EIGEN_STRONG_INLINE Packet16f pcmp_le(const Packet16f& a, const Packet16f& b) {
__mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ);
- return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu));
+ return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1)));
}
template <>
EIGEN_STRONG_INLINE Packet16f pcmp_lt(const Packet16f& a, const Packet16f& b) {
__mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
- return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu));
+ return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1)));
}
template <>
EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {
__mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ);
- return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu));
+ return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1)));
}
template <>
EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) {
__mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ);
- return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu);
+ return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1));
}
template <>
EIGEN_STRONG_INLINE Packet16i pcmp_le(const Packet16i& a, const Packet16i& b) {
__mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LE);
- return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu);
+ return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1));
}
template <>
EIGEN_STRONG_INLINE Packet16i pcmp_lt(const Packet16i& a, const Packet16i& b) {
__mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
- return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu);
+ return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8l pcmp_eq(const Packet8l& a, const Packet8l& b) {
+ __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_EQ);
+ return _mm512_mask_set1_epi64(_mm512_setzero_si512(), mask, int64_t(-1));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8l pcmp_le(const Packet8l& a, const Packet8l& b) {
+ __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LE);
+ return _mm512_mask_set1_epi64(_mm512_setzero_si512(), mask, int64_t(-1));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8l pcmp_lt(const Packet8l& a, const Packet8l& b) {
+ __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LT);
+ return _mm512_mask_set1_epi64(_mm512_setzero_si512(), mask, int64_t(-1));
}
template <>
@@ -685,7 +781,12 @@
template <>
EIGEN_STRONG_INLINE Packet16i ptrue<Packet16i>(const Packet16i& /*a*/) {
- return _mm512_set1_epi32(0xffffffffu);
+ return _mm512_set1_epi32(int32_t(-1));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8l ptrue<Packet8l>(const Packet8l& /*a*/) {
+ return _mm512_set1_epi64(int64_t(-1));
}
template <>
@@ -704,6 +805,11 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8l pand<Packet8l>(const Packet8l& a, const Packet8l& b) {
+ return _mm512_and_si512(a, b);
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a, const Packet16f& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_and_ps(a, b);
@@ -733,6 +839,11 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8l por<Packet8l>(const Packet8l& a, const Packet8l& b) {
+ return _mm512_or_si512(a, b);
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a, const Packet16f& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_or_ps(a, b);
@@ -756,6 +867,11 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8l pxor<Packet8l>(const Packet8l& a, const Packet8l& b) {
+ return _mm512_xor_si512(a, b);
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a, const Packet16f& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_xor_ps(a, b);
@@ -779,6 +895,11 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8l pandnot<Packet8l>(const Packet8l& a, const Packet8l& b) {
+ return _mm512_andnot_si512(b, a);
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a, const Packet16f& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
return _mm512_andnot_ps(b, a);
@@ -825,6 +946,21 @@
return _mm512_slli_epi32(a, N);
}
+template <int N>
+EIGEN_STRONG_INLINE Packet8l parithmetic_shift_right(Packet8l a) {
+ return _mm512_srai_epi64(a, N);
+}
+
+template <int N>
+EIGEN_STRONG_INLINE Packet8l plogical_shift_right(Packet8l a) {
+ return _mm512_srli_epi64(a, N);
+}
+
+template <int N>
+EIGEN_STRONG_INLINE Packet8l plogical_shift_left(Packet8l a) {
+ return _mm512_slli_epi64(a, N);
+}
+
template <>
EIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) {
EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from);
@@ -835,7 +971,11 @@
}
template <>
EIGEN_STRONG_INLINE Packet16i pload<Packet16i>(const int* from) {
- EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(reinterpret_cast<const __m512i*>(from));
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_epi32(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8l pload<Packet8l>(const int64_t* from) {
+ EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_epi64(from);
}
template <>
@@ -848,7 +988,11 @@
}
template <>
EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
- EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(reinterpret_cast<const __m512i*>(from));
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_epi32(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8l ploadu<Packet8l>(const int64_t* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_epi64(from);
}
template <>
@@ -868,42 +1012,35 @@
EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
// an unaligned load is required here as there is no requirement
// on the alignment of input pointer 'from'
- __m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
+ __m256i low_half = _mm256_castps_si256(_mm256_loadu_ps(from));
__m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half));
__m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0));
return pairs;
}
-#ifdef EIGEN_VECTORIZE_AVX512DQ
-// FIXME: this does not look optimal, better load a Packet4d and shuffle...
-// Loads 4 doubles from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3,
+// Loads 4 doubles from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3,
// a3}
template <>
EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
- __m512d x = _mm512_setzero_pd();
- x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0);
- x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1);
- x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2);
- x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3);
- return x;
+ Packet8d tmp = _mm512_castpd256_pd512(ploadu<Packet4d>(from));
+ const Packet8l scatter_mask = _mm512_set_epi64(3, 3, 2, 2, 1, 1, 0, 0);
+ return _mm512_permutexvar_pd(scatter_mask, tmp);
}
-#else
+
+// Loads 4 int64_t from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3,
+// a3}
template <>
-EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
- __m512d x = _mm512_setzero_pd();
- x = _mm512_mask_broadcastsd_pd(x, 0x3 << 0, _mm_load_sd(from + 0));
- x = _mm512_mask_broadcastsd_pd(x, 0x3 << 2, _mm_load_sd(from + 1));
- x = _mm512_mask_broadcastsd_pd(x, 0x3 << 4, _mm_load_sd(from + 2));
- x = _mm512_mask_broadcastsd_pd(x, 0x3 << 6, _mm_load_sd(from + 3));
- return x;
+EIGEN_STRONG_INLINE Packet8l ploaddup<Packet8l>(const int64_t* from) {
+ Packet8l tmp = _mm512_castsi256_si512(ploadu<Packet4l>(from));
+ const Packet8l scatter_mask = _mm512_set_epi64(3, 3, 2, 2, 1, 1, 0, 0);
+ return _mm512_permutexvar_epi64(scatter_mask, tmp);
}
-#endif
// Loads 8 integers from memory and returns the packet
// {a0, a0, a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7}
template <>
EIGEN_STRONG_INLINE Packet16i ploaddup<Packet16i>(const int* from) {
- __m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
+  __m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
__m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half));
__m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0));
return _mm512_castps_si512(pairs);
@@ -929,6 +1066,17 @@
return _mm512_insertf64x4(tmp, lane1, 1);
}
+// Loads 2 int64_t from memory and returns the packet
+// {a0, a0, a0, a0, a1, a1, a1, a1}
+template <>
+EIGEN_STRONG_INLINE Packet8l ploadquad<Packet8l>(const int64_t* from) {
+ __m256i lane0 = _mm256_set1_epi64x(*from);
+ __m256i lane1 = _mm256_set1_epi64x(*(from + 1));
+ __m512i tmp = _mm512_undefined_epi32();
+ tmp = _mm512_inserti64x4(tmp, lane0, 0);
+ return _mm512_inserti64x4(tmp, lane1, 1);
+}
+
// Loads 4 integers from memory and returns the packet
// {a0, a0, a0, a0, a1, a1, a1, a1, a2, a2, a2, a2, a3, a3, a3, a3}
template <>
@@ -948,7 +1096,11 @@
}
template <>
EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet16i& from) {
- EIGEN_DEBUG_ALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to), from);
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_store_epi32(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<int64_t>(int64_t* to, const Packet8l& from) {
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_store_epi64(to, from);
}
template <>
@@ -961,7 +1113,11 @@
}
template <>
EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {
- EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to), from);
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_epi32(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<int64_t>(int64_t* to, const Packet8l& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_epi64(to, from);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from, uint16_t umask) {
@@ -1015,6 +1171,14 @@
return _mm512_i32gather_pd(indices, from, 8);
}
template <>
+EIGEN_DEVICE_FUNC inline Packet8l pgather<int64_t, Packet8l>(const int64_t* from, Index stride) {
+ Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride));
+ Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+ Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
+
+ return _mm512_i32gather_epi64(indices, from, 8);
+}
+template <>
EIGEN_DEVICE_FUNC inline Packet16i pgather<int, Packet16i>(const int* from, Index stride) {
Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));
Packet16i stride_multiplier = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@@ -1043,7 +1207,6 @@
__mmask8 mask = static_cast<__mmask8>(umask);
_mm512_mask_i32scatter_pd(to, mask, indices, from, 8);
}
-
template <>
EIGEN_DEVICE_FUNC inline void pscatter<float, Packet16f>(float* to, const Packet16f& from, Index stride) {
Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));
@@ -1059,6 +1222,13 @@
_mm512_i32scatter_pd(to, indices, from, 8);
}
template <>
+EIGEN_DEVICE_FUNC inline void pscatter<int64_t, Packet8l>(int64_t* to, const Packet8l& from, Index stride) {
+ Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride));
+ Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+ Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
+ _mm512_i32scatter_epi64(to, indices, from, 8);
+}
+template <>
EIGEN_DEVICE_FUNC inline void pscatter<int, Packet16i>(int* to, const Packet16i& from, Index stride) {
Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));
Packet16i stride_multiplier = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@@ -1081,6 +1251,11 @@
Packet16i pa = pset1<Packet16i>(a);
pstore(to, pa);
}
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet8l>(int64_t* to, const int64_t& a) {
+ Packet8l pa = pset1<Packet8l>(a);
+ pstore(to, pa);
+}
template <>
EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
@@ -1097,15 +1272,20 @@
template <>
EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
- return _mm_cvtss_f32(_mm512_extractf32x4_ps(a, 0));
+ return _mm512_cvtss_f32(a);
}
template <>
EIGEN_STRONG_INLINE double pfirst<Packet8d>(const Packet8d& a) {
- return _mm_cvtsd_f64(_mm256_extractf128_pd(_mm512_extractf64x4_pd(a, 0), 0));
+ return _mm512_cvtsd_f64(a);
+}
+template <>
+EIGEN_STRONG_INLINE int64_t pfirst<Packet8l>(const Packet8l& a) {
+ int64_t x = _mm_extract_epi64_0(_mm512_extracti32x4_epi32(a, 0));
+ return x;
}
template <>
EIGEN_STRONG_INLINE int pfirst<Packet16i>(const Packet16i& a) {
- return _mm_extract_epi32(_mm512_extracti32x4_epi32(a, 0), 0);
+ return _mm512_cvtsi512_si32(a);
}
template <>
@@ -1124,6 +1304,11 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8l preverse(const Packet8l& a) {
+ return _mm512_permutexvar_epi64(_mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7), a);
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) {
// _mm512_abs_ps intrinsic not found, so hack around it
return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff)));
@@ -1137,6 +1322,10 @@
EIGEN_STRONG_INLINE Packet16i pabs(const Packet16i& a) {
return _mm512_abs_epi32(a);
}
+template <>
+EIGEN_STRONG_INLINE Packet8l pabs(const Packet8l& a) {
+ return _mm512_abs_epi64(a);
+}
template <>
EIGEN_STRONG_INLINE Packet16h psignbit(const Packet16h& a) {
@@ -1268,9 +1457,7 @@
__m128 lane2 = _mm512_extractf32x4_ps(a, 2);
__m128 lane3 = _mm512_extractf32x4_ps(a, 3);
__m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3));
- sum = _mm_hadd_ps(sum, sum);
- sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1));
- return _mm_cvtss_f32(sum);
+ return predux<Packet4f>(sum);
#endif
}
template <>
@@ -1278,26 +1465,17 @@
__m256d lane0 = _mm512_extractf64x4_pd(a, 0);
__m256d lane1 = _mm512_extractf64x4_pd(a, 1);
__m256d sum = _mm256_add_pd(lane0, lane1);
- __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1));
- return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0)));
+ return predux<Packet4d>(sum);
}
+
+template <>
+EIGEN_STRONG_INLINE int64_t predux<Packet8l>(const Packet8l& a) {
+ return _mm512_reduce_add_epi64(a);
+}
+
template <>
EIGEN_STRONG_INLINE int predux<Packet16i>(const Packet16i& a) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
- __m256i lane0 = _mm512_extracti32x8_epi32(a, 0);
- __m256i lane1 = _mm512_extracti32x8_epi32(a, 1);
- Packet8i x = _mm256_add_epi32(lane0, lane1);
- return predux<Packet8i>(x);
-#else
- __m128i lane0 = _mm512_extracti32x4_epi32(a, 0);
- __m128i lane1 = _mm512_extracti32x4_epi32(a, 1);
- __m128i lane2 = _mm512_extracti32x4_epi32(a, 2);
- __m128i lane3 = _mm512_extracti32x4_epi32(a, 3);
- __m128i sum = _mm_add_epi32(_mm_add_epi32(lane0, lane1), _mm_add_epi32(lane2, lane3));
- sum = _mm_hadd_epi32(sum, sum);
- sum = _mm_hadd_epi32(sum, _mm_castps_si128(_mm_permute_ps(_mm_castsi128_ps(sum), 1)));
- return _mm_cvtsi128_si32(sum);
-#endif
+ return _mm512_reduce_add_epi32(a);
}
template <>
@@ -1340,6 +1518,13 @@
}
template <>
+EIGEN_STRONG_INLINE Packet4l predux_half_dowto4<Packet8l>(const Packet8l& a) {
+ __m256i lane0 = _mm512_extracti64x4_epi64(a, 0);
+ __m256i lane1 = _mm512_extracti64x4_epi64(a, 1);
+ return _mm256_add_epi64(lane0, lane1);
+}
+
+template <>
EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
// #ifdef EIGEN_VECTORIZE_AVX512DQ
#if 0
@@ -1367,6 +1552,14 @@
res = pmul(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));
}
+template <>
+EIGEN_STRONG_INLINE int predux_mul<Packet16i>(const Packet16i& a) {
+ return _mm512_reduce_mul_epi32(a);
+}
+template <>
+EIGEN_STRONG_INLINE int64_t predux_mul<Packet8l>(const Packet8l& a) {
+ return _mm512_reduce_mul_epi64(a);
+}
template <>
EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {
@@ -1386,6 +1579,14 @@
res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
+template <>
+EIGEN_STRONG_INLINE int predux_min<Packet16i>(const Packet16i& a) {
+ return _mm512_reduce_min_epi32(a);
+}
+template <>
+EIGEN_STRONG_INLINE int64_t predux_min<Packet8l>(const Packet8l& a) {
+ return _mm512_reduce_min_epi64(a);
+}
template <>
EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {
@@ -1406,6 +1607,14 @@
res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));
return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
+template <>
+EIGEN_STRONG_INLINE int predux_max<Packet16i>(const Packet16i& a) {
+ return _mm512_reduce_max_epi32(a);
+}
+template <>
+EIGEN_STRONG_INLINE int64_t predux_max<Packet8l>(const Packet8l& a) {
+ return _mm512_reduce_max_epi64(a);
+}
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x) {
@@ -1617,6 +1826,10 @@
OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \
OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 1);
+#define PACK_OUTPUT_L(OUTPUT, INPUT, INDEX, STRIDE) \
+ OUTPUT[INDEX] = _mm512_inserti64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \
+ OUTPUT[INDEX] = _mm512_inserti64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 1);
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 4>& kernel) {
__m512d T0 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
__m512d T1 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xff);
@@ -1695,6 +1908,88 @@
kernel.packet[7] = T7;
}
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8l, 4>& kernel) {
+ __m512i T0 = _mm512_castpd_si512(
+ _mm512_shuffle_pd(_mm512_castsi512_pd(kernel.packet[0]), _mm512_castsi512_pd(kernel.packet[1]), 0));
+ __m512i T1 = _mm512_castpd_si512(
+ _mm512_shuffle_pd(_mm512_castsi512_pd(kernel.packet[0]), _mm512_castsi512_pd(kernel.packet[1]), 0xff));
+ __m512i T2 = _mm512_castpd_si512(
+ _mm512_shuffle_pd(_mm512_castsi512_pd(kernel.packet[2]), _mm512_castsi512_pd(kernel.packet[3]), 0));
+ __m512i T3 = _mm512_castpd_si512(
+ _mm512_shuffle_pd(_mm512_castsi512_pd(kernel.packet[2]), _mm512_castsi512_pd(kernel.packet[3]), 0xff));
+
+ PacketBlock<Packet4l, 8> tmp;
+
+ tmp.packet[0] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T0, 0), _mm512_extracti64x4_epi64(T2, 0), 0x20);
+ tmp.packet[1] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T1, 0), _mm512_extracti64x4_epi64(T3, 0), 0x20);
+ tmp.packet[2] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T0, 0), _mm512_extracti64x4_epi64(T2, 0), 0x31);
+ tmp.packet[3] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T1, 0), _mm512_extracti64x4_epi64(T3, 0), 0x31);
+
+ tmp.packet[4] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T0, 1), _mm512_extracti64x4_epi64(T2, 1), 0x20);
+ tmp.packet[5] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T1, 1), _mm512_extracti64x4_epi64(T3, 1), 0x20);
+ tmp.packet[6] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T0, 1), _mm512_extracti64x4_epi64(T2, 1), 0x31);
+ tmp.packet[7] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T1, 1), _mm512_extracti64x4_epi64(T3, 1), 0x31);
+
+ PACK_OUTPUT_L(kernel.packet, tmp.packet, 0, 1);
+ PACK_OUTPUT_L(kernel.packet, tmp.packet, 1, 1);
+ PACK_OUTPUT_L(kernel.packet, tmp.packet, 2, 1);
+ PACK_OUTPUT_L(kernel.packet, tmp.packet, 3, 1);
+}
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8l, 8>& kernel) {
+ __m512i T0 = _mm512_unpacklo_epi64(kernel.packet[0], kernel.packet[1]);
+ __m512i T1 = _mm512_unpackhi_epi64(kernel.packet[0], kernel.packet[1]);
+ __m512i T2 = _mm512_unpacklo_epi64(kernel.packet[2], kernel.packet[3]);
+ __m512i T3 = _mm512_unpackhi_epi64(kernel.packet[2], kernel.packet[3]);
+ __m512i T4 = _mm512_unpacklo_epi64(kernel.packet[4], kernel.packet[5]);
+ __m512i T5 = _mm512_unpackhi_epi64(kernel.packet[4], kernel.packet[5]);
+ __m512i T6 = _mm512_unpacklo_epi64(kernel.packet[6], kernel.packet[7]);
+ __m512i T7 = _mm512_unpackhi_epi64(kernel.packet[6], kernel.packet[7]);
+
+ kernel.packet[0] = _mm512_permutex_epi64(T2, 0x4E);
+ kernel.packet[0] = _mm512_mask_blend_epi64(0xCC, T0, kernel.packet[0]);
+ kernel.packet[2] = _mm512_permutex_epi64(T0, 0x4E);
+ kernel.packet[2] = _mm512_mask_blend_epi64(0xCC, kernel.packet[2], T2);
+ kernel.packet[1] = _mm512_permutex_epi64(T3, 0x4E);
+ kernel.packet[1] = _mm512_mask_blend_epi64(0xCC, T1, kernel.packet[1]);
+ kernel.packet[3] = _mm512_permutex_epi64(T1, 0x4E);
+ kernel.packet[3] = _mm512_mask_blend_epi64(0xCC, kernel.packet[3], T3);
+ kernel.packet[4] = _mm512_permutex_epi64(T6, 0x4E);
+ kernel.packet[4] = _mm512_mask_blend_epi64(0xCC, T4, kernel.packet[4]);
+ kernel.packet[6] = _mm512_permutex_epi64(T4, 0x4E);
+ kernel.packet[6] = _mm512_mask_blend_epi64(0xCC, kernel.packet[6], T6);
+ kernel.packet[5] = _mm512_permutex_epi64(T7, 0x4E);
+ kernel.packet[5] = _mm512_mask_blend_epi64(0xCC, T5, kernel.packet[5]);
+ kernel.packet[7] = _mm512_permutex_epi64(T5, 0x4E);
+ kernel.packet[7] = _mm512_mask_blend_epi64(0xCC, kernel.packet[7], T7);
+
+ T0 = _mm512_shuffle_i64x2(kernel.packet[4], kernel.packet[4], 0x4E);
+ T0 = _mm512_mask_blend_epi64(0xF0, kernel.packet[0], T0);
+ T4 = _mm512_shuffle_i64x2(kernel.packet[0], kernel.packet[0], 0x4E);
+ T4 = _mm512_mask_blend_epi64(0xF0, T4, kernel.packet[4]);
+ T1 = _mm512_shuffle_i64x2(kernel.packet[5], kernel.packet[5], 0x4E);
+ T1 = _mm512_mask_blend_epi64(0xF0, kernel.packet[1], T1);
+ T5 = _mm512_shuffle_i64x2(kernel.packet[1], kernel.packet[1], 0x4E);
+ T5 = _mm512_mask_blend_epi64(0xF0, T5, kernel.packet[5]);
+ T2 = _mm512_shuffle_i64x2(kernel.packet[6], kernel.packet[6], 0x4E);
+ T2 = _mm512_mask_blend_epi64(0xF0, kernel.packet[2], T2);
+ T6 = _mm512_shuffle_i64x2(kernel.packet[2], kernel.packet[2], 0x4E);
+ T6 = _mm512_mask_blend_epi64(0xF0, T6, kernel.packet[6]);
+ T3 = _mm512_shuffle_i64x2(kernel.packet[7], kernel.packet[7], 0x4E);
+ T3 = _mm512_mask_blend_epi64(0xF0, kernel.packet[3], T3);
+ T7 = _mm512_shuffle_i64x2(kernel.packet[3], kernel.packet[3], 0x4E);
+ T7 = _mm512_mask_blend_epi64(0xF0, T7, kernel.packet[7]);
+
+ kernel.packet[0] = T0;
+ kernel.packet[1] = T1;
+ kernel.packet[2] = T2;
+ kernel.packet[3] = T3;
+ kernel.packet[4] = T4;
+ kernel.packet[5] = T5;
+ kernel.packet[6] = T6;
+ kernel.packet[7] = T7;
+}
+
#define PACK_OUTPUT_I32(OUTPUT, INPUT, INDEX, STRIDE) \
EIGEN_INSERT_8i_INTO_16i(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]);
@@ -1852,20 +2147,13 @@
template <>
EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& ifPacket, const Packet16f& thenPacket,
const Packet16f& elsePacket) {
- __mmask16 m = (ifPacket.select[0]) | (ifPacket.select[1] << 1) | (ifPacket.select[2] << 2) |
- (ifPacket.select[3] << 3) | (ifPacket.select[4] << 4) | (ifPacket.select[5] << 5) |
- (ifPacket.select[6] << 6) | (ifPacket.select[7] << 7) | (ifPacket.select[8] << 8) |
- (ifPacket.select[9] << 9) | (ifPacket.select[10] << 10) | (ifPacket.select[11] << 11) |
- (ifPacket.select[12] << 12) | (ifPacket.select[13] << 13) | (ifPacket.select[14] << 14) |
- (ifPacket.select[15] << 15);
+ __mmask16 m = ifPacket.mask<__mmask16>();
return _mm512_mask_blend_ps(m, elsePacket, thenPacket);
}
template <>
EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ifPacket, const Packet8d& thenPacket,
const Packet8d& elsePacket) {
- __mmask8 m = (ifPacket.select[0]) | (ifPacket.select[1] << 1) | (ifPacket.select[2] << 2) |
- (ifPacket.select[3] << 3) | (ifPacket.select[4] << 4) | (ifPacket.select[5] << 5) |
- (ifPacket.select[6] << 6) | (ifPacket.select[7] << 7);
+ __mmask8 m = ifPacket.mask<__mmask8>();
return _mm512_mask_blend_pd(m, elsePacket, thenPacket);
}
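
Note: the rewritten ploaddup variants load four elements once and duplicate
them in place with a single _mm512_permutexvar using the index pattern
{0, 0, 1, 1, 2, 2, 3, 3}, replacing the old chain of four inserts or masked
broadcasts. A scalar reference model of the semantics, not Eigen API:

    #include <cstdint>

    // ploaddup reads 4 values and produces {a0, a0, a1, a1, a2, a2, a3, a3}.
    void ploaddup_ref(const int64_t* from, int64_t out[8]) {
      for (int i = 0; i < 8; ++i) out[i] = from[i / 2];
    }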
diff --git a/Eigen/src/Core/arch/AVX512/TypeCasting.h b/Eigen/src/Core/arch/AVX512/TypeCasting.h
index ccdb563..b16e9f6 100644
--- a/Eigen/src/Core/arch/AVX512/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX512/TypeCasting.h
@@ -37,6 +37,11 @@
template <>
struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};
+template <>
+struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
+template <>
+struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
+
#ifndef EIGEN_VECTORIZE_AVX512FP16
template <>
struct type_casting_traits<half, float> : vectorized_type_casting_traits<half, float> {};
@@ -76,6 +81,19 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8l pcast<Packet8d, Packet8l>(const Packet8d& a) {
+#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVX512VL)
+ return _mm512_cvttpd_epi64(a);
+#else
+  EIGEN_ALIGN64 double aux[8];
+ pstore(aux, a);
+ return _mm512_set_epi64(static_cast<int64_t>(aux[7]), static_cast<int64_t>(aux[6]), static_cast<int64_t>(aux[5]),
+ static_cast<int64_t>(aux[4]), static_cast<int64_t>(aux[3]), static_cast<int64_t>(aux[2]),
+ static_cast<int64_t>(aux[1]), static_cast<int64_t>(aux[0]));
+#endif
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {
return _mm512_cvtepi32_ps(a);
}
@@ -91,6 +109,19 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8d pcast<Packet8l, Packet8d>(const Packet8l& a) {
+#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVX512VL)
+ return _mm512_cvtepi64_pd(a);
+#else
+  EIGEN_ALIGN64 int64_t aux[8];
+ pstore(aux, a);
+ return _mm512_set_pd(static_cast<double>(aux[7]), static_cast<double>(aux[6]), static_cast<double>(aux[5]),
+ static_cast<double>(aux[4]), static_cast<double>(aux[3]), static_cast<double>(aux[2]),
+ static_cast<double>(aux[1]), static_cast<double>(aux[0]));
+#endif
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f pcast<Packet8d, Packet16f>(const Packet8d& a, const Packet8d& b) {
return cat256(_mm512_cvtpd_ps(a), _mm512_cvtpd_ps(b));
}
@@ -125,6 +156,16 @@
}
template <>
+EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet8l>(const Packet8l& a) {
+ return _mm512_castsi512_pd(a);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8l preinterpret<Packet8l, Packet8d>(const Packet8d& a) {
+ return _mm512_castpd_si512(a);
+}
+
+template <>
EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet8d>(const Packet8d& a) {
return _mm512_castpd_ps(a);
}
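
Note: without AVX512DQ there is no 512-bit packed conversion between double
and int64, so the fallback paths spill the packet to an aligned buffer and
convert element-wise; static_cast<int64_t>(double) truncates toward zero,
matching what _mm512_cvttpd_epi64 does in hardware. Scalar reference
semantics, as a sketch:

    #include <cstdint>

    void pcast_pd_to_epi64_ref(const double in[8], int64_t out[8]) {
      for (int i = 0; i < 8; ++i) out[i] = static_cast<int64_t>(in[i]);  // truncate toward zero
    }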
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 78dbf20..c973efd 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -129,8 +129,8 @@
const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
PacketI b = parithmetic_shift_right<2>(e); // floor(e/4);
Packet c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias))); // 2^b
- Packet out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
- b = psub(psub(psub(e, b), b), b); // e - 3b
+ Packet out = pmul(pmul(a, c), pmul(c, c)); // a * 2^(3b)
+ b = pnmadd(pset1<PacketI>(3), b, e); // e - 3b
c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias))); // 2^(e-3*b)
out = pmul(out, c);
return out;
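
Note: pldexp_generic computes a * 2^e as (a * 2^(3b)) * 2^(e-3b) with
b = floor(e/4), so neither constructed power of two overflows the exponent
field. The refactor keeps that identity but shortens the dependency chains:
pmul(pmul(a, c), pmul(c, c)) lets two of the multiplies issue in parallel,
and e - 3b becomes a single pnmadd(3, b, e) instead of three subtractions.
A scalar model of the scheme, assuming e is already clamped as in the
surrounding code:

    #include <cmath>

    double pldexp_ref(double a, int e) {
      int b = e >> 2;                            // floor(e/4), as parithmetic_shift_right<2>
      double c = std::ldexp(1.0, b);             // 2^b
      double out = (a * c) * (c * c);            // a * 2^(3b)
      return out * std::ldexp(1.0, e - 3 * b);   // times 2^(e-3b)
    }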
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 008109a..e91ef4d 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -296,6 +296,7 @@
HasMax = 0,
HasConj = 0,
HasSqrt = 1,
+ HasNegate = 0,
HasSign = 0 // Don't try to vectorize psign<bool> = identity.
};
};
@@ -602,11 +603,6 @@
}
template <>
-EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a) {
- return a;
-}
-
-template <>
EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) {
return a;
}
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 2f9b920..8d95819 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -24,7 +24,7 @@
*/
template <typename Scalar>
struct scalar_opposite_op {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return -a; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return numext::negate(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const {
return internal::pnegate(a);
@@ -455,8 +455,9 @@
*/
template <typename Scalar>
struct scalar_log2_op {
+ using RealScalar = typename NumTraits<Scalar>::Real;
EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const {
- return Scalar(EIGEN_LOG2E) * numext::log(a);
+ return Scalar(RealScalar(EIGEN_LOG2E)) * numext::log(a);
}
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const {
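
Note: both the scalar and packet paths use the change-of-base identity
log2(x) = log2(e) * ln(x), with EIGEN_LOG2E = log2(e) ≈ 1.4426950408889634.
Converting the constant through RealScalar first keeps the construction
well-formed, presumably avoiding implicit double-to-real narrowing for
scalar types such as std::complex<float>. A concrete sketch for complex
doubles:

    #include <cmath>
    #include <complex>

    std::complex<double> log2_ref(const std::complex<double>& a) {
      const double kLog2E = 1.4426950408889634;  // log2(e), i.e. EIGEN_LOG2E
      return kLog2E * std::log(a);
    }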
diff --git a/Eigen/src/Core/util/IndexedViewHelper.h b/Eigen/src/Core/util/IndexedViewHelper.h
index c187002..59486ea 100644
--- a/Eigen/src/Core/util/IndexedViewHelper.h
+++ b/Eigen/src/Core/util/IndexedViewHelper.h
@@ -308,6 +308,178 @@
static Index incr(const Indices& indices) { return indices.incr(); }
};
+// this helper class assumes internal::valid_indexed_view_overload<RowIndices, ColIndices>::value == true
+template <typename Derived, typename RowIndices, typename ColIndices, typename EnableIf = void>
+struct IndexedViewSelector;
+
+template <typename Indices, int SizeAtCompileTime>
+using IvcType = typename internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::type;
+
+template <int SizeAtCompileTime, typename Indices>
+inline IvcType<Indices, SizeAtCompileTime> CreateIndexSequence(size_t size, const Indices& indices) {
+ return internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::CreateIndexSequence(indices, size);
+}
+
+// Generic
+template <typename Derived, typename RowIndices, typename ColIndices>
+struct IndexedViewSelector<Derived, RowIndices, ColIndices,
+ std::enable_if_t<internal::traits<
+ IndexedView<Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>,
+ IvcType<ColIndices, Derived::ColsAtCompileTime>>>::ReturnAsIndexedView>> {
+ using ReturnType = IndexedView<Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>,
+ IvcType<ColIndices, Derived::ColsAtCompileTime>>;
+ using ConstReturnType = IndexedView<const Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>,
+ IvcType<ColIndices, Derived::ColsAtCompileTime>>;
+
+ static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
+ return ReturnType(derived, CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices),
+ CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices));
+ }
+ static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
+ const ColIndices& colIndices) {
+ return ConstReturnType(derived, CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices),
+ CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices));
+ }
+};
+
+// Block
+template <typename Derived, typename RowIndices, typename ColIndices>
+struct IndexedViewSelector<
+ Derived, RowIndices, ColIndices,
+ std::enable_if_t<internal::traits<IndexedView<Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>,
+ IvcType<ColIndices, Derived::ColsAtCompileTime>>>::ReturnAsBlock>> {
+ using ActualRowIndices = IvcType<RowIndices, Derived::RowsAtCompileTime>;
+ using ActualColIndices = IvcType<ColIndices, Derived::ColsAtCompileTime>;
+ using IndexedViewType = IndexedView<Derived, ActualRowIndices, ActualColIndices>;
+ using ConstIndexedViewType = IndexedView<const Derived, ActualRowIndices, ActualColIndices>;
+ using ReturnType = typename internal::traits<IndexedViewType>::BlockType;
+ using ConstReturnType = typename internal::traits<ConstIndexedViewType>::BlockType;
+ using RowHelper = internal::IndexedViewHelper<ActualRowIndices>;
+ using ColHelper = internal::IndexedViewHelper<ActualColIndices>;
+
+ static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
+ auto actualRowIndices = CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices);
+ auto actualColIndices = CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices);
+ return ReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices),
+ RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices));
+ }
+ static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
+ const ColIndices& colIndices) {
+ auto actualRowIndices = CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices);
+ auto actualColIndices = CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices);
+ return ConstReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices),
+ RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices));
+ }
+};
+
+// Scalar
+template <typename Derived, typename RowIndices, typename ColIndices>
+struct IndexedViewSelector<
+ Derived, RowIndices, ColIndices,
+ std::enable_if_t<internal::traits<IndexedView<Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>,
+ IvcType<ColIndices, Derived::ColsAtCompileTime>>>::ReturnAsScalar>> {
+ using ReturnType = typename DenseBase<Derived>::Scalar&;
+ using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType;
+ using ActualRowIndices = IvcType<RowIndices, Derived::RowsAtCompileTime>;
+ using ActualColIndices = IvcType<ColIndices, Derived::ColsAtCompileTime>;
+ using RowHelper = internal::IndexedViewHelper<ActualRowIndices>;
+ using ColHelper = internal::IndexedViewHelper<ActualColIndices>;
+ static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
+ auto actualRowIndices = CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices);
+ auto actualColIndices = CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices);
+ return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices));
+ }
+ static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
+ const ColIndices& colIndices) {
+ auto actualRowIndices = CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices);
+ auto actualColIndices = CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices);
+ return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices));
+ }
+};
+
+// this helper class assumes internal::is_valid_index_type<Indices>::value == false
+template <typename Derived, typename Indices, typename EnableIf = void>
+struct VectorIndexedViewSelector;
+
+// Generic
+template <typename Derived, typename Indices>
+struct VectorIndexedViewSelector<
+ Derived, Indices,
+ std::enable_if_t<!internal::is_single_range<IvcType<Indices, Derived::SizeAtCompileTime>>::value &&
+ internal::IndexedViewHelper<IvcType<Indices, Derived::SizeAtCompileTime>>::IncrAtCompileTime !=
+ 1>> {
+ static constexpr bool IsRowMajor = DenseBase<Derived>::IsRowMajor;
+ using ZeroIndex = internal::SingleRange<Index(0)>;
+ using RowMajorReturnType = IndexedView<Derived, ZeroIndex, IvcType<Indices, Derived::SizeAtCompileTime>>;
+ using ConstRowMajorReturnType = IndexedView<const Derived, ZeroIndex, IvcType<Indices, Derived::SizeAtCompileTime>>;
+
+ using ColMajorReturnType = IndexedView<Derived, IvcType<Indices, Derived::SizeAtCompileTime>, ZeroIndex>;
+ using ConstColMajorReturnType = IndexedView<const Derived, IvcType<Indices, Derived::SizeAtCompileTime>, ZeroIndex>;
+
+ using ReturnType = typename internal::conditional<IsRowMajor, RowMajorReturnType, ColMajorReturnType>::type;
+ using ConstReturnType =
+ typename internal::conditional<IsRowMajor, ConstRowMajorReturnType, ConstColMajorReturnType>::type;
+
+ template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
+ static inline RowMajorReturnType run(Derived& derived, const Indices& indices) {
+ return RowMajorReturnType(derived, ZeroIndex(0),
+ CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), indices));
+ }
+ template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
+ static inline ConstRowMajorReturnType run(const Derived& derived, const Indices& indices) {
+ return ConstRowMajorReturnType(derived, ZeroIndex(0),
+ CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), indices));
+ }
+ template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
+ static inline ColMajorReturnType run(Derived& derived, const Indices& indices) {
+ return ColMajorReturnType(derived, CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), indices),
+ ZeroIndex(0));
+ }
+ template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
+ static inline ConstColMajorReturnType run(const Derived& derived, const Indices& indices) {
+ return ConstColMajorReturnType(derived, CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), indices),
+ ZeroIndex(0));
+ }
+};
+
+// Block
+template <typename Derived, typename Indices>
+struct VectorIndexedViewSelector<
+ Derived, Indices,
+ std::enable_if_t<!internal::is_single_range<IvcType<Indices, Derived::SizeAtCompileTime>>::value &&
+ internal::IndexedViewHelper<IvcType<Indices, Derived::SizeAtCompileTime>>::IncrAtCompileTime ==
+ 1>> {
+ using Helper = internal::IndexedViewHelper<IvcType<Indices, Derived::SizeAtCompileTime>>;
+ using ReturnType = VectorBlock<Derived, Helper::SizeAtCompileTime>;
+ using ConstReturnType = VectorBlock<const Derived, Helper::SizeAtCompileTime>;
+ static inline ReturnType run(Derived& derived, const Indices& indices) {
+ auto actualIndices = CreateIndexSequence<Derived::SizeAtCompileTime>(derived.size(), indices);
+ return ReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices));
+ }
+ static inline ConstReturnType run(const Derived& derived, const Indices& indices) {
+ auto actualIndices = CreateIndexSequence<Derived::SizeAtCompileTime>(derived.size(), indices);
+ return ConstReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices));
+ }
+};
+
+// Symbolic
+template <typename Derived, typename Indices>
+struct VectorIndexedViewSelector<
+ Derived, Indices,
+ std::enable_if_t<internal::is_single_range<IvcType<Indices, Derived::SizeAtCompileTime>>::value>> {
+ using ReturnType = typename DenseBase<Derived>::Scalar&;
+ using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType;
+ using Helper = internal::IndexedViewHelper<IvcType<Indices, Derived::SizeAtCompileTime>>;
+ static inline ReturnType run(Derived& derived, const Indices& indices) {
+ auto actualIndices = CreateIndexSequence<Derived::SizeAtCompileTime>(derived.size(), indices);
+ return derived(Helper::first(actualIndices));
+ }
+ static inline ConstReturnType run(const Derived& derived, const Indices& indices) {
+ auto actualIndices = CreateIndexSequence<Derived::SizeAtCompileTime>(derived.size(), indices);
+ return derived(Helper::first(actualIndices));
+ }
+};
+
} // end namespace internal
} // end namespace Eigen
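
Note: the three IndexedViewSelector specializations pick the cheapest return
type for A(rows, cols): a generic IndexedView for arbitrary index lists, a
Block when both index sequences have unit increment, and a plain scalar
reference when both collapse to a single index; the VectorIndexedViewSelector
trio mirrors this for one-dimensional indexing. The dispatch is invisible at
the call site. A usage sketch against Eigen's public 3.4-style indexing API:

    #include <vector>
    #include <Eigen/Dense>

    void demo(Eigen::MatrixXd& A) {
      using Eigen::all; using Eigen::last; using Eigen::seq;
      auto block = A(seq(0, 2), all);                  // unit increment -> Block
      auto view  = A(std::vector<int>{2, 0, 1}, all);  // arbitrary indices -> IndexedView
      double& s  = A(last, last);                      // single indices -> scalar reference
    }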
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 8931c4a..1d8ded9 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -57,22 +57,22 @@
typedef AngleAxis<Scalar> AngleAxisType;
/** \returns the \c x coefficient */
- EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
/** \returns the \c y coefficient */
- EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
/** \returns the \c z coefficient */
- EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
/** \returns the \c w coefficient */
- EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
/** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */
- EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
/** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */
- EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
/** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */
- EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
/** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */
- EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
/** \returns a read-only vector expression of the imaginary part (x,y,z) */
EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients, 3> vec() const { return coeffs().template head<3>(); }
diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h
index 956ef56..682c4c7 100644
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -69,18 +69,18 @@
EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
/** \brief Returns the x-translation by value. **/
- EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar x() const { return m_coeffs.x(); }
/** \brief Returns the y-translation by value. **/
- EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar y() const { return m_coeffs.y(); }
/** \brief Returns the z-translation by value. **/
- EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar z() const { return m_coeffs.z(); }
/** \brief Returns the x-translation as a reference. **/
- EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar& x() { return m_coeffs.x(); }
/** \brief Returns the y-translation as a reference. **/
- EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar& y() { return m_coeffs.y(); }
/** \brief Returns the z-translation as a reference. **/
- EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar& z() { return m_coeffs.z(); }
EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; }
EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; }
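The EIGEN_CONSTEXPR additions to the Quaternion and Translation accessors (commit 3c6521ed9) change no runtime behavior; they only let the getters participate in constant evaluation. A stand-in sketch of what the qualifier buys, using a plain struct since Quaternion itself is not constexpr-constructible:

  struct Vec4Sketch {
    double d[4];
    constexpr double w() const { return d[3]; }  // the accessors gain this qualifier
  };
  static_assert(Vec4Sketch{{0.0, 0.0, 0.0, 1.0}}.w() == 1.0, "usable in constant expressions");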
diff --git a/Eigen/src/plugins/IndexedViewMethods.inc b/Eigen/src/plugins/IndexedViewMethods.inc
index c3df429..a51e349 100644
--- a/Eigen/src/plugins/IndexedViewMethods.inc
+++ b/Eigen/src/plugins/IndexedViewMethods.inc
@@ -10,184 +10,6 @@
#if !defined(EIGEN_PARSED_BY_DOXYGEN)
public:
-// define some aliases to ease readability
-
-template <typename Indices>
-using IvcRowType = typename internal::IndexedViewHelperIndicesWrapper<Indices, RowsAtCompileTime>::type;
-
-template <typename Indices>
-using IvcColType = typename internal::IndexedViewHelperIndicesWrapper<Indices, ColsAtCompileTime>::type;
-
-template <typename Indices>
-using IvcSizeType = typename internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::type;
-
-template <typename Indices>
-inline IvcRowType<Indices> ivcRow(const Indices& indices) const {
- return internal::IndexedViewHelperIndicesWrapper<Indices, RowsAtCompileTime>::CreateIndexSequence(indices,
- derived().rows());
-}
-
-template <typename Indices>
-inline IvcColType<Indices> ivcCol(const Indices& indices) const {
- return internal::IndexedViewHelperIndicesWrapper<Indices, ColsAtCompileTime>::CreateIndexSequence(indices,
- derived().cols());
-}
-
-template <typename Indices>
-inline IvcSizeType<Indices> ivcSize(const Indices& indices) const {
- return internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::CreateIndexSequence(indices,
- derived().size());
- ;
-}
-
-// this helper class assumes internal::valid_indexed_view_overload<RowIndices, ColIndices>::value == true
-template <typename RowIndices, typename ColIndices, typename EnableIf = void>
-struct IndexedViewSelector;
-
-// Generic
-template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<
- RowIndices, ColIndices,
- std::enable_if_t<
- internal::traits<IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsIndexedView>> {
- using ReturnType = IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
- using ConstReturnType = IndexedView<const Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>;
-
- static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
- return ReturnType(derived, derived.ivcRow(rowIndices), derived.ivcCol(colIndices));
- }
- static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
- const ColIndices& colIndices) {
- return ConstReturnType(derived, derived.ivcRow(rowIndices), derived.ivcCol(colIndices));
- }
-};
-
-// Block
-template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<RowIndices, ColIndices,
- std::enable_if_t<internal::traits<
- IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsBlock>> {
- using ActualRowIndices = IvcRowType<RowIndices>;
- using ActualColIndices = IvcColType<ColIndices>;
- using IndexedViewType = IndexedView<Derived, ActualRowIndices, ActualColIndices>;
- using ConstIndexedViewType = IndexedView<const Derived, ActualRowIndices, ActualColIndices>;
- using ReturnType = typename internal::traits<IndexedViewType>::BlockType;
- using ConstReturnType = typename internal::traits<ConstIndexedViewType>::BlockType;
- using RowHelper = internal::IndexedViewHelper<ActualRowIndices>;
- using ColHelper = internal::IndexedViewHelper<ActualColIndices>;
-
- static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
- auto actualRowIndices = derived.ivcRow(rowIndices);
- auto actualColIndices = derived.ivcCol(colIndices);
- return ReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices),
- RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices));
- }
- static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
- const ColIndices& colIndices) {
- auto actualRowIndices = derived.ivcRow(rowIndices);
- auto actualColIndices = derived.ivcCol(colIndices);
- return ConstReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices),
- RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices));
- }
-};
-
-// Scalar
-template <typename RowIndices, typename ColIndices>
-struct IndexedViewSelector<RowIndices, ColIndices,
- std::enable_if_t<internal::traits<
- IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsScalar>> {
- using ReturnType = typename DenseBase<Derived>::Scalar&;
- using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType;
- using ActualRowIndices = IvcRowType<RowIndices>;
- using ActualColIndices = IvcColType<ColIndices>;
- using RowHelper = internal::IndexedViewHelper<ActualRowIndices>;
- using ColHelper = internal::IndexedViewHelper<ActualColIndices>;
- static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) {
- auto actualRowIndices = derived.ivcRow(rowIndices);
- auto actualColIndices = derived.ivcCol(colIndices);
- return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices));
- }
- static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices,
- const ColIndices& colIndices) {
- auto actualRowIndices = derived.ivcRow(rowIndices);
- auto actualColIndices = derived.ivcCol(colIndices);
- return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices));
- }
-};
-
-// this helper class assumes internal::is_valid_index_type<Indices>::value == false
-template <typename Indices, typename EnableIf = void>
-struct VectorIndexedViewSelector;
-
-// Generic
-template <typename Indices>
-struct VectorIndexedViewSelector<
- Indices, std::enable_if_t<!internal::is_single_range<IvcSizeType<Indices>>::value &&
- internal::IndexedViewHelper<IvcSizeType<Indices>>::IncrAtCompileTime != 1>> {
- static constexpr bool IsRowMajor = DenseBase<Derived>::IsRowMajor;
- using ZeroIndex = internal::SingleRange<Index(0)>;
- using RowMajorReturnType = IndexedView<Derived, ZeroIndex, IvcSizeType<Indices>>;
- using ConstRowMajorReturnType = IndexedView<const Derived, ZeroIndex, IvcSizeType<Indices>>;
-
- using ColMajorReturnType = IndexedView<Derived, IvcSizeType<Indices>, ZeroIndex>;
- using ConstColMajorReturnType = IndexedView<const Derived, IvcSizeType<Indices>, ZeroIndex>;
-
- using ReturnType = typename internal::conditional<IsRowMajor, RowMajorReturnType, ColMajorReturnType>::type;
- using ConstReturnType =
- typename internal::conditional<IsRowMajor, ConstRowMajorReturnType, ConstColMajorReturnType>::type;
-
- template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
- static inline RowMajorReturnType run(Derived& derived, const Indices& indices) {
- return RowMajorReturnType(derived, ZeroIndex(0), derived.ivcCol(indices));
- }
- template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
- static inline ConstRowMajorReturnType run(const Derived& derived, const Indices& indices) {
- return ConstRowMajorReturnType(derived, ZeroIndex(0), derived.ivcCol(indices));
- }
- template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
- static inline ColMajorReturnType run(Derived& derived, const Indices& indices) {
- return ColMajorReturnType(derived, derived.ivcRow(indices), ZeroIndex(0));
- }
- template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
- static inline ConstColMajorReturnType run(const Derived& derived, const Indices& indices) {
- return ConstColMajorReturnType(derived, derived.ivcRow(indices), ZeroIndex(0));
- }
-};
-
-// Block
-template <typename Indices>
-struct VectorIndexedViewSelector<
- Indices, std::enable_if_t<!internal::is_single_range<IvcSizeType<Indices>>::value &&
- internal::IndexedViewHelper<IvcSizeType<Indices>>::IncrAtCompileTime == 1>> {
- using Helper = internal::IndexedViewHelper<IvcSizeType<Indices>>;
- using ReturnType = VectorBlock<Derived, Helper::SizeAtCompileTime>;
- using ConstReturnType = VectorBlock<const Derived, Helper::SizeAtCompileTime>;
- static inline ReturnType run(Derived& derived, const Indices& indices) {
- auto actualIndices = derived.ivcSize(indices);
- return ReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices));
- }
- static inline ConstReturnType run(const Derived& derived, const Indices& indices) {
- auto actualIndices = derived.ivcSize(indices);
- return ConstReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices));
- }
-};
-
-// Symbolic
-template <typename Indices>
-struct VectorIndexedViewSelector<Indices, std::enable_if_t<internal::is_single_range<IvcSizeType<Indices>>::value>> {
- using ReturnType = typename DenseBase<Derived>::Scalar&;
- using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType;
- using Helper = internal::IndexedViewHelper<IvcSizeType<Indices>>;
- static inline ReturnType run(Derived& derived, const Indices& indices) {
- auto actualIndices = derived.ivcSize(indices);
- return derived(Helper::first(actualIndices));
- }
- static inline ConstReturnType run(const Derived& derived, const Indices& indices) {
- auto actualIndices = derived.ivcSize(indices);
- return derived(Helper::first(actualIndices));
- }
-};
-
// SFINAE dummy types
template <typename RowIndices, typename ColIndices>
@@ -210,24 +32,26 @@
// non-const versions
-template <typename RowIndices, typename ColIndices>
-using IndexedViewType = typename IndexedViewSelector<RowIndices, ColIndices>::ReturnType;
+template <typename RowIndices, typename ColIndices>
+using IndexedViewType = typename internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::ReturnType;
-template <typename RowIndices, typename ColIndices, EnableOverload<RowIndices, ColIndices> = true>
-IndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices, const ColIndices& colIndices) {
-  return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), rowIndices, colIndices);
-}
+template <typename RowIndices, typename ColIndices, EnableOverload<RowIndices, ColIndices> = true>
+IndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices, const ColIndices& colIndices) {
+  return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), rowIndices, colIndices);
+}
template <typename RowType, size_t RowSize, typename ColIndices, typename RowIndices = Array<RowType, RowSize, 1>,
EnableOverload<RowIndices, ColIndices> = true>
IndexedViewType<RowIndices, ColIndices> operator()(const RowType (&rowIndices)[RowSize], const ColIndices& colIndices) {
- return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, colIndices);
+ return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices},
+ colIndices);
}
template <typename RowIndices, typename ColType, size_t ColSize, typename ColIndices = Array<ColType, ColSize, 1>,
EnableOverload<RowIndices, ColIndices> = true>
IndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices, const ColType (&colIndices)[ColSize]) {
- return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), rowIndices, ColIndices{colIndices});
+ return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), rowIndices,
+ ColIndices{colIndices});
}
template <typename RowType, size_t RowSize, typename ColType, size_t ColSize,
@@ -235,32 +59,35 @@
EnableOverload<RowIndices, ColIndices> = true>
IndexedViewType<RowIndices, ColIndices> operator()(const RowType (&rowIndices)[RowSize],
const ColType (&colIndices)[ColSize]) {
- return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, ColIndices{colIndices});
+ return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices},
+ ColIndices{colIndices});
}
// const versions
template <typename RowIndices, typename ColIndices>
-using ConstIndexedViewType = typename IndexedViewSelector<RowIndices, ColIndices>::ConstReturnType;
+using ConstIndexedViewType = typename internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::ConstReturnType;
template <typename RowIndices, typename ColIndices, EnableConstOverload<RowIndices, ColIndices> = true>
ConstIndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices,
const ColIndices& colIndices) const {
- return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), rowIndices, colIndices);
+ return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), rowIndices, colIndices);
}
template <typename RowType, size_t RowSize, typename ColIndices, typename RowIndices = Array<RowType, RowSize, 1>,
EnableConstOverload<RowIndices, ColIndices> = true>
ConstIndexedViewType<RowIndices, ColIndices> operator()(const RowType (&rowIndices)[RowSize],
const ColIndices& colIndices) const {
- return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, colIndices);
+ return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices},
+ colIndices);
}
template <typename RowIndices, typename ColType, size_t ColSize, typename ColIndices = Array<ColType, ColSize, 1>,
EnableConstOverload<RowIndices, ColIndices> = true>
ConstIndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices,
const ColType (&colIndices)[ColSize]) const {
- return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), rowIndices, ColIndices{colIndices});
+ return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), rowIndices,
+ ColIndices{colIndices});
}
template <typename RowType, size_t RowSize, typename ColType, size_t ColSize,
@@ -268,7 +95,8 @@
EnableConstOverload<RowIndices, ColIndices> = true>
ConstIndexedViewType<RowIndices, ColIndices> operator()(const RowType (&rowIndices)[RowSize],
const ColType (&colIndices)[ColSize]) const {
- return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, ColIndices{colIndices});
+ return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices},
+ ColIndices{colIndices});
}
// Public API for 1D vectors/arrays
@@ -276,37 +104,37 @@
// non-const versions
template <typename Indices>
-using VectorIndexedViewType = typename VectorIndexedViewSelector<Indices>::ReturnType;
+using VectorIndexedViewType = typename internal::VectorIndexedViewSelector<Derived, Indices>::ReturnType;
template <typename Indices, EnableVectorOverload<Indices> = true>
VectorIndexedViewType<Indices> operator()(const Indices& indices) {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return VectorIndexedViewSelector<Indices>::run(derived(), indices);
+ return internal::VectorIndexedViewSelector<Derived, Indices>::run(derived(), indices);
}
template <typename IndexType, size_t Size, typename Indices = Array<IndexType, Size, 1>,
EnableVectorOverload<Indices> = true>
VectorIndexedViewType<Indices> operator()(const IndexType (&indices)[Size]) {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return VectorIndexedViewSelector<Indices>::run(derived(), Indices{indices});
+ return internal::VectorIndexedViewSelector<Derived, Indices>::run(derived(), Indices{indices});
}
// const versions
template <typename Indices>
-using ConstVectorIndexedViewType = typename VectorIndexedViewSelector<Indices>::ConstReturnType;
+using ConstVectorIndexedViewType = typename internal::VectorIndexedViewSelector<Derived, Indices>::ConstReturnType;
template <typename Indices, EnableConstVectorOverload<Indices> = true>
ConstVectorIndexedViewType<Indices> operator()(const Indices& indices) const {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return VectorIndexedViewSelector<Indices>::run(derived(), indices);
+ return internal::VectorIndexedViewSelector<Derived, Indices>::run(derived(), indices);
}
template <typename IndexType, size_t Size, typename Indices = Array<IndexType, Size, 1>,
EnableConstVectorOverload<Indices> = true>
ConstVectorIndexedViewType<Indices> operator()(const IndexType (&indices)[Size]) const {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return VectorIndexedViewSelector<Indices>::run(derived(), Indices{indices});
+ return internal::VectorIndexedViewSelector<Derived, Indices>::run(derived(), Indices{indices});
}
#else // EIGEN_PARSED_BY_DOXYGEN
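Net effect of the removal above: the selector machinery now lives in namespace internal, parameterized on Derived (see the IndexedViewHelper changes earlier in this diff), while every public operator() keeps its signature, so caller code is untouched. A compile sketch of the unchanged API:

  #include <Eigen/Dense>
  #include <vector>

  void indexed_view_api_sketch() {
    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(4, 4);
    std::vector<int> rows{3, 1};
    auto view = A(rows, Eigen::all);  // resolves via internal::IndexedViewSelector<Derived, ...>
    view(0, 0) = 2.0;                 // still a writable view
  }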
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index 543ef2e..9fb104c 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -614,9 +614,9 @@
VERIFY(o2.cols() == cols);
ArrayType2 o3(rows, cols);
- VERIFY(o3(0) == Scalar(rows) && o3(1) == Scalar(cols));
+ VERIFY(o3(0) == RealScalar(rows) && o3(1) == RealScalar(cols));
ArrayType2 o4(static_cast<int>(rows), static_cast<int>(cols));
- VERIFY(o4(0) == Scalar(rows) && o4(1) == Scalar(cols));
+ VERIFY(o4(0) == RealScalar(rows) && o4(1) == RealScalar(cols));
}
{
TwoDArrayType o1{rows, cols};
@@ -627,9 +627,9 @@
VERIFY(o2.cols() == cols);
ArrayType2 o3{rows, cols};
- VERIFY(o3(0) == Scalar(rows) && o3(1) == Scalar(cols));
+ VERIFY(o3(0) == RealScalar(rows) && o3(1) == RealScalar(cols));
ArrayType2 o4{int(rows), int(cols)};
- VERIFY(o4(0) == Scalar(rows) && o4(1) == Scalar(cols));
+ VERIFY(o4(0) == RealScalar(rows) && o4(1) == RealScalar(cols));
}
}
diff --git a/test/bfloat16_float.cpp b/test/bfloat16_float.cpp
index 922a6d1..5be49d9 100644
--- a/test/bfloat16_float.cpp
+++ b/test/bfloat16_float.cpp
@@ -82,7 +82,7 @@
// Conversion to bool
VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(3)), true);
VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(0.33333f)), true);
- VERIFY_IS_EQUAL(bfloat16(-0.0), false);
+ VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(-0.0)), false);
VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(0.0)), false);
// Explicit conversion to float.
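The added static_cast keeps VERIFY_IS_EQUAL comparing bool against bool instead of relying on an implicit mixed-type comparison (presumably what MSVC flagged); the semantics are unchanged, as this sketch notes:

  #include <Eigen/Core>

  bool bfloat16_negzero_sketch() {
    // negative zero is still zero, so the explicit bool conversion yields false
    return static_cast<bool>(Eigen::bfloat16(-0.0f));
  }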
diff --git a/test/geo_eulerangles.cpp b/test/geo_eulerangles.cpp
index 3d443de..11c0449 100644
--- a/test/geo_eulerangles.cpp
+++ b/test/geo_eulerangles.cpp
@@ -23,6 +23,7 @@
typedef AngleAxis<Scalar> AngleAxisx;
const Matrix3 m(AngleAxisx(ea[0], Vector3::Unit(i)) * AngleAxisx(ea[1], Vector3::Unit(j)) *
AngleAxisx(ea[2], Vector3::Unit(k)));
+ const Scalar kPi = Scalar(EIGEN_PI);
// Test non-canonical eulerAngles
{
@@ -33,11 +34,11 @@
// approx_or_less_than does not work for 0
VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
- VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(EIGEN_PI));
- VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[1]);
- VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(EIGEN_PI));
- VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[2]);
- VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(EIGEN_PI));
+ VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], kPi);
+ VERIFY_IS_APPROX_OR_LESS_THAN(-kPi, eabis[1]);
+ VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], kPi);
+ VERIFY_IS_APPROX_OR_LESS_THAN(-kPi, eabis[2]);
+ VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], kPi);
}
// Test canonicalEulerAngles
@@ -47,20 +48,20 @@
AngleAxisx(eabis[2], Vector3::Unit(k)));
VERIFY_IS_APPROX(m, mbis);
- VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[0]);
- VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(EIGEN_PI));
+ VERIFY_IS_APPROX_OR_LESS_THAN(-kPi, eabis[0]);
+ VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], kPi);
if (i != k) {
// Tait-Bryan sequence
- VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI / 2), eabis[1]);
- VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(EIGEN_PI / 2));
+ VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(kPi / 2), eabis[1]);
+ VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(kPi / 2));
} else {
// Proper Euler sequence
// approx_or_less_than does not work for 0
VERIFY(0 < eabis[1] || test_isMuchSmallerThan(eabis[1], Scalar(1)));
- VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(EIGEN_PI));
+ VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], kPi);
}
- VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[2]);
- VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(EIGEN_PI));
+ VERIFY_IS_APPROX_OR_LESS_THAN(-kPi, eabis[2]);
+ VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], kPi);
}
}
@@ -100,7 +101,10 @@
typedef Quaternion<Scalar> Quaternionx;
typedef AngleAxis<Scalar> AngleAxisx;
- Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI));
+ const Scalar kPi = Scalar(EIGEN_PI);
+ const Scalar smallVal = static_cast<Scalar>(0.001);
+
+ Scalar a = internal::random<Scalar>(-kPi, kPi);
Quaternionx q1;
q1 = AngleAxisx(a, Vector3::Random().normalized());
Matrix3 m;
@@ -120,65 +124,65 @@
check_all_var(ea);
// Check with random angles in range [-pi:pi]x[-pi:pi]x[-pi:pi].
- ea = Array3::Random() * Scalar(EIGEN_PI);
+ ea = Array3::Random() * kPi;
check_all_var(ea);
- auto test_with_some_zeros = [](const Vector3& eaz) {
+ auto test_with_some_zeros = [=](const Vector3& eaz) {
check_all_var(eaz);
Vector3 ea_glz = eaz;
ea_glz[0] = Scalar(0);
check_all_var(ea_glz);
- ea_glz[0] = internal::random<Scalar>(-0.001, 0.001);
+ ea_glz[0] = internal::random<Scalar>(-smallVal, smallVal);
check_all_var(ea_glz);
ea_glz[2] = Scalar(0);
check_all_var(ea_glz);
- ea_glz[2] = internal::random<Scalar>(-0.001, 0.001);
+ ea_glz[2] = internal::random<Scalar>(-smallVal, smallVal);
check_all_var(ea_glz);
};
// Check gimbal lock configurations and a bit noisy gimbal locks
Vector3 ea_gl = ea;
- ea_gl[1] = EIGEN_PI / 2;
+ ea_gl[1] = kPi / 2;
test_with_some_zeros(ea_gl);
- ea_gl[1] += internal::random<Scalar>(-0.001, 0.001);
+ ea_gl[1] += internal::random<Scalar>(-smallVal, smallVal);
test_with_some_zeros(ea_gl);
- ea_gl[1] = -EIGEN_PI / 2;
+ ea_gl[1] = -kPi / 2;
test_with_some_zeros(ea_gl);
- ea_gl[1] += internal::random<Scalar>(-0.001, 0.001);
+ ea_gl[1] += internal::random<Scalar>(-smallVal, smallVal);
test_with_some_zeros(ea_gl);
- ea_gl[1] = EIGEN_PI / 2;
+ ea_gl[1] = kPi / 2;
ea_gl[2] = ea_gl[0];
test_with_some_zeros(ea_gl);
- ea_gl[1] += internal::random<Scalar>(-0.001, 0.001);
+ ea_gl[1] += internal::random<Scalar>(-smallVal, smallVal);
test_with_some_zeros(ea_gl);
- ea_gl[1] = -EIGEN_PI / 2;
+ ea_gl[1] = -kPi / 2;
test_with_some_zeros(ea_gl);
- ea_gl[1] += internal::random<Scalar>(-0.001, 0.001);
+ ea_gl[1] += internal::random<Scalar>(-smallVal, smallVal);
test_with_some_zeros(ea_gl);
// Similar to above, but with pi instead of pi/2
Vector3 ea_pi = ea;
- ea_pi[1] = EIGEN_PI;
+ ea_pi[1] = kPi;
test_with_some_zeros(ea_gl);
- ea_pi[1] += internal::random<Scalar>(-0.001, 0.001);
+ ea_pi[1] += internal::random<Scalar>(-smallVal, smallVal);
test_with_some_zeros(ea_gl);
- ea_pi[1] = -EIGEN_PI;
+ ea_pi[1] = -kPi;
test_with_some_zeros(ea_gl);
- ea_pi[1] += internal::random<Scalar>(-0.001, 0.001);
+ ea_pi[1] += internal::random<Scalar>(-smallVal, smallVal);
test_with_some_zeros(ea_gl);
- ea_pi[1] = EIGEN_PI;
+ ea_pi[1] = kPi;
ea_pi[2] = ea_pi[0];
test_with_some_zeros(ea_gl);
- ea_pi[1] += internal::random<Scalar>(-0.001, 0.001);
+ ea_pi[1] += internal::random<Scalar>(-smallVal, smallVal);
test_with_some_zeros(ea_gl);
- ea_pi[1] = -EIGEN_PI;
+ ea_pi[1] = -kPi;
test_with_some_zeros(ea_gl);
- ea_pi[1] += internal::random<Scalar>(-0.001, 0.001);
+ ea_pi[1] += internal::random<Scalar>(-smallVal, smallVal);
test_with_some_zeros(ea_gl);
- ea[2] = ea[0] = internal::random<Scalar>(0, Scalar(EIGEN_PI));
+ ea[2] = ea[0] = internal::random<Scalar>(0, kPi);
check_all_var(ea);
- ea[0] = ea[1] = internal::random<Scalar>(0, Scalar(EIGEN_PI));
+ ea[0] = ea[1] = internal::random<Scalar>(0, kPi);
check_all_var(ea);
ea[1] = 0;
diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp
index f165e8b..064cc4a 100644
--- a/test/indexed_view.cpp
+++ b/test/indexed_view.cpp
@@ -447,7 +447,7 @@
// Check compilation of varying integer types as index types:
Index i = n / 2;
- short i_short(i);
+ short i_short = static_cast<short>(i);
std::size_t i_sizet(i);
VERIFY_IS_EQUAL(a(i), a.coeff(i_short));
VERIFY_IS_EQUAL(a(i), a.coeff(i_sizet));
@@ -790,6 +790,7 @@
VERIFY_IS_EQUAL(int(slice1.SizeAtCompileTime), 6);
VERIFY_IS_EQUAL(int(slice2.SizeAtCompileTime), 6);
auto slice3 = A(all, seq(fix<0>, last, fix<2>));
+ TEST_SET_BUT_UNUSED_VARIABLE(slice3)
VERIFY_IS_EQUAL(int(slice3.RowsAtCompileTime), kRows);
VERIFY_IS_EQUAL(int(slice3.ColsAtCompileTime), (kCols + 1) / 2);
}
@@ -812,7 +813,7 @@
{
std::vector<int> ind{4, 2, 5, 5, 3};
auto slice1 = A(all, ind);
- for (int i = 0; i < ind.size(); ++i) {
+ for (size_t i = 0; i < ind.size(); ++i) {
VERIFY_IS_EQUAL(slice1.col(i), A.col(ind[i]));
}
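Both tweaks in this file are warning fixes, not behavior changes: slice3's RowsAtCompileTime/ColsAtCompileTime are compile-time constants, so reading them does not odr-use the variable, hence the set-but-unused marker; and the loop index becomes size_t to match std::vector::size(). The latter pattern in isolation:

  #include <cstddef>
  #include <vector>

  void sign_compare_sketch(const std::vector<int>& ind) {
    for (std::size_t i = 0; i < ind.size(); ++i) {  // unsigned index matches size(), no -Wsign-compare
      (void)ind[i];
    }
  }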
diff --git a/test/mapstaticmethods.cpp b/test/mapstaticmethods.cpp
index ac90bdc..b18b24a 100644
--- a/test/mapstaticmethods.cpp
+++ b/test/mapstaticmethods.cpp
@@ -9,21 +9,14 @@
#include "main.h"
-// GCC<=4.8 has spurious shadow warnings, because `ptr` re-appears inside template instantiations
-// workaround: put these in an anonymous namespace
-namespace {
-float* ptr;
-const float* const_ptr;
-} // namespace
-
template <typename PlainObjectType, bool IsDynamicSize = PlainObjectType::SizeAtCompileTime == Dynamic,
bool IsVector = PlainObjectType::IsVectorAtCompileTime>
struct mapstaticmethods_impl {};
template <typename PlainObjectType, bool IsVector>
struct mapstaticmethods_impl<PlainObjectType, false, IsVector> {
- static void run(const PlainObjectType& m) {
- mapstaticmethods_impl<PlainObjectType, true, IsVector>::run(m);
+ static void run(const PlainObjectType& m, float* ptr, const float* const_ptr) {
+ mapstaticmethods_impl<PlainObjectType, true, IsVector>::run(m, ptr, const_ptr);
int i = internal::random<int>(2, 5), j = internal::random<int>(2, 5);
@@ -66,7 +59,7 @@
template <typename PlainObjectType>
struct mapstaticmethods_impl<PlainObjectType, true, false> {
- static void run(const PlainObjectType& m) {
+ static void run(const PlainObjectType& m, float* ptr, const float* const_ptr) {
Index rows = m.rows(), cols = m.cols();
int i = internal::random<int>(2, 5), j = internal::random<int>(2, 5);
@@ -110,7 +103,7 @@
template <typename PlainObjectType>
struct mapstaticmethods_impl<PlainObjectType, true, true> {
- static void run(const PlainObjectType& v) {
+ static void run(const PlainObjectType& v, float* ptr, const float* const_ptr) {
Index size = v.size();
int i = internal::random<int>(2, 5);
@@ -133,34 +126,34 @@
};
template <typename PlainObjectType>
-void mapstaticmethods(const PlainObjectType& m) {
- mapstaticmethods_impl<PlainObjectType>::run(m);
+void mapstaticmethods(const PlainObjectType& m, float* ptr, const float* const_ptr) {
+ mapstaticmethods_impl<PlainObjectType>::run(m, ptr, const_ptr);
VERIFY(true); // just to avoid 'unused function' warning
}
EIGEN_DECLARE_TEST(mapstaticmethods) {
- ptr = internal::aligned_new<float>(1000);
+ float* ptr = internal::aligned_new<float>(1000);
for (int i = 0; i < 1000; i++) ptr[i] = float(i);
- const_ptr = ptr;
+ const float* const_ptr = ptr;
- CALL_SUBTEST_1((mapstaticmethods(Matrix<float, 1, 1>())));
- CALL_SUBTEST_1((mapstaticmethods(Vector2f())));
- CALL_SUBTEST_2((mapstaticmethods(Vector3f())));
- CALL_SUBTEST_2((mapstaticmethods(Matrix2f())));
- CALL_SUBTEST_3((mapstaticmethods(Matrix4f())));
- CALL_SUBTEST_3((mapstaticmethods(Array4f())));
- CALL_SUBTEST_4((mapstaticmethods(Array3f())));
- CALL_SUBTEST_4((mapstaticmethods(Array33f())));
- CALL_SUBTEST_5((mapstaticmethods(Array44f())));
- CALL_SUBTEST_5((mapstaticmethods(VectorXf(1))));
- CALL_SUBTEST_5((mapstaticmethods(VectorXf(8))));
- CALL_SUBTEST_6((mapstaticmethods(MatrixXf(1, 1))));
- CALL_SUBTEST_6((mapstaticmethods(MatrixXf(5, 7))));
- CALL_SUBTEST_7((mapstaticmethods(ArrayXf(1))));
- CALL_SUBTEST_7((mapstaticmethods(ArrayXf(5))));
- CALL_SUBTEST_8((mapstaticmethods(ArrayXXf(1, 1))));
- CALL_SUBTEST_8((mapstaticmethods(ArrayXXf(8, 6))));
+ CALL_SUBTEST_1((mapstaticmethods(Matrix<float, 1, 1>(), ptr, const_ptr)));
+ CALL_SUBTEST_1((mapstaticmethods(Vector2f(), ptr, const_ptr)));
+ CALL_SUBTEST_2((mapstaticmethods(Vector3f(), ptr, const_ptr)));
+ CALL_SUBTEST_2((mapstaticmethods(Matrix2f(), ptr, const_ptr)));
+ CALL_SUBTEST_3((mapstaticmethods(Matrix4f(), ptr, const_ptr)));
+ CALL_SUBTEST_3((mapstaticmethods(Array4f(), ptr, const_ptr)));
+ CALL_SUBTEST_4((mapstaticmethods(Array3f(), ptr, const_ptr)));
+ CALL_SUBTEST_4((mapstaticmethods(Array33f(), ptr, const_ptr)));
+ CALL_SUBTEST_5((mapstaticmethods(Array44f(), ptr, const_ptr)));
+ CALL_SUBTEST_5((mapstaticmethods(VectorXf(1), ptr, const_ptr)));
+ CALL_SUBTEST_5((mapstaticmethods(VectorXf(8), ptr, const_ptr)));
+ CALL_SUBTEST_6((mapstaticmethods(MatrixXf(1, 1), ptr, const_ptr)));
+ CALL_SUBTEST_6((mapstaticmethods(MatrixXf(5, 7), ptr, const_ptr)));
+ CALL_SUBTEST_7((mapstaticmethods(ArrayXf(1), ptr, const_ptr)));
+ CALL_SUBTEST_7((mapstaticmethods(ArrayXf(5), ptr, const_ptr)));
+ CALL_SUBTEST_8((mapstaticmethods(ArrayXXf(1, 1), ptr, const_ptr)));
+ CALL_SUBTEST_8((mapstaticmethods(ArrayXXf(8, 6), ptr, const_ptr)));
internal::aligned_delete(ptr, 1000);
}
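The shape of this cleanup, in isolation: state that used to live in anonymous-namespace globals is threaded through as parameters, which removes both the GCC<=4.8 shadow-warning workaround and the hidden data flow. A minimal sketch of the pattern:

  template <typename PlainObjectType>
  void run_sketch(const PlainObjectType& m, float* ptr, const float* const_ptr) {
    // pointers arrive explicitly, so template instantiations cannot shadow a global `ptr`
    (void)m; (void)ptr; (void)const_ptr;
  }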
diff --git a/test/numext.cpp b/test/numext.cpp
index ac9b66d..a2d511b 100644
--- a/test/numext.cpp
+++ b/test/numext.cpp
@@ -34,26 +34,46 @@
#define VERIFY_IS_EQUAL_OR_NANS(a, b) VERIFY(test_is_equal_or_nans(a, b))
template <typename T>
+void check_negate() {
+ Index size = 1000;
+ for (Index i = 0; i < size; i++) {
+ T val = i == 0 ? T(0) : internal::random<T>(T(0), NumTraits<T>::highest());
+ T neg_val = numext::negate(val);
+ VERIFY_IS_EQUAL(T(val + neg_val), T(0));
+ VERIFY_IS_EQUAL(numext::negate(neg_val), val);
+ }
+}
+
+template <typename T>
void check_abs() {
typedef typename NumTraits<T>::Real Real;
Real zero(0);
- if (NumTraits<T>::IsSigned) VERIFY_IS_EQUAL(numext::abs(-T(1)), T(1));
+ if (NumTraits<T>::IsSigned) VERIFY_IS_EQUAL(numext::abs(numext::negate(T(1))), T(1));
VERIFY_IS_EQUAL(numext::abs(T(0)), T(0));
VERIFY_IS_EQUAL(numext::abs(T(1)), T(1));
for (int k = 0; k < 100; ++k) {
T x = internal::random<T>();
- if (!internal::is_same<T, bool>::value) x = x / Real(2);
+ x = x / Real(2);
if (NumTraits<T>::IsSigned) {
- VERIFY_IS_EQUAL(numext::abs(x), numext::abs(-x));
- VERIFY(numext::abs(-x) >= zero);
+ VERIFY_IS_EQUAL(numext::abs(x), numext::abs(numext::negate(x)));
+ VERIFY(numext::abs(numext::negate(x)) >= zero);
}
VERIFY(numext::abs(x) >= zero);
VERIFY_IS_APPROX(numext::abs2(x), numext::abs2(numext::abs(x)));
}
}
+template <>
+void check_abs<bool>() {
+ for (bool x : {true, false}) {
+ VERIFY_IS_EQUAL(numext::abs(x), x);
+ VERIFY(numext::abs(x) >= false);
+ VERIFY_IS_EQUAL(numext::abs2(x), numext::abs2(numext::abs(x)));
+ }
+}
+
template <typename T>
void check_arg() {
typedef typename NumTraits<T>::Real Real;
@@ -236,16 +256,17 @@
negative_values = {static_cast<T>(-1), static_cast<T>(NumTraits<T>::lowest())};
non_negative_values = {static_cast<T>(0), static_cast<T>(1), static_cast<T>(NumTraits<T>::highest())};
} else {
- // has sign bit
- const T neg_zero = static_cast<T>(-0.0);
- const T neg_one = static_cast<T>(-1.0);
- const T neg_inf = -std::numeric_limits<T>::infinity();
- const T neg_nan = -std::numeric_limits<T>::quiet_NaN();
// does not have sign bit
const T pos_zero = static_cast<T>(0.0);
const T pos_one = static_cast<T>(1.0);
const T pos_inf = std::numeric_limits<T>::infinity();
const T pos_nan = std::numeric_limits<T>::quiet_NaN();
+ // has sign bit
+ const T neg_zero = numext::negate(pos_zero);
+ const T neg_one = numext::negate(pos_one);
+ const T neg_inf = numext::negate(pos_inf);
+ const T neg_nan = numext::negate(pos_nan);
+
negative_values = {neg_zero, neg_one, neg_inf, neg_nan};
non_negative_values = {pos_zero, pos_one, pos_inf, pos_nan};
}
@@ -273,6 +294,22 @@
EIGEN_DECLARE_TEST(numext) {
for (int k = 0; k < g_repeat; ++k) {
+ CALL_SUBTEST(check_negate<signed char>());
+ CALL_SUBTEST(check_negate<unsigned char>());
+ CALL_SUBTEST(check_negate<short>());
+ CALL_SUBTEST(check_negate<unsigned short>());
+ CALL_SUBTEST(check_negate<int>());
+ CALL_SUBTEST(check_negate<unsigned int>());
+ CALL_SUBTEST(check_negate<long>());
+ CALL_SUBTEST(check_negate<unsigned long>());
+ CALL_SUBTEST(check_negate<half>());
+ CALL_SUBTEST(check_negate<bfloat16>());
+ CALL_SUBTEST(check_negate<float>());
+ CALL_SUBTEST(check_negate<double>());
+ CALL_SUBTEST(check_negate<long double>());
+ CALL_SUBTEST(check_negate<std::complex<float> >());
+ CALL_SUBTEST(check_negate<std::complex<double> >());
+
CALL_SUBTEST(check_abs<bool>());
CALL_SUBTEST(check_abs<signed char>());
CALL_SUBTEST(check_abs<unsigned char>());
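check_negate relies on numext::negate being a total, wrap-around negation for every scalar type, which is what sidesteps MSVC's C4146 ("unary minus operator applied to unsigned type") from the commit title. A minimal sketch of the idea, assuming the unsigned case is implemented essentially as subtraction from zero (the real code lives in Eigen's MathFunctions.h):

  #include <cstdint>

  // sketch, not Eigen's actual implementation
  template <typename T>
  constexpr T negate_sketch(T x) {
    return T(0) - x;  // modular wrap-around for unsigned T, ordinary negation for signed T
  }
  static_assert(negate_sketch(std::uint8_t(6)) == 250, "0 - 6 wraps to 250 mod 256");

This is exactly the invariant the new test checks: val + negate(val) == 0 and negate(negate(val)) == val, for signed and unsigned types alike.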
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index db8c9b5..8bfa321 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -34,11 +34,11 @@
}
template <typename T>
inline T REF_NMADD(const T& a, const T& b, const T& c) {
- return (-a * b) + c;
+ return c - a * b;
}
template <typename T>
inline T REF_NMSUB(const T& a, const T& b, const T& c) {
- return (-a * b) - c;
+ return test::negate(a * b + c);
}
template <typename T>
inline T REF_DIV(const T& a, const T& b) {
@@ -427,6 +427,32 @@
}
};
+template <typename Scalar, typename Packet, bool HasNegate = internal::packet_traits<Scalar>::HasNegate>
+struct negate_test_impl {
+ static void run_negate(Scalar* data1, Scalar* data2, Scalar* ref, int PacketSize) {
+ CHECK_CWISE1_IF(HasNegate, test::negate, internal::pnegate);
+ }
+ static void run_nmsub(Scalar* data1, Scalar* data2, Scalar* ref, int PacketSize) {
+ CHECK_CWISE3_IF(HasNegate, REF_NMSUB, internal::pnmsub);
+ }
+};
+
+template <typename Scalar, typename Packet>
+struct negate_test_impl<Scalar, Packet, false> {
+ static void run_negate(Scalar*, Scalar*, Scalar*, int) {}
+ static void run_nmsub(Scalar*, Scalar*, Scalar*, int) {}
+};
+
+template <typename Scalar, typename Packet>
+void negate_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
+ negate_test_impl<Scalar, Packet>::run_negate(data1, data2, ref, size);
+}
+
+template <typename Scalar, typename Packet>
+void nmsub_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) {
+  negate_test_impl<Scalar, Packet>::run_nmsub(data1, data2, ref, size);
+}
+
template <typename Scalar, typename Packet>
void packetmath() {
typedef internal::packet_traits<Scalar> PacketTraits;
@@ -533,7 +559,7 @@
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
- CHECK_CWISE1_IF(PacketTraits::HasNegate, test::negate, internal::pnegate);
+ negate_test<Scalar, Packet>(data1, data2, ref, PacketSize);
CHECK_CWISE1_IF(PacketTraits::HasReciprocal, REF_RECIPROCAL, internal::preciprocal);
CHECK_CWISE1(numext::conj, internal::pconj);
CHECK_CWISE1_IF(PacketTraits::HasSign, numext::sign, internal::psign);
@@ -689,7 +715,7 @@
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, numext::rsqrt, internal::prsqrt);
CHECK_CWISE3_IF(true, REF_MADD, internal::pmadd);
if (!std::is_same<Scalar, bool>::value && NumTraits<Scalar>::IsSigned) {
- CHECK_CWISE3_IF(PacketTraits::HasNegate, REF_NMSUB, internal::pnmsub);
+ nmsub_test<Scalar, Packet>(data1, data2, ref, PacketSize);
}
// For pmsub, pnmadd, the values can cancel each other to become near zero,
@@ -698,11 +724,11 @@
for (int i = 0; i < PacketSize; ++i) {
data1[i] = numext::abs(internal::random<Scalar>());
data1[i + PacketSize] = numext::abs(internal::random<Scalar>());
- data1[i + 2 * PacketSize] = -numext::abs(internal::random<Scalar>());
+ data1[i + 2 * PacketSize] = Scalar(0) - numext::abs(internal::random<Scalar>());
}
if (!std::is_same<Scalar, bool>::value && NumTraits<Scalar>::IsSigned) {
CHECK_CWISE3_IF(true, REF_MSUB, internal::pmsub);
- CHECK_CWISE3_IF(PacketTraits::HasNegate, REF_NMADD, internal::pnmadd);
+ CHECK_CWISE3_IF(true, REF_NMADD, internal::pnmadd);
}
}
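The REF_NMADD / REF_NMSUB rewrites keep the reference values identical for signed types while staying defined (and warning-free) for unsigned ones, because negation distributes the same way in exact and in modular arithmetic; the trait-gated negate_test_impl then ensures pnegate/pnmsub are never even named for packets without HasNegate. The identity, spelled out:

  // c - a*b == (-(a*b)) + c, in ordinary and in wrap-around arithmetic alike;
  // e.g. in unsigned char arithmetic: a=2, b=3, c=1 -> 1 - 6 wraps to 251
  static_assert(static_cast<unsigned char>(1 - 2 * 3) == 251, "wrap-around check");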
diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h
index 86a01fb..d8de04b 100644
--- a/test/packetmath_test_shared.h
+++ b/test/packetmath_test_shared.h
@@ -22,11 +22,16 @@
namespace test {
-template <typename T>
+template <typename T, std::enable_if_t<NumTraits<T>::IsSigned, bool> = true>
T negate(const T& x) {
return -x;
}
+template <typename T, std::enable_if_t<!NumTraits<T>::IsSigned, bool> = true>
+T negate(const T& x) {
+ return T(0) - x;
+}
+
template <typename T>
Map<const Array<unsigned char, sizeof(T), 1> > bits(const T& x) {
return Map<const Array<unsigned char, sizeof(T), 1> >(reinterpret_cast<const unsigned char*>(&x));
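The two test::negate overloads are selected purely by NumTraits<T>::IsSigned through the enable_if_t non-type parameter, so call sites need no changes. A usage sketch (assumes this header is included and the harness's test namespace is in scope):

  void negate_overload_sketch() {
    int a = test::negate(3);        // IsSigned overload: plain -x
    unsigned b = test::negate(3u);  // unsigned overload: 0u - 3u, i.e. -3 mod 2^32
    (void)a; (void)b;
  }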
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp
index 8d47fb0..e4b1963 100644
--- a/test/sparse_vector.cpp
+++ b/test/sparse_vector.cpp
@@ -110,29 +110,29 @@
// test move
{
- SparseVectorType v3(std::move(v1));
- VERIFY_IS_APPROX(v3, refV1);
- v1 = v3;
+ SparseVectorType tmp(std::move(v1));
+ VERIFY_IS_APPROX(tmp, refV1);
+ v1 = tmp;
}
{
- SparseVectorType v3;
- v3 = std::move(v1);
- VERIFY_IS_APPROX(v3, refV1);
- v1 = v3;
+ SparseVectorType tmp;
+ tmp = std::move(v1);
+ VERIFY_IS_APPROX(tmp, refV1);
+ v1 = tmp;
}
{
- SparseVectorType v3(std::move(mv1));
- VERIFY_IS_APPROX(v3, refV1);
- mv1 = v3;
+ SparseVectorType tmp(std::move(mv1));
+ VERIFY_IS_APPROX(tmp, refV1);
+ mv1 = tmp;
}
{
- SparseVectorType v3;
- v3 = std::move(mv1);
- VERIFY_IS_APPROX(v3, refV1);
- mv1 = v3;
+ SparseVectorType tmp;
+ tmp = std::move(mv1);
+ VERIFY_IS_APPROX(tmp, refV1);
+ mv1 = tmp;
}
// test conservative resize
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp
index aac7248..dc1a5c7 100644
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -287,6 +287,7 @@
typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half,
typename internal::packet_traits<Scalar>::type>::value>
struct vectorization_logic_half {
+ using RealScalar = typename NumTraits<Scalar>::Real;
typedef internal::packet_traits<Scalar> PacketTraits;
typedef typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half PacketType;
static constexpr int PacketSize = internal::unpacket_traits<PacketType>::size;
@@ -355,10 +356,12 @@
VERIFY(test_assign(Vector1(), Vector1().template segment<MinVSize>(0).derived(),
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,
CompleteUnrolling));
- VERIFY(test_assign(Vector1(), Scalar(2.1) * Vector1() - Vector1(), InnerVectorizedTraversal, CompleteUnrolling));
+ VERIFY(test_assign(Vector1(), Scalar(RealScalar(2.1)) * Vector1() - Vector1(), InnerVectorizedTraversal,
+ CompleteUnrolling));
VERIFY(test_assign(
Vector1(),
- (Scalar(2.1) * Vector1().template segment<MinVSize>(0) - Vector1().template segment<MinVSize>(0)).derived(),
+ (Scalar(RealScalar(2.1)) * Vector1().template segment<MinVSize>(0) - Vector1().template segment<MinVSize>(0))
+ .derived(),
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal, CompleteUnrolling));
VERIFY(test_assign(Vector1(), Vector1().cwiseProduct(Vector1()), InnerVectorizedTraversal, CompleteUnrolling));
VERIFY(test_assign(Vector1(), Vector1().template cast<Scalar>(), InnerVectorizedTraversal, CompleteUnrolling));