Update Eigen to commit:122befe54cc0c31273d9e1caef80b49ad834bf4c CHANGELOG ========= 122befe54 - Fix "unary minus operator applied to unsigned type, result still unsigned" on MSVC and other stupid warnings dcdb0233c - Refactor indexed view to appease MSVC 14.16. 5226566a1 - Speed up pldexp_generic. 3c6521ed9 - Add constexpr to accessors in DenseBase, Quaternions and Translations 3c9109238 - Add support for Packet8l to AVX512. PiperOrigin-RevId: 625378611 Change-Id: Ib21f5d585439de99cba53737d4ceaebe323f5cbe
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 48c6d73..30e0aa3 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -89,12 +89,13 @@ * * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return internal::evaluator<Derived>(derived()).coeff(row, col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeffByOuterInner(Index outer, + Index inner) const { return coeff(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner)); } @@ -102,7 +103,7 @@ * * \sa operator()(Index,Index), operator[](Index) */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator()(Index row, Index col) const { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return coeff(row, col); } @@ -122,7 +123,7 @@ * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType coeff(Index index) const { EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); @@ -137,7 +138,7 @@ * z() const, w() const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator[](Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator[](Index index) const { EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, 
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) eigen_assert(index >= 0 && index < size()); @@ -154,32 +155,32 @@ * z() const, w() const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType operator()(Index index) const { eigen_assert(index >= 0 && index < size()); return coeff(index); } /** equivalent to operator[](0). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType x() const { return (*this)[0]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType x() const { return (*this)[0]; } /** equivalent to operator[](1). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType y() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType y() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); return (*this)[1]; } /** equivalent to operator[](2). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType z() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType z() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); return (*this)[2]; } /** equivalent to operator[](3). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType w() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR CoeffReturnType w() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); return (*this)[3]; } @@ -361,32 +362,32 @@ * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& operator()(Index index) { eigen_assert(index >= 0 && index < size()); return coeffRef(index); } /** equivalent to operator[](0). 
*/ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& x() { return (*this)[0]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& x() { return (*this)[0]; } /** equivalent to operator[](1). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& y() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& y() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); return (*this)[1]; } /** equivalent to operator[](2). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& z() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& z() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); return (*this)[2]; } /** equivalent to operator[](3). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& w() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Scalar& w() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); return (*this)[3]; }
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 58a197f..61f0eb9 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h
@@ -335,12 +335,9 @@ /** \internal \returns -a (coeff-wise) */ template <typename Packet> EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) { - return -a; -} - -template <> -EIGEN_DEVICE_FUNC inline bool pnegate(const bool& a) { - return !a; + EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value), + NEGATE IS NOT DEFINED FOR BOOLEAN TYPES) + return numext::negate(a); } /** \internal \returns conj(a) (coeff-wise) */ @@ -1117,8 +1114,9 @@ /** \internal \returns the log10 of \a a (coeff-wise) */ template <typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) { - typedef typename internal::unpacket_traits<Packet>::type Scalar; - return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a)); + using Scalar = typename internal::unpacket_traits<Packet>::type; + using RealScalar = typename NumTraits<Scalar>::Real; + return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a)); } /** \internal \returns the square-root of \a a (coeff-wise) */ @@ -1293,13 +1291,13 @@ /** \internal \returns -(a * b) + c (coeff-wise) */ template <typename Packet> EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) { - return padd(pnegate(pmul(a, b)), c); + return psub(c, pmul(a, b)); } -/** \internal \returns -(a * b) - c (coeff-wise) */ +/** \internal \returns -((a * b) + c) (coeff-wise) */ template <typename Packet> EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) { - return psub(pnegate(pmul(a, b)), c); + return pnegate(pmadd(a, b, c)); } /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. 
\a to must be 16 bytes aligned @@ -1403,6 +1401,12 @@ template <size_t N> struct Selector { bool select[N]; + template <typename MaskType = int> + EIGEN_DEVICE_FUNC inline MaskType mask(size_t begin = 0, size_t end = N) const { + MaskType res = 0; + for (size_t i = begin; i < end; i++) res |= (static_cast<MaskType>(select[i]) << i); + return res; + } }; template <typename Packet>
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index f907d1e..2a42b18 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h
@@ -861,6 +861,25 @@ typedef Scalar type; }; +// suppress "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC +// note: `0 - a` is distinct from `-a` when Scalar is a floating point type and `a` is zero + +template <typename Scalar, bool IsInteger = NumTraits<Scalar>::IsInteger> +struct negate_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return -a; } +}; + +template <typename Scalar> +struct negate_impl<Scalar, true> { + EIGEN_STATIC_ASSERT((!is_same<Scalar, bool>::value), NEGATE IS NOT DEFINED FOR BOOLEAN TYPES) + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return Scalar(0) - a; } +}; + +template <typename Scalar> +struct negate_retval { + typedef Scalar type; +}; + template <typename Scalar, bool IsInteger = NumTraits<typename unpacket_traits<Scalar>::type>::IsInteger> struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_floor(const Scalar& x) { @@ -1067,6 +1086,11 @@ } template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(negate, Scalar) negate(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(negate, Scalar)::run(x); +} + +template <typename Scalar> EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); }
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 2383e46..a53c38d 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -1844,7 +1844,7 @@ #endif } template <> -EIGEN_STRONG_INLINE Packet8ui psignbit(const Packet8ui& a) { +EIGEN_STRONG_INLINE Packet8ui psignbit(const Packet8ui& /*unused*/) { return _mm256_setzero_si256(); } #ifdef EIGEN_VECTORIZE_AVX2 @@ -1853,7 +1853,7 @@ return _mm256_castsi256_pd(_mm256_cmpgt_epi64(_mm256_setzero_si256(), _mm256_castpd_si256(a))); } template <> -EIGEN_STRONG_INLINE Packet4ul psignbit(const Packet4ul& a) { +EIGEN_STRONG_INLINE Packet4ul psignbit(const Packet4ul& /*unused*/) { return _mm256_setzero_si256(); } #endif
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index ed2f189..5c53556 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -34,7 +34,7 @@ typedef __m512 Packet16f; typedef __m512i Packet16i; typedef __m512d Packet8d; -// TODO(rmlarsen): Add support for Packet8l. +typedef eigen_packet_wrapper<__m512i, 1> Packet8l; #ifndef EIGEN_VECTORIZE_AVX512FP16 typedef eigen_packet_wrapper<__m256i, 1> Packet16h; #endif @@ -52,6 +52,10 @@ struct is_arithmetic<__m512d> { enum { value = true }; }; +template <> +struct is_arithmetic<Packet8l> { + enum { value = true }; +}; #ifndef EIGEN_VECTORIZE_AVX512FP16 template <> @@ -172,6 +176,13 @@ }; template <> +struct packet_traits<int64_t> : default_packet_traits { + typedef Packet8l type; + typedef Packet4l half; + enum { Vectorizable = 1, AlignedOnScalar = 1, HasCmp = 1, size = 8 }; +}; + +template <> struct unpacket_traits<Packet16f> { typedef float type; typedef Packet8f half; @@ -190,6 +201,7 @@ struct unpacket_traits<Packet8d> { typedef double type; typedef Packet4d half; + typedef Packet8l integer_packet; typedef uint8_t mask_t; enum { size = 8, @@ -213,6 +225,19 @@ }; }; +template <> +struct unpacket_traits<Packet8l> { + typedef int64_t type; + typedef Packet4l half; + enum { + size = 8, + alignment = Aligned64, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + #ifndef EIGEN_VECTORIZE_AVX512FP16 template <> struct unpacket_traits<Packet16h> { @@ -240,6 +265,10 @@ EIGEN_STRONG_INLINE Packet16i pset1<Packet16i>(const int& from) { return _mm512_set1_epi32(from); } +template <> +EIGEN_STRONG_INLINE Packet8l pset1<Packet8l>(const int64_t& from) { + return _mm512_set1_epi64(from); +} template <> EIGEN_STRONG_INLINE Packet16f pset1frombits<Packet16f>(unsigned int from) { @@ -265,6 +294,11 @@ } template <> +EIGEN_STRONG_INLINE Packet8l pzero(const Packet8l& /*a*/) { + return _mm512_setzero_si512(); +} + +template <> EIGEN_STRONG_INLINE Packet16f peven_mask(const Packet16f& /*a*/) { return _mm512_castsi512_ps(_mm512_set_epi32(0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1)); } @@ -276,6 +310,10 @@ 
EIGEN_STRONG_INLINE Packet8d peven_mask(const Packet8d& /*a*/) { return _mm512_castsi512_pd(_mm512_set_epi32(0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1)); } +template <> +EIGEN_STRONG_INLINE Packet8l peven_mask(const Packet8l& /*a*/) { + return _mm512_set_epi32(0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1); +} template <> EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) { @@ -313,6 +351,10 @@ EIGEN_STRONG_INLINE Packet16i plset<Packet16i>(const int& a) { return _mm512_add_epi32(_mm512_set1_epi32(a), _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); } +template <> +EIGEN_STRONG_INLINE Packet8l plset<Packet8l>(const int64_t& a) { + return _mm512_add_epi64(_mm512_set1_epi64(a), _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0)); +} template <> EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a, const Packet16f& b) { @@ -326,6 +368,10 @@ EIGEN_STRONG_INLINE Packet16i padd<Packet16i>(const Packet16i& a, const Packet16i& b) { return _mm512_add_epi32(a, b); } +template <> +EIGEN_STRONG_INLINE Packet8l padd<Packet8l>(const Packet8l& a, const Packet8l& b) { + return _mm512_add_epi64(a, b); +} template <> EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a, const Packet16f& b, uint16_t umask) { @@ -350,6 +396,10 @@ EIGEN_STRONG_INLINE Packet16i psub<Packet16i>(const Packet16i& a, const Packet16i& b) { return _mm512_sub_epi32(a, b); } +template <> +EIGEN_STRONG_INLINE Packet8l psub<Packet8l>(const Packet8l& a, const Packet8l& b) { + return _mm512_sub_epi64(a, b); +} template <> EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) { @@ -372,6 +422,10 @@ EIGEN_STRONG_INLINE Packet16i pnegate(const Packet16i& a) { return _mm512_sub_epi32(_mm512_setzero_si512(), a); } +template <> +EIGEN_STRONG_INLINE Packet8l pnegate(const Packet8l& a) { + return _mm512_sub_epi64(_mm512_setzero_si512(), a); +} template <> EIGEN_STRONG_INLINE Packet16f pconj(const Packet16f& a) { @@ -385,6 +439,10 @@ 
EIGEN_STRONG_INLINE Packet16i pconj(const Packet16i& a) { return a; } +template <> +EIGEN_STRONG_INLINE Packet8l pconj(const Packet8l& a) { + return a; +} template <> EIGEN_STRONG_INLINE Packet16f pmul<Packet16f>(const Packet16f& a, const Packet16f& b) { @@ -398,6 +456,14 @@ EIGEN_STRONG_INLINE Packet16i pmul<Packet16i>(const Packet16i& a, const Packet16i& b) { return _mm512_mullo_epi32(a, b); } +template <> +EIGEN_STRONG_INLINE Packet8l pmul<Packet8l>(const Packet8l& a, const Packet8l& b) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + return _mm512_mullo_epi64(a, b); +#else + return _mm512_mullox_epi64(a, b); +#endif +} template <> EIGEN_STRONG_INLINE Packet16f pdiv<Packet16f>(const Packet16f& a, const Packet16f& b) { @@ -467,6 +533,12 @@ } template <> +EIGEN_DEVICE_FUNC inline Packet8l pselect(const Packet8l& mask, const Packet8l& a, const Packet8l& b) { + __mmask8 mask8 = _mm512_cmpeq_epi64_mask(mask, _mm512_setzero_si512()); + return _mm512_mask_blend_epi64(mask8, a, b); +} + +template <> EIGEN_DEVICE_FUNC inline Packet8d pselect(const Packet8d& mask, const Packet8d& a, const Packet8d& b) { __mmask8 mask8 = _mm512_cmp_epi64_mask(_mm512_castpd_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ); return _mm512_mask_blend_pd(mask8, a, b); @@ -486,6 +558,10 @@ EIGEN_STRONG_INLINE Packet16i pmin<Packet16i>(const Packet16i& a, const Packet16i& b) { return _mm512_min_epi32(b, a); } +template <> +EIGEN_STRONG_INLINE Packet8l pmin<Packet8l>(const Packet8l& a, const Packet8l& b) { + return _mm512_min_epi64(b, a); +} template <> EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a, const Packet16f& b) { @@ -501,6 +577,10 @@ EIGEN_STRONG_INLINE Packet16i pmax<Packet16i>(const Packet16i& a, const Packet16i& b) { return _mm512_max_epi32(b, a); } +template <> +EIGEN_STRONG_INLINE Packet8l pmax<Packet8l>(const Packet8l& a, const Packet8l& b) { + return _mm512_max_epi64(b, a); +} // Add specializations for min/max with prescribed NaN progation. 
template <> @@ -593,46 +673,62 @@ template <> EIGEN_STRONG_INLINE Packet16f pisnan(const Packet16f& a) { __mmask16 mask = _mm512_cmp_ps_mask(a, a, _CMP_UNORD_Q); - return _mm512_castsi512_ps(_mm512_maskz_set1_epi32(mask, 0xffffffffu)); + return _mm512_castsi512_ps(_mm512_maskz_set1_epi32(mask, int32_t(-1))); } template <> EIGEN_STRONG_INLINE Packet16f pcmp_eq(const Packet16f& a, const Packet16f& b) { __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ); - return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu)); + return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1))); } template <> EIGEN_STRONG_INLINE Packet16f pcmp_le(const Packet16f& a, const Packet16f& b) { __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ); - return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu)); + return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1))); } template <> EIGEN_STRONG_INLINE Packet16f pcmp_lt(const Packet16f& a, const Packet16f& b) { __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ); - return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu)); + return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1))); } template <> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) { __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ); - return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu)); + return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1))); } template <> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) { __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ); - return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu); + return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1)); } 
template <> EIGEN_STRONG_INLINE Packet16i pcmp_le(const Packet16i& a, const Packet16i& b) { __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LE); - return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu); + return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1)); } template <> EIGEN_STRONG_INLINE Packet16i pcmp_lt(const Packet16i& a, const Packet16i& b) { __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT); - return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, 0xffffffffu); + return _mm512_mask_set1_epi32(_mm512_setzero_epi32(), mask, int32_t(-1)); +} + +template <> +EIGEN_STRONG_INLINE Packet8l pcmp_eq(const Packet8l& a, const Packet8l& b) { + __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_EQ); + return _mm512_mask_set1_epi64(_mm512_setzero_si512(), mask, int64_t(-1)); +} +template <> +EIGEN_STRONG_INLINE Packet8l pcmp_le(const Packet8l& a, const Packet8l& b) { + __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LE); + return _mm512_mask_set1_epi64(_mm512_setzero_si512(), mask, int64_t(-1)); +} +template <> +EIGEN_STRONG_INLINE Packet8l pcmp_lt(const Packet8l& a, const Packet8l& b) { + __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LT); + return _mm512_mask_set1_epi64(_mm512_setzero_si512(), mask, int64_t(-1)); } template <> @@ -685,7 +781,12 @@ template <> EIGEN_STRONG_INLINE Packet16i ptrue<Packet16i>(const Packet16i& /*a*/) { - return _mm512_set1_epi32(0xffffffffu); + return _mm512_set1_epi32(int32_t(-1)); +} + +template <> +EIGEN_STRONG_INLINE Packet8l ptrue<Packet8l>(const Packet8l& /*a*/) { + return _mm512_set1_epi64(int64_t(-1)); } template <> @@ -704,6 +805,11 @@ } template <> +EIGEN_STRONG_INLINE Packet8l pand<Packet8l>(const Packet8l& a, const Packet8l& b) { + return _mm512_and_si512(a, b); +} + +template <> EIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a, const Packet16f& b) { #ifdef EIGEN_VECTORIZE_AVX512DQ return _mm512_and_ps(a, b); @@ -733,6 +839,11 @@ } 
template <> +EIGEN_STRONG_INLINE Packet8l por<Packet8l>(const Packet8l& a, const Packet8l& b) { + return _mm512_or_si512(a, b); +} + +template <> EIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a, const Packet16f& b) { #ifdef EIGEN_VECTORIZE_AVX512DQ return _mm512_or_ps(a, b); @@ -756,6 +867,11 @@ } template <> +EIGEN_STRONG_INLINE Packet8l pxor<Packet8l>(const Packet8l& a, const Packet8l& b) { + return _mm512_xor_si512(a, b); +} + +template <> EIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a, const Packet16f& b) { #ifdef EIGEN_VECTORIZE_AVX512DQ return _mm512_xor_ps(a, b); @@ -779,6 +895,11 @@ } template <> +EIGEN_STRONG_INLINE Packet8l pandnot<Packet8l>(const Packet8l& a, const Packet8l& b) { + return _mm512_andnot_si512(b, a); +} + +template <> EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a, const Packet16f& b) { #ifdef EIGEN_VECTORIZE_AVX512DQ return _mm512_andnot_ps(b, a); @@ -825,6 +946,21 @@ return _mm512_slli_epi32(a, N); } +template <int N> +EIGEN_STRONG_INLINE Packet8l parithmetic_shift_right(Packet8l a) { + return _mm512_srai_epi64(a, N); +} + +template <int N> +EIGEN_STRONG_INLINE Packet8l plogical_shift_right(Packet8l a) { + return _mm512_srli_epi64(a, N); +} + +template <int N> +EIGEN_STRONG_INLINE Packet8l plogical_shift_left(Packet8l a) { + return _mm512_slli_epi64(a, N); +} + template <> EIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from); @@ -835,7 +971,11 @@ } template <> EIGEN_STRONG_INLINE Packet16i pload<Packet16i>(const int* from) { - EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(reinterpret_cast<const __m512i*>(from)); + EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_epi64(from); +} +template <> +EIGEN_STRONG_INLINE Packet8l pload<Packet8l>(const int64_t* from) { + EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_epi64(from); } template <> @@ -848,7 +988,11 @@ } template <> EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const 
int* from) { - EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(reinterpret_cast<const __m512i*>(from)); + EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_epi32(from); +} +template <> +EIGEN_STRONG_INLINE Packet8l ploadu<Packet8l>(const int64_t* from) { + EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_epi64(from); } template <> @@ -868,42 +1012,35 @@ EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) { // an unaligned load is required here as there is no requirement // on the alignment of input pointer 'from' - __m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); + __m256i low_half = _mm256_castps_si256(_mm256_loadu_ps(from)); __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); return pairs; } -#ifdef EIGEN_VECTORIZE_AVX512DQ -// FIXME: this does not look optimal, better load a Packet4d and shuffle... -// Loads 4 doubles from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, +// Loads 4 doubles from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, // a3} template <> EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) { - __m512d x = _mm512_setzero_pd(); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3); - return x; + Packet8d tmp = _mm512_castpd256_pd512(ploadu<Packet4d>(from)); + const Packet8l scatter_mask = _mm512_set_epi64(3, 3, 2, 2, 1, 1, 0, 0); + return _mm512_permutexvar_pd(scatter_mask, tmp); } -#else + +// Loads 4 int64_t from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, +// a3} template <> -EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) { - __m512d x = _mm512_setzero_pd(); - x = _mm512_mask_broadcastsd_pd(x, 0x3 << 0, _mm_load_sd(from + 0)); - x = 
_mm512_mask_broadcastsd_pd(x, 0x3 << 2, _mm_load_sd(from + 1)); - x = _mm512_mask_broadcastsd_pd(x, 0x3 << 4, _mm_load_sd(from + 2)); - x = _mm512_mask_broadcastsd_pd(x, 0x3 << 6, _mm_load_sd(from + 3)); - return x; +EIGEN_STRONG_INLINE Packet8l ploaddup<Packet8l>(const int64_t* from) { + Packet8l tmp = _mm512_castsi256_si512(ploadu<Packet4l>(from)); + const Packet8l scatter_mask = _mm512_set_epi64(3, 3, 2, 2, 1, 1, 0, 0); + return _mm512_permutexvar_epi64(scatter_mask, tmp); } -#endif // Loads 8 integers from memory and returns the packet // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> EIGEN_STRONG_INLINE Packet16i ploaddup<Packet16i>(const int* from) { __m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); return _mm512_castps_si512(pairs); @@ -929,6 +1066,17 @@ return _mm512_insertf64x4(tmp, lane1, 1); } +// Loads 2 int64_t from memory and returns the packet +// {a0, a0, a0, a0, a1, a1, a1, a1} +template <> +EIGEN_STRONG_INLINE Packet8l ploadquad<Packet8l>(const int64_t* from) { + __m256i lane0 = _mm256_set1_epi64x(*from); + __m256i lane1 = _mm256_set1_epi64x(*(from + 1)); + __m512i tmp = _mm512_undefined_epi32(); + tmp = _mm512_inserti64x4(tmp, lane0, 0); + return _mm512_inserti64x4(tmp, lane1, 1); +} + // Loads 4 integers from memory and returns the packet // {a0, a0 a0, a0, a1, a1, a1, a1, a2, a2, a2, a2, a3, a3, a3, a3} template <> @@ -948,7 +1096,11 @@ } template <> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet16i& from) { - EIGEN_DEBUG_ALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to), from); + EIGEN_DEBUG_ALIGNED_STORE _mm512_store_epi32(to, from); +} +template <> +EIGEN_STRONG_INLINE void pstore<int64_t>(int64_t* to, const Packet8l& from) { + 
EIGEN_DEBUG_ALIGNED_STORE _mm512_store_epi64(to, from); } template <> @@ -961,7 +1113,11 @@ } template <> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) { - EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to), from); + EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_epi32(to, from); +} +template <> +EIGEN_STRONG_INLINE void pstoreu<int64_t>(int64_t* to, const Packet8l& from) { + EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_epi64(to, from); } template <> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from, uint16_t umask) { @@ -1015,6 +1171,14 @@ return _mm512_i32gather_pd(indices, from, 8); } template <> +EIGEN_DEVICE_FUNC inline Packet8l pgather<int64_t, Packet8l>(const int64_t* from, Index stride) { + Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride)); + Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier); + + return _mm512_i32gather_epi64(indices, from, 8); +} +template <> EIGEN_DEVICE_FUNC inline Packet16i pgather<int, Packet16i>(const int* from, Index stride) { Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride)); Packet16i stride_multiplier = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -1043,7 +1207,6 @@ __mmask8 mask = static_cast<__mmask8>(umask); _mm512_mask_i32scatter_pd(to, mask, indices, from, 8); } - template <> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet16f>(float* to, const Packet16f& from, Index stride) { Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride)); @@ -1059,6 +1222,13 @@ _mm512_i32scatter_pd(to, indices, from, 8); } template <> +EIGEN_DEVICE_FUNC inline void pscatter<int64_t, Packet8l>(int64_t* to, const Packet8l& from, Index stride) { + Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride)); + Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); + Packet8i 
indices = _mm256_mullo_epi32(stride_vector, stride_multiplier); + _mm512_i32scatter_epi64(to, indices, from, 8); +} +template <> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet16i>(int* to, const Packet16i& from, Index stride) { Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride)); Packet16i stride_multiplier = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -1081,6 +1251,11 @@ Packet16i pa = pset1<Packet16i>(a); pstore(to, pa); } +template <> +EIGEN_STRONG_INLINE void pstore1<Packet8l>(int64_t* to, const int64_t& a) { + Packet8l pa = pset1<Packet8l>(a); + pstore(to, pa); +} template <> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { @@ -1097,15 +1272,20 @@ template <> EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) { - return _mm_cvtss_f32(_mm512_extractf32x4_ps(a, 0)); + return _mm512_cvtss_f32(a); } template <> EIGEN_STRONG_INLINE double pfirst<Packet8d>(const Packet8d& a) { - return _mm_cvtsd_f64(_mm256_extractf128_pd(_mm512_extractf64x4_pd(a, 0), 0)); + return _mm512_cvtsd_f64(a); +} +template <> +EIGEN_STRONG_INLINE int64_t pfirst<Packet8l>(const Packet8l& a) { + int64_t x = _mm_extract_epi64_0(_mm512_extracti32x4_epi32(a, 0)); + return x; } template <> EIGEN_STRONG_INLINE int pfirst<Packet16i>(const Packet16i& a) { - return _mm_extract_epi32(_mm512_extracti32x4_epi32(a, 0), 0); + return _mm512_cvtsi512_si32(a); } template <> @@ -1124,6 +1304,11 @@ } template <> +EIGEN_STRONG_INLINE Packet8l preverse(const Packet8l& a) { + return _mm512_permutexvar_epi64(_mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7), a); +} + +template <> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) { // _mm512_abs_ps intrinsic not found, so hack around it return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff))); @@ -1137,6 +1322,10 @@ EIGEN_STRONG_INLINE Packet16i pabs(const Packet16i& a) { return _mm512_abs_epi32(a); } +template <> +EIGEN_STRONG_INLINE Packet8l 
pabs(const Packet8l& a) { + return _mm512_abs_epi64(a); +} template <> EIGEN_STRONG_INLINE Packet16h psignbit(const Packet16h& a) { @@ -1268,9 +1457,7 @@ __m128 lane2 = _mm512_extractf32x4_ps(a, 2); __m128 lane3 = _mm512_extractf32x4_ps(a, 3); __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3)); - sum = _mm_hadd_ps(sum, sum); - sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1)); - return _mm_cvtss_f32(sum); + return predux<Packet4f>(sum); #endif } template <> @@ -1278,26 +1465,17 @@ __m256d lane0 = _mm512_extractf64x4_pd(a, 0); __m256d lane1 = _mm512_extractf64x4_pd(a, 1); __m256d sum = _mm256_add_pd(lane0, lane1); - __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); - return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0))); + return predux<Packet4d>(sum); } + +template <> +EIGEN_STRONG_INLINE int64_t predux<Packet8l>(const Packet8l& a) { + return _mm512_reduce_add_epi64(a); +} + template <> EIGEN_STRONG_INLINE int predux<Packet16i>(const Packet16i& a) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - __m256i lane0 = _mm512_extracti32x8_epi32(a, 0); - __m256i lane1 = _mm512_extracti32x8_epi32(a, 1); - Packet8i x = _mm256_add_epi32(lane0, lane1); - return predux<Packet8i>(x); -#else - __m128i lane0 = _mm512_extracti32x4_epi32(a, 0); - __m128i lane1 = _mm512_extracti32x4_epi32(a, 1); - __m128i lane2 = _mm512_extracti32x4_epi32(a, 2); - __m128i lane3 = _mm512_extracti32x4_epi32(a, 3); - __m128i sum = _mm_add_epi32(_mm_add_epi32(lane0, lane1), _mm_add_epi32(lane2, lane3)); - sum = _mm_hadd_epi32(sum, sum); - sum = _mm_hadd_epi32(sum, _mm_castps_si128(_mm_permute_ps(_mm_castsi128_ps(sum), 1))); - return _mm_cvtsi128_si32(sum); -#endif + return _mm512_reduce_add_epi32(a); } template <> @@ -1340,6 +1518,13 @@ } template <> +EIGEN_STRONG_INLINE Packet4l predux_half_dowto4<Packet8l>(const Packet8l& a) { + __m256i lane0 = _mm512_extracti64x4_epi64(a, 0); + __m256i lane1 = _mm512_extracti64x4_epi64(a, 1); + return 
_mm256_add_epi64(lane0, lane1); +} + +template <> EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) { // #ifdef EIGEN_VECTORIZE_AVX512DQ #if 0 @@ -1367,6 +1552,14 @@ res = pmul(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1))); } +template <> +EIGEN_STRONG_INLINE int predux_mul<Packet16i>(const Packet16i& a) { + return _mm512_reduce_mul_epi32(a); +} +template <> +EIGEN_STRONG_INLINE int64_t predux_mul<Packet8l>(const Packet8l& a) { + return _mm512_reduce_mul_epi64(a); +} template <> EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) { @@ -1386,6 +1579,14 @@ res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1))); } +template <> +EIGEN_STRONG_INLINE int predux_min<Packet16i>(const Packet16i& a) { + return _mm512_reduce_min_epi32(a); +} +template <> +EIGEN_STRONG_INLINE int64_t predux_min<Packet8l>(const Packet8l& a) { + return _mm512_reduce_min_epi64(a); +} template <> EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) { @@ -1406,6 +1607,14 @@ res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1))); } +template <> +EIGEN_STRONG_INLINE int predux_max<Packet16i>(const Packet16i& a) { + return _mm512_reduce_max_epi32(a); +} +template <> +EIGEN_STRONG_INLINE int64_t predux_max<Packet8l>(const Packet8l& a) { + return _mm512_reduce_max_epi64(a); +} template <> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x) { @@ -1617,6 +1826,10 @@ OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \ OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 1); +#define PACK_OUTPUT_L(OUTPUT, INPUT, INDEX, STRIDE) \ + OUTPUT[INDEX] = _mm512_inserti64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \ + OUTPUT[INDEX] = _mm512_inserti64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 1); + EIGEN_DEVICE_FUNC 
inline void ptranspose(PacketBlock<Packet8d, 4>& kernel) { __m512d T0 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0); __m512d T1 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xff); @@ -1695,6 +1908,88 @@ kernel.packet[7] = T7; } +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8l, 4>& kernel) { + __m512i T0 = _mm512_castpd_si512( + _mm512_shuffle_pd(_mm512_castsi512_pd(kernel.packet[0]), _mm512_castsi512_pd(kernel.packet[1]), 0)); + __m512i T1 = _mm512_castpd_si512( + _mm512_shuffle_pd(_mm512_castsi512_pd(kernel.packet[0]), _mm512_castsi512_pd(kernel.packet[1]), 0xff)); + __m512i T2 = _mm512_castpd_si512( + _mm512_shuffle_pd(_mm512_castsi512_pd(kernel.packet[2]), _mm512_castsi512_pd(kernel.packet[3]), 0)); + __m512i T3 = _mm512_castpd_si512( + _mm512_shuffle_pd(_mm512_castsi512_pd(kernel.packet[2]), _mm512_castsi512_pd(kernel.packet[3]), 0xff)); + + PacketBlock<Packet4l, 8> tmp; + + tmp.packet[0] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T0, 0), _mm512_extracti64x4_epi64(T2, 0), 0x20); + tmp.packet[1] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T1, 0), _mm512_extracti64x4_epi64(T3, 0), 0x20); + tmp.packet[2] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T0, 0), _mm512_extracti64x4_epi64(T2, 0), 0x31); + tmp.packet[3] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T1, 0), _mm512_extracti64x4_epi64(T3, 0), 0x31); + + tmp.packet[4] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T0, 1), _mm512_extracti64x4_epi64(T2, 1), 0x20); + tmp.packet[5] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T1, 1), _mm512_extracti64x4_epi64(T3, 1), 0x20); + tmp.packet[6] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T0, 1), _mm512_extracti64x4_epi64(T2, 1), 0x31); + tmp.packet[7] = _mm256_permute2x128_si256(_mm512_extracti64x4_epi64(T1, 1), _mm512_extracti64x4_epi64(T3, 1), 0x31); + + PACK_OUTPUT_L(kernel.packet, tmp.packet, 0, 1); + PACK_OUTPUT_L(kernel.packet, tmp.packet, 1, 1); + 
PACK_OUTPUT_L(kernel.packet, tmp.packet, 2, 1); + PACK_OUTPUT_L(kernel.packet, tmp.packet, 3, 1); +} + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8l, 8>& kernel) { + __m512i T0 = _mm512_unpacklo_epi64(kernel.packet[0], kernel.packet[1]); + __m512i T1 = _mm512_unpackhi_epi64(kernel.packet[0], kernel.packet[1]); + __m512i T2 = _mm512_unpacklo_epi64(kernel.packet[2], kernel.packet[3]); + __m512i T3 = _mm512_unpackhi_epi64(kernel.packet[2], kernel.packet[3]); + __m512i T4 = _mm512_unpacklo_epi64(kernel.packet[4], kernel.packet[5]); + __m512i T5 = _mm512_unpackhi_epi64(kernel.packet[4], kernel.packet[5]); + __m512i T6 = _mm512_unpacklo_epi64(kernel.packet[6], kernel.packet[7]); + __m512i T7 = _mm512_unpackhi_epi64(kernel.packet[6], kernel.packet[7]); + + kernel.packet[0] = _mm512_permutex_epi64(T2, 0x4E); + kernel.packet[0] = _mm512_mask_blend_epi64(0xCC, T0, kernel.packet[0]); + kernel.packet[2] = _mm512_permutex_epi64(T0, 0x4E); + kernel.packet[2] = _mm512_mask_blend_epi64(0xCC, kernel.packet[2], T2); + kernel.packet[1] = _mm512_permutex_epi64(T3, 0x4E); + kernel.packet[1] = _mm512_mask_blend_epi64(0xCC, T1, kernel.packet[1]); + kernel.packet[3] = _mm512_permutex_epi64(T1, 0x4E); + kernel.packet[3] = _mm512_mask_blend_epi64(0xCC, kernel.packet[3], T3); + kernel.packet[4] = _mm512_permutex_epi64(T6, 0x4E); + kernel.packet[4] = _mm512_mask_blend_epi64(0xCC, T4, kernel.packet[4]); + kernel.packet[6] = _mm512_permutex_epi64(T4, 0x4E); + kernel.packet[6] = _mm512_mask_blend_epi64(0xCC, kernel.packet[6], T6); + kernel.packet[5] = _mm512_permutex_epi64(T7, 0x4E); + kernel.packet[5] = _mm512_mask_blend_epi64(0xCC, T5, kernel.packet[5]); + kernel.packet[7] = _mm512_permutex_epi64(T5, 0x4E); + kernel.packet[7] = _mm512_mask_blend_epi64(0xCC, kernel.packet[7], T7); + + T0 = _mm512_shuffle_i64x2(kernel.packet[4], kernel.packet[4], 0x4E); + T0 = _mm512_mask_blend_epi64(0xF0, kernel.packet[0], T0); + T4 = _mm512_shuffle_i64x2(kernel.packet[0], kernel.packet[0], 
0x4E); + T4 = _mm512_mask_blend_epi64(0xF0, T4, kernel.packet[4]); + T1 = _mm512_shuffle_i64x2(kernel.packet[5], kernel.packet[5], 0x4E); + T1 = _mm512_mask_blend_epi64(0xF0, kernel.packet[1], T1); + T5 = _mm512_shuffle_i64x2(kernel.packet[1], kernel.packet[1], 0x4E); + T5 = _mm512_mask_blend_epi64(0xF0, T5, kernel.packet[5]); + T2 = _mm512_shuffle_i64x2(kernel.packet[6], kernel.packet[6], 0x4E); + T2 = _mm512_mask_blend_epi64(0xF0, kernel.packet[2], T2); + T6 = _mm512_shuffle_i64x2(kernel.packet[2], kernel.packet[2], 0x4E); + T6 = _mm512_mask_blend_epi64(0xF0, T6, kernel.packet[6]); + T3 = _mm512_shuffle_i64x2(kernel.packet[7], kernel.packet[7], 0x4E); + T3 = _mm512_mask_blend_epi64(0xF0, kernel.packet[3], T3); + T7 = _mm512_shuffle_i64x2(kernel.packet[3], kernel.packet[3], 0x4E); + T7 = _mm512_mask_blend_epi64(0xF0, T7, kernel.packet[7]); + + kernel.packet[0] = T0; + kernel.packet[1] = T1; + kernel.packet[2] = T2; + kernel.packet[3] = T3; + kernel.packet[4] = T4; + kernel.packet[5] = T5; + kernel.packet[6] = T6; + kernel.packet[7] = T7; +} + #define PACK_OUTPUT_I32(OUTPUT, INPUT, INDEX, STRIDE) \ EIGEN_INSERT_8i_INTO_16i(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]); @@ -1852,20 +2147,13 @@ template <> EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& ifPacket, const Packet16f& thenPacket, const Packet16f& elsePacket) { - __mmask16 m = (ifPacket.select[0]) | (ifPacket.select[1] << 1) | (ifPacket.select[2] << 2) | - (ifPacket.select[3] << 3) | (ifPacket.select[4] << 4) | (ifPacket.select[5] << 5) | - (ifPacket.select[6] << 6) | (ifPacket.select[7] << 7) | (ifPacket.select[8] << 8) | - (ifPacket.select[9] << 9) | (ifPacket.select[10] << 10) | (ifPacket.select[11] << 11) | - (ifPacket.select[12] << 12) | (ifPacket.select[13] << 13) | (ifPacket.select[14] << 14) | - (ifPacket.select[15] << 15); + __mmask16 m = ifPacket.mask<__mmask16>(); return _mm512_mask_blend_ps(m, elsePacket, thenPacket); } template <> EIGEN_STRONG_INLINE Packet8d pblend(const 
Selector<8>& ifPacket, const Packet8d& thenPacket, const Packet8d& elsePacket) { - __mmask8 m = (ifPacket.select[0]) | (ifPacket.select[1] << 1) | (ifPacket.select[2] << 2) | - (ifPacket.select[3] << 3) | (ifPacket.select[4] << 4) | (ifPacket.select[5] << 5) | - (ifPacket.select[6] << 6) | (ifPacket.select[7] << 7); + __mmask8 m = ifPacket.mask<__mmask8>(); return _mm512_mask_blend_pd(m, elsePacket, thenPacket); }
diff --git a/Eigen/src/Core/arch/AVX512/TypeCasting.h b/Eigen/src/Core/arch/AVX512/TypeCasting.h index ccdb563..b16e9f6 100644 --- a/Eigen/src/Core/arch/AVX512/TypeCasting.h +++ b/Eigen/src/Core/arch/AVX512/TypeCasting.h
@@ -37,6 +37,11 @@ template <> struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {}; +template <> +struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {}; +template <> +struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {}; + #ifndef EIGEN_VECTORIZE_AVX512FP16 template <> struct type_casting_traits<half, float> : vectorized_type_casting_traits<half, float> {}; @@ -76,6 +81,19 @@ } template <> +EIGEN_STRONG_INLINE Packet8l pcast<Packet8d, Packet8l>(const Packet8d& a) { +#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVX512VL) + return _mm512_cvttpd_epi64(a); // truncating conversion, same rounding as static_cast in the fallback +#else // scalar fallback: spill the packet and convert lane by lane + EIGEN_ALIGN64 double aux[8]; // pstore is an aligned 512-bit store, so the buffer must be 64-byte aligned + pstore(aux, a); + return _mm512_set_epi64(static_cast<int64_t>(aux[7]), static_cast<int64_t>(aux[6]), static_cast<int64_t>(aux[5]), + static_cast<int64_t>(aux[4]), static_cast<int64_t>(aux[3]), static_cast<int64_t>(aux[2]), + static_cast<int64_t>(aux[1]), static_cast<int64_t>(aux[0])); +#endif +} + +template <> EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) { return _mm512_cvtepi32_ps(a); } @@ -91,6 +109,19 @@ } template <> +EIGEN_STRONG_INLINE Packet8d pcast<Packet8l, Packet8d>(const Packet8l& a) { +#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVX512VL) + return _mm512_cvtepi64_pd(a); +#else // scalar fallback: spill the packet and convert lane by lane + EIGEN_ALIGN64 int64_t aux[8]; // pstore is an aligned 512-bit store, so the buffer must be 64-byte aligned + pstore(aux, a); + return _mm512_set_pd(static_cast<double>(aux[7]), static_cast<double>(aux[6]), static_cast<double>(aux[5]), + static_cast<double>(aux[4]), static_cast<double>(aux[3]), static_cast<double>(aux[2]), + static_cast<double>(aux[1]), static_cast<double>(aux[0])); +#endif +} + +template <> EIGEN_STRONG_INLINE Packet16f pcast<Packet8d, Packet16f>(const Packet8d& a, const Packet8d& b) { return cat256(_mm512_cvtpd_ps(a), _mm512_cvtpd_ps(b)); } @@ -125,6 +156,16 @@ } template <> +EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet8l>(const Packet8l& 
a) { + return _mm512_castsi512_pd(a); +} + +template <> +EIGEN_STRONG_INLINE Packet8l preinterpret<Packet8l, Packet8d>(const Packet8d& a) { + return _mm512_castpd_si512(a); +} + +template <> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet8d>(const Packet8d& a) { return _mm512_castpd_ps(a); }
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 78dbf20..c973efd 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -129,8 +129,8 @@ const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent)); PacketI b = parithmetic_shift_right<2>(e); // floor(e/4); Packet c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias))); // 2^b - Packet out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b) - b = psub(psub(psub(e, b), b), b); // e - 3b + Packet out = pmul(pmul(a, c), pmul(c, c)); // a * 2^(3b) + b = pnmadd(pset1<PacketI>(3), b, e); // e - 3b c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias))); // 2^(e-3*b) out = pmul(out, c); return out;
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 008109a..e91ef4d 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -296,6 +296,7 @@ HasMax = 0, HasConj = 0, HasSqrt = 1, + HasNegate = 0, HasSign = 0 // Don't try to vectorize psign<bool> = identity. }; }; @@ -602,11 +603,6 @@ } template <> -EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a) { - return a; -} - -template <> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 2f9b920..8d95819 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -24,7 +24,7 @@ */ template <typename Scalar> struct scalar_opposite_op { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return -a; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return numext::negate(a); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pnegate(a); @@ -455,8 +455,9 @@ */ template <typename Scalar> struct scalar_log2_op { + using RealScalar = typename NumTraits<Scalar>::Real; EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { - return Scalar(EIGEN_LOG2E) * numext::log(a); + return Scalar(RealScalar(EIGEN_LOG2E)) * numext::log(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const {
diff --git a/Eigen/src/Core/util/IndexedViewHelper.h b/Eigen/src/Core/util/IndexedViewHelper.h index c187002..59486ea 100644 --- a/Eigen/src/Core/util/IndexedViewHelper.h +++ b/Eigen/src/Core/util/IndexedViewHelper.h
@@ -308,6 +308,178 @@ static Index incr(const Indices& indices) { return indices.incr(); } }; +// this helper class assumes internal::valid_indexed_view_overload<RowIndices, ColIndices>::value == true +template <typename Derived, typename RowIndices, typename ColIndices, typename EnableIf = void> +struct IndexedViewSelector; + +template <typename Indices, int SizeAtCompileTime> +using IvcType = typename internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::type; + +template <int SizeAtCompileTime, typename Indices> +inline IvcType<Indices, SizeAtCompileTime> CreateIndexSequence(size_t size, const Indices& indices) { + return internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::CreateIndexSequence(indices, size); +} + +// Generic +template <typename Derived, typename RowIndices, typename ColIndices> +struct IndexedViewSelector<Derived, RowIndices, ColIndices, + std::enable_if_t<internal::traits< + IndexedView<Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>, + IvcType<ColIndices, Derived::ColsAtCompileTime>>>::ReturnAsIndexedView>> { + using ReturnType = IndexedView<Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>, + IvcType<ColIndices, Derived::ColsAtCompileTime>>; + using ConstReturnType = IndexedView<const Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>, + IvcType<ColIndices, Derived::ColsAtCompileTime>>; + + static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { + return ReturnType(derived, CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices), + CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices)); + } + static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, + const ColIndices& colIndices) { + return ConstReturnType(derived, CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices), + CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), 
colIndices)); + } +}; + +// Block +template <typename Derived, typename RowIndices, typename ColIndices> +struct IndexedViewSelector< + Derived, RowIndices, ColIndices, + std::enable_if_t<internal::traits<IndexedView<Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>, + IvcType<ColIndices, Derived::ColsAtCompileTime>>>::ReturnAsBlock>> { + using ActualRowIndices = IvcType<RowIndices, Derived::RowsAtCompileTime>; + using ActualColIndices = IvcType<ColIndices, Derived::ColsAtCompileTime>; + using IndexedViewType = IndexedView<Derived, ActualRowIndices, ActualColIndices>; + using ConstIndexedViewType = IndexedView<const Derived, ActualRowIndices, ActualColIndices>; + using ReturnType = typename internal::traits<IndexedViewType>::BlockType; + using ConstReturnType = typename internal::traits<ConstIndexedViewType>::BlockType; + using RowHelper = internal::IndexedViewHelper<ActualRowIndices>; + using ColHelper = internal::IndexedViewHelper<ActualColIndices>; + + static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { + auto actualRowIndices = CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices); + auto actualColIndices = CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices); + return ReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices), + RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices)); + } + static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, + const ColIndices& colIndices) { + auto actualRowIndices = CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices); + auto actualColIndices = CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices); + return ConstReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices), + RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices)); + } +}; + +// Scalar 
+template <typename Derived, typename RowIndices, typename ColIndices> +struct IndexedViewSelector< + Derived, RowIndices, ColIndices, + std::enable_if_t<internal::traits<IndexedView<Derived, IvcType<RowIndices, Derived::RowsAtCompileTime>, + IvcType<ColIndices, Derived::ColsAtCompileTime>>>::ReturnAsScalar>> { + using ReturnType = typename DenseBase<Derived>::Scalar&; + using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType; + using ActualRowIndices = IvcType<RowIndices, Derived::RowsAtCompileTime>; + using ActualColIndices = IvcType<ColIndices, Derived::ColsAtCompileTime>; + using RowHelper = internal::IndexedViewHelper<ActualRowIndices>; + using ColHelper = internal::IndexedViewHelper<ActualColIndices>; + static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { + auto actualRowIndices = CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices); + auto actualColIndices = CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices); + return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices)); + } + static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, + const ColIndices& colIndices) { + auto actualRowIndices = CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), rowIndices); + auto actualColIndices = CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), colIndices); + return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices)); + } +}; + +// this helper class assumes internal::is_valid_index_type<Indices>::value == false +template <typename Derived, typename Indices, typename EnableIf = void> +struct VectorIndexedViewSelector; + +// Generic +template <typename Derived, typename Indices> +struct VectorIndexedViewSelector< + Derived, Indices, + std::enable_if_t<!internal::is_single_range<IvcType<Indices, Derived::SizeAtCompileTime>>::value && + 
internal::IndexedViewHelper<IvcType<Indices, Derived::SizeAtCompileTime>>::IncrAtCompileTime != + 1>> { + static constexpr bool IsRowMajor = DenseBase<Derived>::IsRowMajor; + using ZeroIndex = internal::SingleRange<Index(0)>; + using RowMajorReturnType = IndexedView<Derived, ZeroIndex, IvcType<Indices, Derived::SizeAtCompileTime>>; + using ConstRowMajorReturnType = IndexedView<const Derived, ZeroIndex, IvcType<Indices, Derived::SizeAtCompileTime>>; + + using ColMajorReturnType = IndexedView<Derived, IvcType<Indices, Derived::SizeAtCompileTime>, ZeroIndex>; + using ConstColMajorReturnType = IndexedView<const Derived, IvcType<Indices, Derived::SizeAtCompileTime>, ZeroIndex>; + + using ReturnType = typename internal::conditional<IsRowMajor, RowMajorReturnType, ColMajorReturnType>::type; + using ConstReturnType = + typename internal::conditional<IsRowMajor, ConstRowMajorReturnType, ConstColMajorReturnType>::type; + + template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true> + static inline RowMajorReturnType run(Derived& derived, const Indices& indices) { + return RowMajorReturnType(derived, ZeroIndex(0), + CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), indices)); + } + template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true> + static inline ConstRowMajorReturnType run(const Derived& derived, const Indices& indices) { + return ConstRowMajorReturnType(derived, ZeroIndex(0), + CreateIndexSequence<Derived::ColsAtCompileTime>(derived.cols(), indices)); + } + template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true> + static inline ColMajorReturnType run(Derived& derived, const Indices& indices) { + return ColMajorReturnType(derived, CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), indices), + ZeroIndex(0)); + } + template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true> + static inline ConstColMajorReturnType run(const Derived& 
derived, const Indices& indices) { + return ConstColMajorReturnType(derived, CreateIndexSequence<Derived::RowsAtCompileTime>(derived.rows(), indices), + ZeroIndex(0)); + } +}; + +// Block +template <typename Derived, typename Indices> +struct VectorIndexedViewSelector< + Derived, Indices, + std::enable_if_t<!internal::is_single_range<IvcType<Indices, Derived::SizeAtCompileTime>>::value && + internal::IndexedViewHelper<IvcType<Indices, Derived::SizeAtCompileTime>>::IncrAtCompileTime == + 1>> { + using Helper = internal::IndexedViewHelper<IvcType<Indices, Derived::SizeAtCompileTime>>; + using ReturnType = VectorBlock<Derived, Helper::SizeAtCompileTime>; + using ConstReturnType = VectorBlock<const Derived, Helper::SizeAtCompileTime>; + static inline ReturnType run(Derived& derived, const Indices& indices) { + auto actualIndices = CreateIndexSequence<Derived::SizeAtCompileTime>(derived.size(), indices); + return ReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices)); + } + static inline ConstReturnType run(const Derived& derived, const Indices& indices) { + auto actualIndices = CreateIndexSequence<Derived::SizeAtCompileTime>(derived.size(), indices); + return ConstReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices)); + } +}; + +// Symbolic +template <typename Derived, typename Indices> +struct VectorIndexedViewSelector< + Derived, Indices, + std::enable_if_t<internal::is_single_range<IvcType<Indices, Derived::SizeAtCompileTime>>::value>> { + using ReturnType = typename DenseBase<Derived>::Scalar&; + using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType; + using Helper = internal::IndexedViewHelper<IvcType<Indices, Derived::SizeAtCompileTime>>; + static inline ReturnType run(Derived& derived, const Indices& indices) { + auto actualIndices = CreateIndexSequence<Derived::SizeAtCompileTime>(derived.size(), indices); + return derived(Helper::first(actualIndices)); + } + static inline ConstReturnType run(const 
Derived& derived, const Indices& indices) { + auto actualIndices = CreateIndexSequence<Derived::SizeAtCompileTime>(derived.size(), indices); + return derived(Helper::first(actualIndices)); + } +}; + } // end namespace internal } // end namespace Eigen
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 8931c4a..1d8ded9 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h
@@ -57,22 +57,22 @@ typedef AngleAxis<Scalar> AngleAxisType; /** \returns the \c x coefficient */ - EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); } /** \returns the \c y coefficient */ - EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); } /** \returns the \c z coefficient */ - EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); } /** \returns the \c w coefficient */ - EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); } /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */ - EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); } /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */ - EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); } /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */ - EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); } /** \returns a reference to the \c w coefficient 
(if Derived is a non-const lvalue) */ - EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); } /** \returns a read-only vector expression of the imaginary part (x,y,z) */ EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients, 3> vec() const { return coeffs().template head<3>(); }
diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h index 956ef56..682c4c7 100644 --- a/Eigen/src/Geometry/Translation.h +++ b/Eigen/src/Geometry/Translation.h
@@ -69,18 +69,18 @@ EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {} /** \brief Returns the x-translation by value. **/ - EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar x() const { return m_coeffs.x(); } /** \brief Returns the y-translation by value. **/ - EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar y() const { return m_coeffs.y(); } /** \brief Returns the z-translation by value. **/ - EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar z() const { return m_coeffs.z(); } /** \brief Returns the x-translation as a reference. **/ - EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar& x() { return m_coeffs.x(); } /** \brief Returns the y-translation as a reference. **/ - EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar& y() { return m_coeffs.y(); } /** \brief Returns the z-translation as a reference. **/ - EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Scalar& z() { return m_coeffs.z(); } EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; } EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; }
diff --git a/Eigen/src/plugins/IndexedViewMethods.inc b/Eigen/src/plugins/IndexedViewMethods.inc index c3df429..a51e349 100644 --- a/Eigen/src/plugins/IndexedViewMethods.inc +++ b/Eigen/src/plugins/IndexedViewMethods.inc
@@ -10,184 +10,6 @@ #if !defined(EIGEN_PARSED_BY_DOXYGEN) public: -// define some aliases to ease readability - -template <typename Indices> -using IvcRowType = typename internal::IndexedViewHelperIndicesWrapper<Indices, RowsAtCompileTime>::type; - -template <typename Indices> -using IvcColType = typename internal::IndexedViewHelperIndicesWrapper<Indices, ColsAtCompileTime>::type; - -template <typename Indices> -using IvcSizeType = typename internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::type; - -template <typename Indices> -inline IvcRowType<Indices> ivcRow(const Indices& indices) const { - return internal::IndexedViewHelperIndicesWrapper<Indices, RowsAtCompileTime>::CreateIndexSequence(indices, - derived().rows()); -} - -template <typename Indices> -inline IvcColType<Indices> ivcCol(const Indices& indices) const { - return internal::IndexedViewHelperIndicesWrapper<Indices, ColsAtCompileTime>::CreateIndexSequence(indices, - derived().cols()); -} - -template <typename Indices> -inline IvcSizeType<Indices> ivcSize(const Indices& indices) const { - return internal::IndexedViewHelperIndicesWrapper<Indices, SizeAtCompileTime>::CreateIndexSequence(indices, - derived().size()); - ; -} - -// this helper class assumes internal::valid_indexed_view_overload<RowIndices, ColIndices>::value == true -template <typename RowIndices, typename ColIndices, typename EnableIf = void> -struct IndexedViewSelector; - -// Generic -template <typename RowIndices, typename ColIndices> -struct IndexedViewSelector< - RowIndices, ColIndices, - std::enable_if_t< - internal::traits<IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsIndexedView>> { - using ReturnType = IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>; - using ConstReturnType = IndexedView<const Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>; - - static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { 
- return ReturnType(derived, derived.ivcRow(rowIndices), derived.ivcCol(colIndices)); - } - static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, - const ColIndices& colIndices) { - return ConstReturnType(derived, derived.ivcRow(rowIndices), derived.ivcCol(colIndices)); - } -}; - -// Block -template <typename RowIndices, typename ColIndices> -struct IndexedViewSelector<RowIndices, ColIndices, - std::enable_if_t<internal::traits< - IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsBlock>> { - using ActualRowIndices = IvcRowType<RowIndices>; - using ActualColIndices = IvcColType<ColIndices>; - using IndexedViewType = IndexedView<Derived, ActualRowIndices, ActualColIndices>; - using ConstIndexedViewType = IndexedView<const Derived, ActualRowIndices, ActualColIndices>; - using ReturnType = typename internal::traits<IndexedViewType>::BlockType; - using ConstReturnType = typename internal::traits<ConstIndexedViewType>::BlockType; - using RowHelper = internal::IndexedViewHelper<ActualRowIndices>; - using ColHelper = internal::IndexedViewHelper<ActualColIndices>; - - static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { - auto actualRowIndices = derived.ivcRow(rowIndices); - auto actualColIndices = derived.ivcCol(colIndices); - return ReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices), - RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices)); - } - static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, - const ColIndices& colIndices) { - auto actualRowIndices = derived.ivcRow(rowIndices); - auto actualColIndices = derived.ivcCol(colIndices); - return ConstReturnType(derived, RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices), - RowHelper::size(actualRowIndices), ColHelper::size(actualColIndices)); - } -}; - -// Scalar -template <typename RowIndices, 
typename ColIndices> -struct IndexedViewSelector<RowIndices, ColIndices, - std::enable_if_t<internal::traits< - IndexedView<Derived, IvcRowType<RowIndices>, IvcColType<ColIndices>>>::ReturnAsScalar>> { - using ReturnType = typename DenseBase<Derived>::Scalar&; - using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType; - using ActualRowIndices = IvcRowType<RowIndices>; - using ActualColIndices = IvcColType<ColIndices>; - using RowHelper = internal::IndexedViewHelper<ActualRowIndices>; - using ColHelper = internal::IndexedViewHelper<ActualColIndices>; - static inline ReturnType run(Derived& derived, const RowIndices& rowIndices, const ColIndices& colIndices) { - auto actualRowIndices = derived.ivcRow(rowIndices); - auto actualColIndices = derived.ivcCol(colIndices); - return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices)); - } - static inline ConstReturnType run(const Derived& derived, const RowIndices& rowIndices, - const ColIndices& colIndices) { - auto actualRowIndices = derived.ivcRow(rowIndices); - auto actualColIndices = derived.ivcCol(colIndices); - return derived(RowHelper::first(actualRowIndices), ColHelper::first(actualColIndices)); - } -}; - -// this helper class assumes internal::is_valid_index_type<Indices>::value == false -template <typename Indices, typename EnableIf = void> -struct VectorIndexedViewSelector; - -// Generic -template <typename Indices> -struct VectorIndexedViewSelector< - Indices, std::enable_if_t<!internal::is_single_range<IvcSizeType<Indices>>::value && - internal::IndexedViewHelper<IvcSizeType<Indices>>::IncrAtCompileTime != 1>> { - static constexpr bool IsRowMajor = DenseBase<Derived>::IsRowMajor; - using ZeroIndex = internal::SingleRange<Index(0)>; - using RowMajorReturnType = IndexedView<Derived, ZeroIndex, IvcSizeType<Indices>>; - using ConstRowMajorReturnType = IndexedView<const Derived, ZeroIndex, IvcSizeType<Indices>>; - - using ColMajorReturnType = IndexedView<Derived, 
IvcSizeType<Indices>, ZeroIndex>; - using ConstColMajorReturnType = IndexedView<const Derived, IvcSizeType<Indices>, ZeroIndex>; - - using ReturnType = typename internal::conditional<IsRowMajor, RowMajorReturnType, ColMajorReturnType>::type; - using ConstReturnType = - typename internal::conditional<IsRowMajor, ConstRowMajorReturnType, ConstColMajorReturnType>::type; - - template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true> - static inline RowMajorReturnType run(Derived& derived, const Indices& indices) { - return RowMajorReturnType(derived, ZeroIndex(0), derived.ivcCol(indices)); - } - template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true> - static inline ConstRowMajorReturnType run(const Derived& derived, const Indices& indices) { - return ConstRowMajorReturnType(derived, ZeroIndex(0), derived.ivcCol(indices)); - } - template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true> - static inline ColMajorReturnType run(Derived& derived, const Indices& indices) { - return ColMajorReturnType(derived, derived.ivcRow(indices), ZeroIndex(0)); - } - template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true> - static inline ConstColMajorReturnType run(const Derived& derived, const Indices& indices) { - return ConstColMajorReturnType(derived, derived.ivcRow(indices), ZeroIndex(0)); - } -}; - -// Block -template <typename Indices> -struct VectorIndexedViewSelector< - Indices, std::enable_if_t<!internal::is_single_range<IvcSizeType<Indices>>::value && - internal::IndexedViewHelper<IvcSizeType<Indices>>::IncrAtCompileTime == 1>> { - using Helper = internal::IndexedViewHelper<IvcSizeType<Indices>>; - using ReturnType = VectorBlock<Derived, Helper::SizeAtCompileTime>; - using ConstReturnType = VectorBlock<const Derived, Helper::SizeAtCompileTime>; - static inline ReturnType run(Derived& derived, const Indices& indices) { - auto actualIndices = derived.ivcSize(indices); 
- return ReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices)); - } - static inline ConstReturnType run(const Derived& derived, const Indices& indices) { - auto actualIndices = derived.ivcSize(indices); - return ConstReturnType(derived, Helper::first(actualIndices), Helper::size(actualIndices)); - } -}; - -// Symbolic -template <typename Indices> -struct VectorIndexedViewSelector<Indices, std::enable_if_t<internal::is_single_range<IvcSizeType<Indices>>::value>> { - using ReturnType = typename DenseBase<Derived>::Scalar&; - using ConstReturnType = typename DenseBase<Derived>::CoeffReturnType; - using Helper = internal::IndexedViewHelper<IvcSizeType<Indices>>; - static inline ReturnType run(Derived& derived, const Indices& indices) { - auto actualIndices = derived.ivcSize(indices); - return derived(Helper::first(actualIndices)); - } - static inline ConstReturnType run(const Derived& derived, const Indices& indices) { - auto actualIndices = derived.ivcSize(indices); - return derived(Helper::first(actualIndices)); - } -}; - // SFINAE dummy types template <typename RowIndices, typename ColIndices> @@ -210,24 +32,26 @@ // non-const versions -template <typename RowIndices, typename ColIndices> -using IndexedViewType = typename IndexedViewSelector<RowIndices, ColIndices>::ReturnType; + template <typename RowIndices, typename ColIndices> + using IndexedViewType = typename internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::ReturnType; -template <typename RowIndices, typename ColIndices, EnableOverload<RowIndices, ColIndices> = true> -IndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices, const ColIndices& colIndices) { - return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), rowIndices, colIndices); -} + template <typename RowIndices, typename ColIndices, EnableOverload<RowIndices, ColIndices> = true> + IndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices, const ColIndices& 
colIndices) { + return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), rowIndices, colIndices); + } template <typename RowType, size_t RowSize, typename ColIndices, typename RowIndices = Array<RowType, RowSize, 1>, EnableOverload<RowIndices, ColIndices> = true> IndexedViewType<RowIndices, ColIndices> operator()(const RowType (&rowIndices)[RowSize], const ColIndices& colIndices) { - return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, colIndices); + return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, + colIndices); } template <typename RowIndices, typename ColType, size_t ColSize, typename ColIndices = Array<ColType, ColSize, 1>, EnableOverload<RowIndices, ColIndices> = true> IndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices, const ColType (&colIndices)[ColSize]) { - return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), rowIndices, ColIndices{colIndices}); + return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), rowIndices, + ColIndices{colIndices}); } template <typename RowType, size_t RowSize, typename ColType, size_t ColSize, @@ -235,32 +59,35 @@ EnableOverload<RowIndices, ColIndices> = true> IndexedViewType<RowIndices, ColIndices> operator()(const RowType (&rowIndices)[RowSize], const ColType (&colIndices)[ColSize]) { - return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, ColIndices{colIndices}); + return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, + ColIndices{colIndices}); } // const versions template <typename RowIndices, typename ColIndices> -using ConstIndexedViewType = typename IndexedViewSelector<RowIndices, ColIndices>::ConstReturnType; +using ConstIndexedViewType = typename internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::ConstReturnType; 
template <typename RowIndices, typename ColIndices, EnableConstOverload<RowIndices, ColIndices> = true> ConstIndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices, const ColIndices& colIndices) const { - return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), rowIndices, colIndices); + return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), rowIndices, colIndices); } template <typename RowType, size_t RowSize, typename ColIndices, typename RowIndices = Array<RowType, RowSize, 1>, EnableConstOverload<RowIndices, ColIndices> = true> ConstIndexedViewType<RowIndices, ColIndices> operator()(const RowType (&rowIndices)[RowSize], const ColIndices& colIndices) const { - return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, colIndices); + return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, + colIndices); } template <typename RowIndices, typename ColType, size_t ColSize, typename ColIndices = Array<ColType, ColSize, 1>, EnableConstOverload<RowIndices, ColIndices> = true> ConstIndexedViewType<RowIndices, ColIndices> operator()(const RowIndices& rowIndices, const ColType (&colIndices)[ColSize]) const { - return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), rowIndices, ColIndices{colIndices}); + return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), rowIndices, + ColIndices{colIndices}); } template <typename RowType, size_t RowSize, typename ColType, size_t ColSize, @@ -268,7 +95,8 @@ EnableConstOverload<RowIndices, ColIndices> = true> ConstIndexedViewType<RowIndices, ColIndices> operator()(const RowType (&rowIndices)[RowSize], const ColType (&colIndices)[ColSize]) const { - return IndexedViewSelector<RowIndices, ColIndices>::run(derived(), RowIndices{rowIndices}, ColIndices{colIndices}); + return internal::IndexedViewSelector<Derived, RowIndices, ColIndices>::run(derived(), 
RowIndices{rowIndices}, + ColIndices{colIndices}); } // Public API for 1D vectors/arrays @@ -276,37 +104,37 @@ // non-const versions template <typename Indices> -using VectorIndexedViewType = typename VectorIndexedViewSelector<Indices>::ReturnType; +using VectorIndexedViewType = typename internal::VectorIndexedViewSelector<Derived, Indices>::ReturnType; template <typename Indices, EnableVectorOverload<Indices> = true> VectorIndexedViewType<Indices> operator()(const Indices& indices) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorIndexedViewSelector<Indices>::run(derived(), indices); + return internal::VectorIndexedViewSelector<Derived, Indices>::run(derived(), indices); } template <typename IndexType, size_t Size, typename Indices = Array<IndexType, Size, 1>, EnableVectorOverload<Indices> = true> VectorIndexedViewType<Indices> operator()(const IndexType (&indices)[Size]) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorIndexedViewSelector<Indices>::run(derived(), Indices{indices}); + return internal::VectorIndexedViewSelector<Derived, Indices>::run(derived(), Indices{indices}); } // const versions template <typename Indices> -using ConstVectorIndexedViewType = typename VectorIndexedViewSelector<Indices>::ConstReturnType; +using ConstVectorIndexedViewType = typename internal::VectorIndexedViewSelector<Derived, Indices>::ConstReturnType; template <typename Indices, EnableConstVectorOverload<Indices> = true> ConstVectorIndexedViewType<Indices> operator()(const Indices& indices) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorIndexedViewSelector<Indices>::run(derived(), indices); + return internal::VectorIndexedViewSelector<Derived, Indices>::run(derived(), indices); } template <typename IndexType, size_t Size, typename Indices = Array<IndexType, Size, 1>, EnableConstVectorOverload<Indices> = true> ConstVectorIndexedViewType<Indices> operator()(const IndexType (&indices)[Size]) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - 
return VectorIndexedViewSelector<Indices>::run(derived(), Indices{indices}); + return internal::VectorIndexedViewSelector<Derived, Indices>::run(derived(), Indices{indices}); } #else // EIGEN_PARSED_BY_DOXYGEN
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp index 543ef2e..9fb104c 100644 --- a/test/array_cwise.cpp +++ b/test/array_cwise.cpp
@@ -614,9 +614,9 @@ VERIFY(o2.cols() == cols); ArrayType2 o3(rows, cols); - VERIFY(o3(0) == Scalar(rows) && o3(1) == Scalar(cols)); + VERIFY(o3(0) == RealScalar(rows) && o3(1) == RealScalar(cols)); ArrayType2 o4(static_cast<int>(rows), static_cast<int>(cols)); - VERIFY(o4(0) == Scalar(rows) && o4(1) == Scalar(cols)); + VERIFY(o4(0) == RealScalar(rows) && o4(1) == RealScalar(cols)); } { TwoDArrayType o1{rows, cols}; @@ -627,9 +627,9 @@ VERIFY(o2.cols() == cols); ArrayType2 o3{rows, cols}; - VERIFY(o3(0) == Scalar(rows) && o3(1) == Scalar(cols)); + VERIFY(o3(0) == RealScalar(rows) && o3(1) == RealScalar(cols)); ArrayType2 o4{int(rows), int(cols)}; - VERIFY(o4(0) == Scalar(rows) && o4(1) == Scalar(cols)); + VERIFY(o4(0) == RealScalar(rows) && o4(1) == RealScalar(cols)); } }
diff --git a/test/bfloat16_float.cpp b/test/bfloat16_float.cpp index 922a6d1..5be49d9 100644 --- a/test/bfloat16_float.cpp +++ b/test/bfloat16_float.cpp
@@ -82,7 +82,7 @@ // Conversion to bool VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(3)), true); VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(0.33333f)), true); - VERIFY_IS_EQUAL(bfloat16(-0.0), false); + VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(-0.0)), false); VERIFY_IS_EQUAL(static_cast<bool>(bfloat16(0.0)), false); // Explicit conversion to float.
diff --git a/test/geo_eulerangles.cpp b/test/geo_eulerangles.cpp index 3d443de..11c0449 100644 --- a/test/geo_eulerangles.cpp +++ b/test/geo_eulerangles.cpp
@@ -23,6 +23,7 @@ typedef AngleAxis<Scalar> AngleAxisx; const Matrix3 m(AngleAxisx(ea[0], Vector3::Unit(i)) * AngleAxisx(ea[1], Vector3::Unit(j)) * AngleAxisx(ea[2], Vector3::Unit(k))); + const Scalar kPi = Scalar(EIGEN_PI); // Test non-canonical eulerAngles { @@ -33,11 +34,11 @@ // approx_or_less_than does not work for 0 VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1))); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(EIGEN_PI)); - VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[1]); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(EIGEN_PI)); - VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[2]); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(EIGEN_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], kPi); + VERIFY_IS_APPROX_OR_LESS_THAN(-kPi, eabis[1]); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], kPi); + VERIFY_IS_APPROX_OR_LESS_THAN(-kPi, eabis[2]); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], kPi); } // Test canonicalEulerAngles @@ -47,20 +48,20 @@ AngleAxisx(eabis[2], Vector3::Unit(k))); VERIFY_IS_APPROX(m, mbis); - VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[0]); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(EIGEN_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(-kPi, eabis[0]); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], kPi); if (i != k) { // Tait-Bryan sequence - VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI / 2), eabis[1]); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(EIGEN_PI / 2)); + VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(kPi / 2), eabis[1]); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(kPi / 2)); } else { // Proper Euler sequence // approx_or_less_than does not work for 0 VERIFY(0 < eabis[1] || test_isMuchSmallerThan(eabis[1], Scalar(1))); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(EIGEN_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], kPi); } - VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[2]); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(EIGEN_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(-kPi, eabis[2]); 
+ VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], kPi); } } @@ -100,7 +101,10 @@ typedef Quaternion<Scalar> Quaternionx; typedef AngleAxis<Scalar> AngleAxisx; - Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + const Scalar kPi = Scalar(EIGEN_PI); + const Scalar smallVal = static_cast<Scalar>(0.001); + + Scalar a = internal::random<Scalar>(-kPi, kPi); Quaternionx q1; q1 = AngleAxisx(a, Vector3::Random().normalized()); Matrix3 m; @@ -120,65 +124,65 @@ check_all_var(ea); // Check with random angles in range [-pi:pi]x[-pi:pi]x[-pi:pi]. - ea = Array3::Random() * Scalar(EIGEN_PI); + ea = Array3::Random() * kPi; check_all_var(ea); - auto test_with_some_zeros = [](const Vector3& eaz) { + auto test_with_some_zeros = [=](const Vector3& eaz) { check_all_var(eaz); Vector3 ea_glz = eaz; ea_glz[0] = Scalar(0); check_all_var(ea_glz); - ea_glz[0] = internal::random<Scalar>(-0.001, 0.001); + ea_glz[0] = internal::random<Scalar>(-smallVal, smallVal); check_all_var(ea_glz); ea_glz[2] = Scalar(0); check_all_var(ea_glz); - ea_glz[2] = internal::random<Scalar>(-0.001, 0.001); + ea_glz[2] = internal::random<Scalar>(-smallVal, smallVal); check_all_var(ea_glz); }; // Check gimbal lock configurations and a bit noisy gimbal locks Vector3 ea_gl = ea; - ea_gl[1] = EIGEN_PI / 2; + ea_gl[1] = kPi / 2; test_with_some_zeros(ea_gl); - ea_gl[1] += internal::random<Scalar>(-0.001, 0.001); + ea_gl[1] += internal::random<Scalar>(-smallVal, smallVal); test_with_some_zeros(ea_gl); - ea_gl[1] = -EIGEN_PI / 2; + ea_gl[1] = -kPi / 2; test_with_some_zeros(ea_gl); - ea_gl[1] += internal::random<Scalar>(-0.001, 0.001); + ea_gl[1] += internal::random<Scalar>(-smallVal, smallVal); test_with_some_zeros(ea_gl); - ea_gl[1] = EIGEN_PI / 2; + ea_gl[1] = kPi / 2; ea_gl[2] = ea_gl[0]; test_with_some_zeros(ea_gl); - ea_gl[1] += internal::random<Scalar>(-0.001, 0.001); + ea_gl[1] += internal::random<Scalar>(-smallVal, smallVal); test_with_some_zeros(ea_gl); - ea_gl[1] = -EIGEN_PI / 2; + ea_gl[1] = -kPi 
/ 2; test_with_some_zeros(ea_gl); - ea_gl[1] += internal::random<Scalar>(-0.001, 0.001); + ea_gl[1] += internal::random<Scalar>(-smallVal, smallVal); test_with_some_zeros(ea_gl); // Similar to above, but with pi instead of pi/2 Vector3 ea_pi = ea; - ea_pi[1] = EIGEN_PI; + ea_pi[1] = kPi; test_with_some_zeros(ea_gl); - ea_pi[1] += internal::random<Scalar>(-0.001, 0.001); + ea_pi[1] += internal::random<Scalar>(-smallVal, smallVal); test_with_some_zeros(ea_gl); - ea_pi[1] = -EIGEN_PI; + ea_pi[1] = -kPi; test_with_some_zeros(ea_gl); - ea_pi[1] += internal::random<Scalar>(-0.001, 0.001); + ea_pi[1] += internal::random<Scalar>(-smallVal, smallVal); test_with_some_zeros(ea_gl); - ea_pi[1] = EIGEN_PI; + ea_pi[1] = kPi; ea_pi[2] = ea_pi[0]; test_with_some_zeros(ea_gl); - ea_pi[1] += internal::random<Scalar>(-0.001, 0.001); + ea_pi[1] += internal::random<Scalar>(-smallVal, smallVal); test_with_some_zeros(ea_gl); - ea_pi[1] = -EIGEN_PI; + ea_pi[1] = -kPi; test_with_some_zeros(ea_gl); - ea_pi[1] += internal::random<Scalar>(-0.001, 0.001); + ea_pi[1] += internal::random<Scalar>(-smallVal, smallVal); test_with_some_zeros(ea_gl); - ea[2] = ea[0] = internal::random<Scalar>(0, Scalar(EIGEN_PI)); + ea[2] = ea[0] = internal::random<Scalar>(0, kPi); check_all_var(ea); - ea[0] = ea[1] = internal::random<Scalar>(0, Scalar(EIGEN_PI)); + ea[0] = ea[1] = internal::random<Scalar>(0, kPi); check_all_var(ea); ea[1] = 0;
diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp index f165e8b..064cc4a 100644 --- a/test/indexed_view.cpp +++ b/test/indexed_view.cpp
@@ -447,7 +447,7 @@ // Check compilation of varying integer types as index types: Index i = n / 2; - short i_short(i); + short i_short = static_cast<short>(i); std::size_t i_sizet(i); VERIFY_IS_EQUAL(a(i), a.coeff(i_short)); VERIFY_IS_EQUAL(a(i), a.coeff(i_sizet)); @@ -790,6 +790,7 @@ VERIFY_IS_EQUAL(int(slice1.SizeAtCompileTime), 6); VERIFY_IS_EQUAL(int(slice2.SizeAtCompileTime), 6); auto slice3 = A(all, seq(fix<0>, last, fix<2>)); + TEST_SET_BUT_UNUSED_VARIABLE(slice3) VERIFY_IS_EQUAL(int(slice3.RowsAtCompileTime), kRows); VERIFY_IS_EQUAL(int(slice3.ColsAtCompileTime), (kCols + 1) / 2); } @@ -812,7 +813,7 @@ { std::vector<int> ind{4, 2, 5, 5, 3}; auto slice1 = A(all, ind); - for (int i = 0; i < ind.size(); ++i) { + for (size_t i = 0; i < ind.size(); ++i) { VERIFY_IS_EQUAL(slice1.col(i), A.col(ind[i])); }
diff --git a/test/mapstaticmethods.cpp b/test/mapstaticmethods.cpp index ac90bdc..b18b24a 100644 --- a/test/mapstaticmethods.cpp +++ b/test/mapstaticmethods.cpp
@@ -9,21 +9,14 @@ #include "main.h" -// GCC<=4.8 has spurious shadow warnings, because `ptr` re-appears inside template instantiations -// workaround: put these in an anonymous namespace -namespace { -float* ptr; -const float* const_ptr; -} // namespace - template <typename PlainObjectType, bool IsDynamicSize = PlainObjectType::SizeAtCompileTime == Dynamic, bool IsVector = PlainObjectType::IsVectorAtCompileTime> struct mapstaticmethods_impl {}; template <typename PlainObjectType, bool IsVector> struct mapstaticmethods_impl<PlainObjectType, false, IsVector> { - static void run(const PlainObjectType& m) { - mapstaticmethods_impl<PlainObjectType, true, IsVector>::run(m); + static void run(const PlainObjectType& m, float* ptr, const float* const_ptr) { + mapstaticmethods_impl<PlainObjectType, true, IsVector>::run(m, ptr, const_ptr); int i = internal::random<int>(2, 5), j = internal::random<int>(2, 5); @@ -66,7 +59,7 @@ template <typename PlainObjectType> struct mapstaticmethods_impl<PlainObjectType, true, false> { - static void run(const PlainObjectType& m) { + static void run(const PlainObjectType& m, float* ptr, const float* const_ptr) { Index rows = m.rows(), cols = m.cols(); int i = internal::random<int>(2, 5), j = internal::random<int>(2, 5); @@ -110,7 +103,7 @@ template <typename PlainObjectType> struct mapstaticmethods_impl<PlainObjectType, true, true> { - static void run(const PlainObjectType& v) { + static void run(const PlainObjectType& v, float* ptr, const float* const_ptr) { Index size = v.size(); int i = internal::random<int>(2, 5); @@ -133,34 +126,34 @@ }; template <typename PlainObjectType> -void mapstaticmethods(const PlainObjectType& m) { - mapstaticmethods_impl<PlainObjectType>::run(m); +void mapstaticmethods(const PlainObjectType& m, float* ptr, const float* const_ptr) { + mapstaticmethods_impl<PlainObjectType>::run(m, ptr, const_ptr); VERIFY(true); // just to avoid 'unused function' warning } EIGEN_DECLARE_TEST(mapstaticmethods) { - ptr = 
internal::aligned_new<float>(1000); + float* ptr = internal::aligned_new<float>(1000); for (int i = 0; i < 1000; i++) ptr[i] = float(i); - const_ptr = ptr; + const float* const_ptr = ptr; - CALL_SUBTEST_1((mapstaticmethods(Matrix<float, 1, 1>()))); - CALL_SUBTEST_1((mapstaticmethods(Vector2f()))); - CALL_SUBTEST_2((mapstaticmethods(Vector3f()))); - CALL_SUBTEST_2((mapstaticmethods(Matrix2f()))); - CALL_SUBTEST_3((mapstaticmethods(Matrix4f()))); - CALL_SUBTEST_3((mapstaticmethods(Array4f()))); - CALL_SUBTEST_4((mapstaticmethods(Array3f()))); - CALL_SUBTEST_4((mapstaticmethods(Array33f()))); - CALL_SUBTEST_5((mapstaticmethods(Array44f()))); - CALL_SUBTEST_5((mapstaticmethods(VectorXf(1)))); - CALL_SUBTEST_5((mapstaticmethods(VectorXf(8)))); - CALL_SUBTEST_6((mapstaticmethods(MatrixXf(1, 1)))); - CALL_SUBTEST_6((mapstaticmethods(MatrixXf(5, 7)))); - CALL_SUBTEST_7((mapstaticmethods(ArrayXf(1)))); - CALL_SUBTEST_7((mapstaticmethods(ArrayXf(5)))); - CALL_SUBTEST_8((mapstaticmethods(ArrayXXf(1, 1)))); - CALL_SUBTEST_8((mapstaticmethods(ArrayXXf(8, 6)))); + CALL_SUBTEST_1((mapstaticmethods(Matrix<float, 1, 1>(), ptr, const_ptr))); + CALL_SUBTEST_1((mapstaticmethods(Vector2f(), ptr, const_ptr))); + CALL_SUBTEST_2((mapstaticmethods(Vector3f(), ptr, const_ptr))); + CALL_SUBTEST_2((mapstaticmethods(Matrix2f(), ptr, const_ptr))); + CALL_SUBTEST_3((mapstaticmethods(Matrix4f(), ptr, const_ptr))); + CALL_SUBTEST_3((mapstaticmethods(Array4f(), ptr, const_ptr))); + CALL_SUBTEST_4((mapstaticmethods(Array3f(), ptr, const_ptr))); + CALL_SUBTEST_4((mapstaticmethods(Array33f(), ptr, const_ptr))); + CALL_SUBTEST_5((mapstaticmethods(Array44f(), ptr, const_ptr))); + CALL_SUBTEST_5((mapstaticmethods(VectorXf(1), ptr, const_ptr))); + CALL_SUBTEST_5((mapstaticmethods(VectorXf(8), ptr, const_ptr))); + CALL_SUBTEST_6((mapstaticmethods(MatrixXf(1, 1), ptr, const_ptr))); + CALL_SUBTEST_6((mapstaticmethods(MatrixXf(5, 7), ptr, const_ptr))); + CALL_SUBTEST_7((mapstaticmethods(ArrayXf(1), ptr, 
const_ptr))); + CALL_SUBTEST_7((mapstaticmethods(ArrayXf(5), ptr, const_ptr))); + CALL_SUBTEST_8((mapstaticmethods(ArrayXXf(1, 1), ptr, const_ptr))); + CALL_SUBTEST_8((mapstaticmethods(ArrayXXf(8, 6), ptr, const_ptr))); internal::aligned_delete(ptr, 1000); }
diff --git a/test/numext.cpp b/test/numext.cpp index ac9b66d..a2d511b 100644 --- a/test/numext.cpp +++ b/test/numext.cpp
@@ -34,26 +34,46 @@ #define VERIFY_IS_EQUAL_OR_NANS(a, b) VERIFY(test_is_equal_or_nans(a, b)) template <typename T> +void check_negate() { + Index size = 1000; + for (Index i = 0; i < size; i++) { + T val = i == 0 ? T(0) : internal::random<T>(T(0), NumTraits<T>::highest()); + T neg_val = numext::negate(val); + VERIFY_IS_EQUAL(T(val + neg_val), T(0)); + VERIFY_IS_EQUAL(numext::negate(neg_val), val); + } +} + +template <typename T> void check_abs() { typedef typename NumTraits<T>::Real Real; Real zero(0); - if (NumTraits<T>::IsSigned) VERIFY_IS_EQUAL(numext::abs(-T(1)), T(1)); + if (NumTraits<T>::IsSigned) VERIFY_IS_EQUAL(numext::abs(numext::negate(T(1))), T(1)); VERIFY_IS_EQUAL(numext::abs(T(0)), T(0)); VERIFY_IS_EQUAL(numext::abs(T(1)), T(1)); for (int k = 0; k < 100; ++k) { T x = internal::random<T>(); - if (!internal::is_same<T, bool>::value) x = x / Real(2); + x = x / Real(2); if (NumTraits<T>::IsSigned) { - VERIFY_IS_EQUAL(numext::abs(x), numext::abs(-x)); - VERIFY(numext::abs(-x) >= zero); + VERIFY_IS_EQUAL(numext::abs(x), numext::abs(numext::negate(x))); + VERIFY(numext::abs(numext::negate(x)) >= zero); } VERIFY(numext::abs(x) >= zero); VERIFY_IS_APPROX(numext::abs2(x), numext::abs2(numext::abs(x))); } } +template <> +void check_abs<bool>() { + for (bool x : {true, false}) { + VERIFY_IS_EQUAL(numext::abs(x), x); + VERIFY(numext::abs(x) >= false); + VERIFY_IS_EQUAL(numext::abs2(x), numext::abs2(numext::abs(x))); + } +} + template <typename T> void check_arg() { typedef typename NumTraits<T>::Real Real; @@ -236,16 +256,17 @@ negative_values = {static_cast<T>(-1), static_cast<T>(NumTraits<T>::lowest())}; non_negative_values = {static_cast<T>(0), static_cast<T>(1), static_cast<T>(NumTraits<T>::highest())}; } else { - // has sign bit - const T neg_zero = static_cast<T>(-0.0); - const T neg_one = static_cast<T>(-1.0); - const T neg_inf = -std::numeric_limits<T>::infinity(); - const T neg_nan = -std::numeric_limits<T>::quiet_NaN(); // does not have sign bit const T 
pos_zero = static_cast<T>(0.0); const T pos_one = static_cast<T>(1.0); const T pos_inf = std::numeric_limits<T>::infinity(); const T pos_nan = std::numeric_limits<T>::quiet_NaN(); + // has sign bit + const T neg_zero = numext::negate(pos_zero); + const T neg_one = numext::negate(pos_one); + const T neg_inf = numext::negate(pos_inf); + const T neg_nan = numext::negate(pos_nan); + negative_values = {neg_zero, neg_one, neg_inf, neg_nan}; non_negative_values = {pos_zero, pos_one, pos_inf, pos_nan}; } @@ -273,6 +294,22 @@ EIGEN_DECLARE_TEST(numext) { for (int k = 0; k < g_repeat; ++k) { + CALL_SUBTEST(check_negate<signed char>()); + CALL_SUBTEST(check_negate<unsigned char>()); + CALL_SUBTEST(check_negate<short>()); + CALL_SUBTEST(check_negate<unsigned short>()); + CALL_SUBTEST(check_negate<int>()); + CALL_SUBTEST(check_negate<unsigned int>()); + CALL_SUBTEST(check_negate<long>()); + CALL_SUBTEST(check_negate<unsigned long>()); + CALL_SUBTEST(check_negate<half>()); + CALL_SUBTEST(check_negate<bfloat16>()); + CALL_SUBTEST(check_negate<float>()); + CALL_SUBTEST(check_negate<double>()); + CALL_SUBTEST(check_negate<long double>()); + CALL_SUBTEST(check_negate<std::complex<float> >()); + CALL_SUBTEST(check_negate<std::complex<double> >()); + CALL_SUBTEST(check_abs<bool>()); CALL_SUBTEST(check_abs<signed char>()); CALL_SUBTEST(check_abs<unsigned char>());
diff --git a/test/packetmath.cpp b/test/packetmath.cpp index db8c9b5..8bfa321 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp
@@ -34,11 +34,11 @@ } template <typename T> inline T REF_NMADD(const T& a, const T& b, const T& c) { - return (-a * b) + c; + return c - a * b; } template <typename T> inline T REF_NMSUB(const T& a, const T& b, const T& c) { - return (-a * b) - c; + return test::negate(a * b + c); } template <typename T> inline T REF_DIV(const T& a, const T& b) { @@ -427,6 +427,32 @@ } }; +template <typename Scalar, typename Packet, bool HasNegate = internal::packet_traits<Scalar>::HasNegate> +struct negate_test_impl { + static void run_negate(Scalar* data1, Scalar* data2, Scalar* ref, int PacketSize) { + CHECK_CWISE1_IF(HasNegate, test::negate, internal::pnegate); + } + static void run_nmsub(Scalar* data1, Scalar* data2, Scalar* ref, int PacketSize) { + CHECK_CWISE3_IF(HasNegate, REF_NMSUB, internal::pnmsub); + } +}; + +template <typename Scalar, typename Packet> +struct negate_test_impl<Scalar, Packet, false> { + static void run_negate(Scalar*, Scalar*, Scalar*, int) {} + static void run_nmsub(Scalar*, Scalar*, Scalar*, int) {} +}; + +template <typename Scalar, typename Packet> +void negate_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) { + negate_test_impl<Scalar, Packet>::run_negate(data1, data2, ref, size); +} + +template <typename Scalar, typename Packet> +void nmsub_test(Scalar* data1, Scalar* data2, Scalar* ref, int size) { + negate_test_impl<Scalar, Packet>::run_nmsub(data1, data2, ref, size); +} + template <typename Scalar, typename Packet> void packetmath() { typedef internal::packet_traits<Scalar> PacketTraits; @@ -533,7 +559,7 @@ CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul); CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv); - CHECK_CWISE1_IF(PacketTraits::HasNegate, test::negate, internal::pnegate); + negate_test<Scalar, Packet>(data1, data2, ref, PacketSize); CHECK_CWISE1_IF(PacketTraits::HasReciprocal, REF_RECIPROCAL, internal::preciprocal); CHECK_CWISE1(numext::conj, internal::pconj); CHECK_CWISE1_IF(PacketTraits::HasSign, 
numext::sign, internal::psign); @@ -689,7 +715,7 @@ CHECK_CWISE1_IF(PacketTraits::HasRsqrt, numext::rsqrt, internal::prsqrt); CHECK_CWISE3_IF(true, REF_MADD, internal::pmadd); if (!std::is_same<Scalar, bool>::value && NumTraits<Scalar>::IsSigned) { - CHECK_CWISE3_IF(PacketTraits::HasNegate, REF_NMSUB, internal::pnmsub); + nmsub_test<Scalar, Packet>(data1, data2, ref, PacketSize); } // For pmsub, pnmadd, the values can cancel each other to become near zero, @@ -698,11 +724,11 @@ for (int i = 0; i < PacketSize; ++i) { data1[i] = numext::abs(internal::random<Scalar>()); data1[i + PacketSize] = numext::abs(internal::random<Scalar>()); - data1[i + 2 * PacketSize] = -numext::abs(internal::random<Scalar>()); + data1[i + 2 * PacketSize] = Scalar(0) - numext::abs(internal::random<Scalar>()); } if (!std::is_same<Scalar, bool>::value && NumTraits<Scalar>::IsSigned) { CHECK_CWISE3_IF(true, REF_MSUB, internal::pmsub); - CHECK_CWISE3_IF(PacketTraits::HasNegate, REF_NMADD, internal::pnmadd); + CHECK_CWISE3_IF(true, REF_NMADD, internal::pnmadd); } }
diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h index 86a01fb..d8de04b 100644 --- a/test/packetmath_test_shared.h +++ b/test/packetmath_test_shared.h
@@ -22,11 +22,16 @@ namespace test { -template <typename T> +template <typename T, std::enable_if_t<NumTraits<T>::IsSigned, bool> = true> T negate(const T& x) { return -x; } +template <typename T, std::enable_if_t<!NumTraits<T>::IsSigned, bool> = true> +T negate(const T& x) { + return T(0) - x; +} + template <typename T> Map<const Array<unsigned char, sizeof(T), 1> > bits(const T& x) { return Map<const Array<unsigned char, sizeof(T), 1> >(reinterpret_cast<const unsigned char*>(&x));
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp index 8d47fb0..e4b1963 100644 --- a/test/sparse_vector.cpp +++ b/test/sparse_vector.cpp
@@ -110,29 +110,29 @@ // test move { - SparseVectorType v3(std::move(v1)); - VERIFY_IS_APPROX(v3, refV1); - v1 = v3; + SparseVectorType tmp(std::move(v1)); + VERIFY_IS_APPROX(tmp, refV1); + v1 = tmp; } { - SparseVectorType v3; - v3 = std::move(v1); - VERIFY_IS_APPROX(v3, refV1); - v1 = v3; + SparseVectorType tmp; + tmp = std::move(v1); + VERIFY_IS_APPROX(tmp, refV1); + v1 = tmp; } { - SparseVectorType v3(std::move(mv1)); - VERIFY_IS_APPROX(v3, refV1); - mv1 = v3; + SparseVectorType tmp(std::move(mv1)); + VERIFY_IS_APPROX(tmp, refV1); + mv1 = tmp; } { - SparseVectorType v3; - v3 = std::move(mv1); - VERIFY_IS_APPROX(v3, refV1); - mv1 = v3; + SparseVectorType tmp; + tmp = std::move(mv1); + VERIFY_IS_APPROX(tmp, refV1); + mv1 = tmp; } // test conservative resize
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index aac7248..dc1a5c7 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp
@@ -287,6 +287,7 @@ typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half, typename internal::packet_traits<Scalar>::type>::value> struct vectorization_logic_half { + using RealScalar = typename NumTraits<Scalar>::Real; typedef internal::packet_traits<Scalar> PacketTraits; typedef typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half PacketType; static constexpr int PacketSize = internal::unpacket_traits<PacketType>::size; @@ -355,10 +356,12 @@ VERIFY(test_assign(Vector1(), Vector1().template segment<MinVSize>(0).derived(), EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal, CompleteUnrolling)); - VERIFY(test_assign(Vector1(), Scalar(2.1) * Vector1() - Vector1(), InnerVectorizedTraversal, CompleteUnrolling)); + VERIFY(test_assign(Vector1(), Scalar(RealScalar(2.1)) * Vector1() - Vector1(), InnerVectorizedTraversal, + CompleteUnrolling)); VERIFY(test_assign( Vector1(), - (Scalar(2.1) * Vector1().template segment<MinVSize>(0) - Vector1().template segment<MinVSize>(0)).derived(), + (Scalar(RealScalar(2.1)) * Vector1().template segment<MinVSize>(0) - Vector1().template segment<MinVSize>(0)) + .derived(), EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal, CompleteUnrolling)); VERIFY(test_assign(Vector1(), Vector1().cwiseProduct(Vector1()), InnerVectorizedTraversal, CompleteUnrolling)); VERIFY(test_assign(Vector1(), Vector1().template cast<Scalar>(), InnerVectorizedTraversal, CompleteUnrolling));