Update Eigen to commit:e16d70bd4e9cdebd2fbdae63b1a4d86493fbbde6 CHANGELOG ========= e16d70bd4 - Fix FFT when destination does not have unit stride. 99c18bce6 - Msvc muluh 8e4797178 - Bit shifting functions 9700fc847 - Reorganize CMake and minimize configuration for non-top-level builds. PiperOrigin-RevId: 631975359 Change-Id: I537052ef6b9ed6e3ecf90e2aebf5c3898ffc919c
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 381d8ff..8a07d50 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h
@@ -709,33 +709,21 @@ } /** \internal \returns \a a arithmetically shifted by N bits to the right */ -template <int N> -EIGEN_DEVICE_FUNC inline int parithmetic_shift_right(const int& a) { - return a >> N; -} -template <int N> -EIGEN_DEVICE_FUNC inline long int parithmetic_shift_right(const long int& a) { - return a >> N; +template <int N, typename T> +EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) { + return numext::arithmetic_shift_right(a, N); } /** \internal \returns \a a logically shifted by N bits to the right */ -template <int N> -EIGEN_DEVICE_FUNC inline int plogical_shift_right(const int& a) { - return static_cast<int>(static_cast<unsigned int>(a) >> N); -} -template <int N> -EIGEN_DEVICE_FUNC inline long int plogical_shift_right(const long int& a) { - return static_cast<long>(static_cast<unsigned long>(a) >> N); +template <int N, typename T> +EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) { + return numext::logical_shift_right(a, N); } /** \internal \returns \a a shifted by N bits to the left */ -template <int N> -EIGEN_DEVICE_FUNC inline int plogical_shift_left(const int& a) { - return a << N; -} -template <int N> -EIGEN_DEVICE_FUNC inline long int plogical_shift_left(const long int& a) { - return a << N; +template <int N, typename T> +EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) { + return numext::logical_shift_left(a, N); } /** \internal \returns the significant and exponent of the underlying floating point numbers
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 6bb9a12..d42fc93 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h
@@ -1746,6 +1746,23 @@ #undef SYCL_SPECIALIZE_BINARY_FUNC #endif +template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_left(const Scalar& a, int n) { + return a << n; +} + +template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_right(const Scalar& a, int n) { + using UnsignedScalar = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type; + return bit_cast<Scalar, UnsignedScalar>(bit_cast<UnsignedScalar, Scalar>(a) >> n); +} + +template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar& a, int n) { + using SignedScalar = typename numext::get_integer_by_size<sizeof(Scalar)>::signed_type; + return bit_cast<Scalar, SignedScalar>(bit_cast<SignedScalar, Scalar>(a) >> n); +} + } // end namespace numext namespace internal {
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 2848b78..a6e2de4 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h
@@ -101,10 +101,10 @@ template <typename Tgt, typename Src> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { // The behaviour of memcpy is not specified for non-trivially copyable types - EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Src>::value, THIS_TYPE_IS_NOT_SUPPORTED); + EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Src>::value, THIS_TYPE_IS_NOT_SUPPORTED) EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Tgt>::value && std::is_default_constructible<Tgt>::value, - THIS_TYPE_IS_NOT_SUPPORTED); - EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED); + THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED) Tgt tgt; // Load src into registers first. This allows the memcpy to be elided by CUDA.
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 2b0c05c..c1bbc7c 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -219,7 +219,9 @@ */ template <typename Scalar, int N> struct scalar_shift_right_op { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return a >> N; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { + return numext::arithmetic_shift_right(a, N); + } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::parithmetic_shift_right<N>(a); @@ -237,7 +239,9 @@ */ template <typename Scalar, int N> struct scalar_shift_left_op { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { return a << N; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { + return numext::logical_shift_left(a, N); + } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::plogical_shift_left<N>(a);
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 2f6f89e..9f75c1b 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake
@@ -91,6 +91,7 @@ if(EIGEN_TEST_CUSTOM_LINKER_FLAGS) target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS}) endif() + target_link_libraries(${targetname} Eigen3::Eigen) if(${ARGC} GREATER 3) set(libs_to_link ${ARGV3})
diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 1babd13..e61c99b 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt
@@ -110,6 +110,7 @@ if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) target_link_libraries(${target} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) endif() + target_link_libraries(${target} Eigen3::Eigen) add_dependencies(lapack ${target}) install(TARGETS ${target} RUNTIME DESTINATION bin
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp index b5ad3c4..3b36328 100644 --- a/test/array_cwise.cpp +++ b/test/array_cwise.cpp
@@ -1068,24 +1068,45 @@ } } -template <int N> -struct shift_left { - template <typename Scalar> - Scalar operator()(const Scalar& v) const { - return (v << N); +template <typename Scalar> +struct shift_imm_traits { + enum { Cost = 1, PacketAccess = internal::packet_traits<Scalar>::HasShift }; +}; + +template <int N, typename Scalar> +struct logical_left_shift_op { + Scalar operator()(const Scalar& v) const { return numext::logical_shift_left(v, N); } + template <typename Packet> + Packet packetOp(const Packet& v) const { + return internal::plogical_shift_left<N>(v); + } +}; +template <int N, typename Scalar> +struct logical_right_shift_op { + Scalar operator()(const Scalar& v) const { return numext::logical_shift_right(v, N); } + template <typename Packet> + Packet packetOp(const Packet& v) const { + return internal::plogical_shift_right<N>(v); + } +}; +template <int N, typename Scalar> +struct arithmetic_right_shift_op { + Scalar operator()(const Scalar& v) const { return numext::arithmetic_shift_right(v, N); } + template <typename Packet> + Packet packetOp(const Packet& v) const { + return internal::parithmetic_shift_right<N>(v); } }; -template <int N> -struct arithmetic_shift_right { - template <typename Scalar> - Scalar operator()(const Scalar& v) const { - return (v >> N); - } -}; +template <int N, typename Scalar> +struct internal::functor_traits<logical_left_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {}; +template <int N, typename Scalar> +struct internal::functor_traits<logical_right_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {}; +template <int N, typename Scalar> +struct internal::functor_traits<arithmetic_right_shift_op<N, Scalar>> : shift_imm_traits<Scalar> {}; template <typename ArrayType> -struct signed_shift_test_impl { +struct shift_test_impl { typedef typename ArrayType::Scalar Scalar; static constexpr size_t Size = sizeof(Scalar); static constexpr size_t MaxShift = (CHAR_BIT * Size) - 1; @@ -1099,20 +1120,24 @@ ArrayType m1 = 
ArrayType::Random(rows, cols), m2(rows, cols), m3(rows, cols); - m2 = m1.unaryExpr(internal::scalar_shift_right_op<Scalar, N>()); - m3 = m1.unaryExpr(arithmetic_shift_right<N>()); + m2 = m1.unaryExpr([](const Scalar& v) { return numext::logical_shift_left(v, N); }); + m3 = m1.unaryExpr(logical_left_shift_op<N, Scalar>()); VERIFY_IS_CWISE_EQUAL(m2, m3); - m2 = m1.unaryExpr(internal::scalar_shift_left_op<Scalar, N>()); - m3 = m1.unaryExpr(shift_left<N>()); + m2 = m1.unaryExpr([](const Scalar& v) { return numext::logical_shift_right(v, N); }); + m3 = m1.unaryExpr(logical_right_shift_op<N, Scalar>()); + VERIFY_IS_CWISE_EQUAL(m2, m3); + + m2 = m1.unaryExpr([](const Scalar& v) { return numext::arithmetic_shift_right(v, N); }); + m3 = m1.unaryExpr(arithmetic_right_shift_op<N, Scalar>()); VERIFY_IS_CWISE_EQUAL(m2, m3); run<N + 1>(m); } }; template <typename ArrayType> -void signed_shift_test(const ArrayType& m) { - signed_shift_test_impl<ArrayType>::run(m); +void shift_test(const ArrayType& m) { + shift_test_impl<ArrayType>::run(m); } template <typename ArrayType> @@ -1361,10 +1386,10 @@ ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); CALL_SUBTEST_7(array_generic(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_8(signed_shift_test( + CALL_SUBTEST_8(shift_test( ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_9(signed_shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), - internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_9(shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), + internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); CALL_SUBTEST_10(array_generic(Array<uint32_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, 
EIGEN_TEST_MAX_SIZE)))); CALL_SUBTEST_11(array_generic(Array<uint64_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
diff --git a/test/numext.cpp b/test/numext.cpp index a2d511b..ebe9fb0 100644 --- a/test/numext.cpp +++ b/test/numext.cpp
@@ -292,6 +292,27 @@ check_signbit_impl<T>::run(); } +template <typename T> +void check_shift() { + using SignedT = typename numext::get_integer_by_size<sizeof(T)>::signed_type; + using UnsignedT = typename numext::get_integer_by_size<sizeof(T)>::unsigned_type; + constexpr int kNumBits = CHAR_BIT * sizeof(T); + for (int i = 0; i < 1000; ++i) { + const T a = internal::random<T>(); + for (int s = 1; s < kNumBits; s++) { + T a_bsll = numext::logical_shift_left(a, s); + T a_bsll_ref = a << s; + VERIFY_IS_EQUAL(a_bsll, a_bsll_ref); + T a_bsrl = numext::logical_shift_right(a, s); + T a_bsrl_ref = numext::bit_cast<T, UnsignedT>(numext::bit_cast<UnsignedT, T>(a) >> s); + VERIFY_IS_EQUAL(a_bsrl, a_bsrl_ref); + T a_bsra = numext::arithmetic_shift_right(a, s); + T a_bsra_ref = numext::bit_cast<T, SignedT>(numext::bit_cast<SignedT, T>(a) >> s); + VERIFY_IS_EQUAL(a_bsra, a_bsra_ref); + } + } +} + EIGEN_DECLARE_TEST(numext) { for (int k = 0; k < g_repeat; ++k) { CALL_SUBTEST(check_negate<signed char>()); @@ -354,5 +375,15 @@ CALL_SUBTEST(check_signbit<int16_t>()); CALL_SUBTEST(check_signbit<int32_t>()); CALL_SUBTEST(check_signbit<int64_t>()); + + CALL_SUBTEST(check_shift<int8_t>()); + CALL_SUBTEST(check_shift<int16_t>()); + CALL_SUBTEST(check_shift<int32_t>()); + CALL_SUBTEST(check_shift<int64_t>()); + + CALL_SUBTEST(check_shift<uint8_t>()); + CALL_SUBTEST(check_shift<uint16_t>()); + CALL_SUBTEST(check_shift<uint32_t>()); + CALL_SUBTEST(check_shift<uint64_t>()); } }
diff --git a/unsupported/CMakeLists.txt b/unsupported/CMakeLists.txt index 67d1f62..3904601 100644 --- a/unsupported/CMakeLists.txt +++ b/unsupported/CMakeLists.txt
@@ -4,7 +4,7 @@ endif() if(EIGEN_BUILD_TESTING) if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) - add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest + add_subdirectory(test) # CTest automatic test building relies on the "all" target. else() add_subdirectory(test EXCLUDE_FROM_ALL) endif()
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index fdb9759..fddc648 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -101,6 +101,8 @@ return __umul64hi(a, b); #elif defined(SYCL_DEVICE_ONLY) return cl::sycl::mul_hi(a, static_cast<uint64_t>(b)); +#elif EIGEN_COMP_MSVC && (EIGEN_ARCH_x86_64 || EIGEN_ARCH_ARM64) + return __umulh(a, static_cast<uint64_t>(b)); #elif EIGEN_HAS_BUILTIN_INT128 __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); return static_cast<uint64_t>(v >> 64);
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 630be1e..557fdf6 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT
@@ -231,11 +231,12 @@ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) if (nfft < 1) nfft = src.size(); - - if (NumTraits<src_type>::IsComplex == 0 && HasFlag(HalfSpectrum)) - dst.derived().resize((nfft >> 1) + 1); - else - dst.derived().resize(nfft); + + Index dst_size = nfft; + if (NumTraits<src_type>::IsComplex == 0 && HasFlag(HalfSpectrum)) { + dst_size = (nfft >> 1) + 1; + } + dst.derived().resize(dst_size); if (src.innerStride() != 1 || src.size() < nfft) { Matrix<src_type, 1, Dynamic> tmp; @@ -245,9 +246,21 @@ } else { tmp = src; } - fwd(&dst[0], &tmp[0], nfft); + if (dst.innerStride() != 1) { + Matrix<dst_type, 1, Dynamic> out(1, dst_size); + fwd(&out[0], &tmp[0], nfft); + dst.derived() = out; + } else { + fwd(&dst[0], &tmp[0], nfft); + } } else { - fwd(&dst[0], &src[0], nfft); + if (dst.innerStride() != 1) { + Matrix<dst_type, 1, Dynamic> out(1, dst_size); + fwd(&out[0], &src[0], nfft); + dst.derived() = out; + } else { + fwd(&dst[0], &src[0], nfft); + } } } @@ -326,9 +339,22 @@ } else { tmp = src; } - inv(&dst[0], &tmp[0], nfft); + + if (dst.innerStride() != 1) { + Matrix<dst_type, 1, Dynamic> out(1, nfft); + inv(&out[0], &tmp[0], nfft); + dst.derived() = out; + } else { + inv(&dst[0], &tmp[0], nfft); + } } else { - inv(&dst[0], &src[0], nfft); + if (dst.innerStride() != 1) { + Matrix<dst_type, 1, Dynamic> out(1, nfft); + inv(&out[0], &src[0], nfft); + dst.derived() = out; + } else { + inv(&dst[0], &src[0], nfft); + } } }
diff --git a/unsupported/test/fft_test_shared.h b/unsupported/test/fft_test_shared.h index 0e040ad..3adcd90 100644 --- a/unsupported/test/fft_test_shared.h +++ b/unsupported/test/fft_test_shared.h
@@ -164,9 +164,41 @@ } template <typename T> +void test_complex_strided(int nfft) { + typedef typename FFT<T>::Complex Complex; + typedef typename Eigen::Vector<Complex, Dynamic> ComplexVector; + constexpr int kInputStride = 3; + constexpr int kOutputStride = 7; + constexpr int kInvOutputStride = 13; + + FFT<T> fft; + + ComplexVector inbuf(nfft * kInputStride); + inbuf.setRandom(); + ComplexVector outbuf(nfft * kOutputStride); + outbuf.setRandom(); + ComplexVector invoutbuf(nfft * kInvOutputStride); + invoutbuf.setRandom(); + + using StridedComplexVector = Map<ComplexVector, /*MapOptions=*/0, InnerStride<Dynamic>>; + StridedComplexVector input(inbuf.data(), nfft, InnerStride<Dynamic>(kInputStride)); + StridedComplexVector output(outbuf.data(), nfft, InnerStride<Dynamic>(kOutputStride)); + StridedComplexVector inv_output(invoutbuf.data(), nfft, InnerStride<Dynamic>(kInvOutputStride)); + + for (int k = 0; k < nfft; ++k) + input[k] = Complex((T)(rand() / (double)RAND_MAX - .5), (T)(rand() / (double)RAND_MAX - .5)); + fft.fwd(output, input); + + VERIFY(T(fft_rmse(output, input)) < test_precision<T>()); // gross check + fft.inv(inv_output, output); + VERIFY(T(dif_rmse(inv_output, input)) < test_precision<T>()); // gross check +} + +template <typename T> void test_complex(int nfft) { test_complex_generic<StdVectorContainer, T>(nfft); test_complex_generic<EigenVectorContainer, T>(nfft); + test_complex_strided<T>(nfft); } template <typename T, int nrows, int ncols>