Update Eigen to commit:9ea520fc4510b49408f1445b603b1f5dad267c2c CHANGELOG ========= 9ea520fc4 - Ensure that mc is not smaller than Traits::nr dd8c71e62 - Fix typecasting for arm32 b2cb49e28 - Static asserts to check for matching NumDimensions 283dec7f2 - Update file GeneralMatrixVector.h 66b9f4ed5 - Fix (u)int64_t->float conversion on arm d1b03fb5c - Gemv microoptimization PiperOrigin-RevId: 587143778 Change-Id: Ia7e9e827a65966b5642968ef5b1097db5b7e61cf
diff --git a/Eigen/src/Core/arch/NEON/TypeCasting.h b/Eigen/src/Core/arch/NEON/TypeCasting.h index a265e4d..68566b0 100644 --- a/Eigen/src/Core/arch/NEON/TypeCasting.h +++ b/Eigen/src/Core/arch/NEON/TypeCasting.h
@@ -1109,15 +1109,32 @@ struct type_casting_traits<numext::int64_t, float> { enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; }; + template <> EIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) { - return vcvtq_f32_s32(vcombine_s32(vmovn_s64(a), vmovn_s64(b))); -} -template <> -EIGEN_STRONG_INLINE Packet2f pcast<Packet2l, Packet2f>(const Packet2l& a) { - return vcvt_f32_s32(vmovn_s64(a)); +#if EIGEN_ARCH_ARM64 + return vcombine_f32(vcvt_f32_f64(vcvtq_f64_s64(a)), vcvt_f32_f64(vcvtq_f64_s64(b))); +#else + EIGEN_ALIGN_MAX int64_t lvals[4]; + pstore(lvals, a); + pstore(lvals + 2, b); + EIGEN_ALIGN_MAX float fvals[4] = {static_cast<float>(lvals[0]), static_cast<float>(lvals[1]), + static_cast<float>(lvals[2]), static_cast<float>(lvals[3])}; + return pload<Packet4f>(fvals); +#endif } +template <> +EIGEN_STRONG_INLINE Packet2f pcast<Packet2l, Packet2f>(const Packet2l& a) { +#if EIGEN_ARCH_ARM64 + return vcvt_f32_f64(vcvtq_f64_s64(a)); +#else + EIGEN_ALIGN_MAX int64_t lvals[2]; + pstore(lvals, a); + EIGEN_ALIGN_MAX float fvals[2] = {static_cast<float>(lvals[0]), static_cast<float>(lvals[1])}; + return pload<Packet2f>(fvals); +#endif +} template <> struct type_casting_traits<numext::int64_t, numext::int32_t> { @@ -1233,11 +1250,27 @@ }; template <> EIGEN_STRONG_INLINE Packet4f pcast<Packet2ul, Packet4f>(const Packet2ul& a, const Packet2ul& b) { - return vcvtq_f32_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b))); +#if EIGEN_ARCH_ARM64 + return vcombine_f32(vcvt_f32_f64(vcvtq_f64_u64(a)), vcvt_f32_f64(vcvtq_f64_u64(b))); +#else + EIGEN_ALIGN_MAX uint64_t uvals[4]; + pstore(uvals, a); + pstore(uvals + 2, b); + EIGEN_ALIGN_MAX float fvals[4] = {static_cast<float>(uvals[0]), static_cast<float>(uvals[1]), + static_cast<float>(uvals[2]), static_cast<float>(uvals[3])}; + return pload<Packet4f>(fvals); +#endif } template <> EIGEN_STRONG_INLINE Packet2f pcast<Packet2ul, Packet2f>(const Packet2ul& a) { - return vcvt_f32_u32(vmovn_u64(a)); +#if EIGEN_ARCH_ARM64 + return vcvt_f32_f64(vcvtq_f64_u64(a)); +#else + EIGEN_ALIGN_MAX uint64_t uvals[2]; + pstore(uvals, a); + EIGEN_ALIGN_MAX float fvals[2] = {static_cast<float>(uvals[0]), static_cast<float>(uvals[1])}; + return pload<Packet2f>(fvals); +#endif }
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 55b637b..2e0dcb9 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -78,11 +78,14 @@ ResMapper res(res_, resStride, resIncr); Index kc = blocking.kc(); - Index mc = (std::min)(size,blocking.mc()); + // Ensure that mc >= nr and <= size + Index mc = (std::min)(size,(std::max)(static_cast<decltype(blocking.mc())>(Traits::nr),blocking.mc())); - // !!! mc must be a multiple of nr: - if(mc > Traits::nr) - mc = (mc/Traits::nr)*Traits::nr; + // !!! mc must be a multiple of nr + if (mc > Traits::nr) { + using UnsignedIndex = typename make_unsigned<Index>::type; + mc = (UnsignedIndex(mc)/Traits::nr)*Traits::nr; + } std::size_t sizeA = kc*mc; std::size_t sizeB = kc*size;
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index 475ac85..cef0ade 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -362,9 +362,10 @@ HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf }; - const Index fullColBlockEnd = LhsPacketSize * (cols / LhsPacketSize); - const Index halfColBlockEnd = LhsPacketSizeHalf * (cols / LhsPacketSizeHalf); - const Index quarterColBlockEnd = LhsPacketSizeQuarter * (cols / LhsPacketSizeQuarter); + using UnsignedIndex = typename make_unsigned<Index>::type; + const Index fullColBlockEnd = LhsPacketSize * (UnsignedIndex(cols) / LhsPacketSize); + const Index halfColBlockEnd = LhsPacketSizeHalf * (UnsignedIndex(cols) / LhsPacketSizeHalf); + const Index quarterColBlockEnd = LhsPacketSizeQuarter * (UnsignedIndex(cols) / LhsPacketSizeQuarter); Index i=0; for(; i<n8; i+=8)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0614a20..fbbc98a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt
@@ -182,6 +182,7 @@ ei_add_test(type_alias) ei_add_test(nullary) ei_add_test(mixingtypes) +ei_add_test(float_conversion) ei_add_test(io) ei_add_test(packetmath "-DEIGEN_FAST_MATH=1") ei_add_test(vectorization_logic)
diff --git a/test/float_conversion.cpp b/test/float_conversion.cpp new file mode 100644 index 0000000..fd8d45d --- /dev/null +++ b/test/float_conversion.cpp
@@ -0,0 +1,51 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include <sstream> + +#include "main.h" + +template<typename From, typename To> +void test_conversion() { + typedef Array<From, Dynamic, 1> ArrayXFrom; + typedef Array<To, Dynamic, 1> ArrayXTo; + typedef Array<double, Dynamic, 1> ArrayXDouble; + + Index size = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE); + double from_min = static_cast<double>((std::numeric_limits<From>::min)()); + double from_range = static_cast<double>((std::numeric_limits<From>::max)()) - from_min; + + // ArrayXFrom::Random() only generates 32-bit values (#2749), so we generate + // doubles and scale to fit the range. + ArrayXDouble doubles = (ArrayXDouble::Random(size)+1.0)*(from_range/2.0) + from_min; + ArrayXFrom from = doubles.template cast<From>(); + ArrayXTo to(size); + for (Index i = 0; i < size; ++i) { + to(i) = static_cast<To>(from(i)); + } + VERIFY_IS_APPROX(from.template cast<To>(), to); +} + +template<typename To> +void test_conversion_to() { + CALL_SUBTEST((test_conversion<int64_t, To>())); + CALL_SUBTEST((test_conversion<uint64_t, To>())); + CALL_SUBTEST((test_conversion<int32_t, To>())); + CALL_SUBTEST((test_conversion<uint32_t, To>())); + CALL_SUBTEST((test_conversion<int16_t, To>())); + CALL_SUBTEST((test_conversion<uint16_t, To>())); + CALL_SUBTEST((test_conversion<int8_t, To>())); + CALL_SUBTEST((test_conversion<uint8_t, To>())); +} + +EIGEN_DECLARE_TEST(float_conversion) +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST(test_conversion_to<float>()); + CALL_SUBTEST(test_conversion_to<double>()); + } +}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index a24097a..e596147 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -299,6 +299,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) { + EIGEN_STATIC_ASSERT(OtherDerived::NumDimensions == Base::NumDimensions, Number_of_dimensions_must_match) typedef TensorAssignOp<Tensor, const OtherDerived> Assign; Assign assign(*this, other.derived()); resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); @@ -309,6 +310,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, WriteAccessors>& other) { + EIGEN_STATIC_ASSERT(OtherDerived::NumDimensions == Base::NumDimensions, Number_of_dimensions_must_match) typedef TensorAssignOp<Tensor, const OtherDerived> Assign; Assign assign(*this, other.derived()); resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());