Update Eigen to commit:d4ae542ed1c6f3eaad29445100052489471e38ea CHANGELOG ========= d4ae542ed - Fix nullptr dereference issue in triangular product. 7769eb1b2 - Fix problems with recent changes and Tensorflow in Power ba1cb6e45 - Fixes #2703 by adding max_digits10 function 9995c3da6 - Fix -Wmaybe-uninitialized in SVD 4e9e493b4 - Fix -Waggressive-loop-optimizations 6e7abeae6 - fix arm build warnings 81fe2d424 - Fix more gcc compiler warnings / sort-of bugs 21cd3fe20 - Optimize check_rows_cols_for_overflow 9297aae66 - Fix AVX512 nomalloc issues in trsm. 1a2bfca8f - Fix annoying warnings 63dcb429c - Fix use of arg function in CUDA. 8f927fb52 - Altivec: fix compilation with C++20 and higher d4b05454a - Fix argument for _mm256_cvtps_ph imm parameter 15ac3765c - Fix ivcSize return type in IndexedViewMethods.h 3791ac8a1 - Fix supportsMMA to obey EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH compilation flag and compiler support. bc57b926a - Add Quaternion constructor from real scalar and imaginary vector 31cd2ad37 - Ensure EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC is always defined on arm. 7465b7651 - Disable FP16 arithmetic for arm32. b3267f693 - Remove unused variable in test/svd_common.h. PiperOrigin-RevId: 552130715 Change-Id: I71c627f279ef0cbc6ce0a231a9fef9acd22e244e
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 5805763..dad2af8 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h
@@ -357,7 +357,7 @@ */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl_dense(XprType& xpr, Index i) - : Base(add_to_nullable_pointer(xpr.data(), + : Base((BlockRows == 0 || BlockCols == 0) ? nullptr : add_to_nullable_pointer(xpr.data(), i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride())), BlockRows==1 ? 1 : xpr.rows(), @@ -373,7 +373,7 @@ */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) - : Base(add_to_nullable_pointer(xpr.data(), + : Base((BlockRows == 0 || BlockCols == 0) ? nullptr : add_to_nullable_pointer(xpr.data(), xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol))), m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) { @@ -386,7 +386,7 @@ BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) - : Base(add_to_nullable_pointer(xpr.data(), + : Base((blockRows == 0 || blockCols == 0) ? nullptr : add_to_nullable_pointer(xpr.data(), xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), blockRows, blockCols), m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 0c13192..139e2ca 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h
@@ -72,8 +72,9 @@ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) #if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG)) - typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func; - EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar); + EIGEN_CHECK_BINARY_COMPATIBILIY( + Eigen::internal::scalar_conj_product_op<Scalar EIGEN_COMMA typename OtherDerived::Scalar>, + Scalar, typename OtherDerived::Scalar); #endif eigen_assert(size() == other.size());
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 3d2f144..ceb7a0a 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h
@@ -1366,8 +1366,7 @@ /** \internal \returns the argument of \a a as a complex number */ template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) { - using Scalar = typename unpacket_traits<Packet>::type; - EIGEN_STATIC_ASSERT(NumTraits<Scalar>::IsComplex, THIS METHOD IS FOR COMPLEX TYPES ONLY) + EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex, THIS METHOD IS FOR COMPLEX TYPES ONLY) using RealPacket = typename unpacket_traits<Packet>::as_real; // a // r i r i ... RealPacket aflip = pcplxflip(a).v; // i r i r ...
diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h index 053905f..6993729 100644 --- a/Eigen/src/Core/IO.h +++ b/Eigen/src/Core/IO.h
@@ -117,13 +117,13 @@ // NOTE: This helper is kept for backward compatibility with previous code specializing // this internal::significant_decimals_impl structure. In the future we should directly -// call digits10() which has been introduced in July 2016 in 3.3. +// call max_digits10(). template<typename Scalar> struct significant_decimals_impl { static inline int run() { - return NumTraits<Scalar>::digits10(); + return NumTraits<Scalar>::max_digits10(); } };
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 822701b..5507707 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h
@@ -482,12 +482,8 @@ EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { - #if defined(EIGEN_HIP_DEVICE_COMPILE) - // HIP does not seem to have a native device side implementation for the math routine "arg" + // There is no official ::arg on device in CUDA/HIP, so we always need to use std::arg. using std::arg; - #else - EIGEN_USING_STD(arg); - #endif return static_cast<RealScalar>(arg(x)); } };
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index ff33aa6..f5275d8 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h
@@ -16,37 +16,6 @@ namespace internal { -// default implementation of digits10(), based on numeric_limits if specialized, -// 0 for integer types, and log10(epsilon()) otherwise. -template< typename T, - bool use_numeric_limits = std::numeric_limits<T>::is_specialized, - bool is_integer = NumTraits<T>::IsInteger> -struct default_digits10_impl -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static int run() { return std::numeric_limits<T>::digits10; } -}; - -template<typename T> -struct default_digits10_impl<T,false,false> // Floating point -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static int run() { - using std::log10; - using std::ceil; - typedef typename NumTraits<T>::Real Real; - return int(ceil(-log10(NumTraits<Real>::epsilon()))); - } -}; - -template<typename T> -struct default_digits10_impl<T,false,true> // Integer -{ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR - static int run() { return 0; } -}; - - // default implementation of digits(), based on numeric_limits if specialized, // 0 for integer types, and log2(epsilon()) otherwise. template< typename T, @@ -77,6 +46,66 @@ static int run() { return 0; } }; +// default implementation of digits10(), based on numeric_limits if specialized, +// 0 for integer types, and floor((digits()-1)*log10(2)) otherwise. +template< typename T, + bool use_numeric_limits = std::numeric_limits<T>::is_specialized, + bool is_integer = NumTraits<T>::IsInteger> +struct default_digits10_impl +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int run() { return std::numeric_limits<T>::digits10; } +}; + +template<typename T> +struct default_digits10_impl<T,false,false> // Floating point +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int run() { + using std::log10; + using std::floor; + typedef typename NumTraits<T>::Real Real; + return int(floor((internal::default_digits_impl<Real>::run()-1)*log10(2))); + } +}; + +template<typename T> +struct default_digits10_impl<T,false,true> // Integer +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int run() { return 0; } +}; + +// default implementation of max_digits10(), based on numeric_limits if specialized, +// 0 for integer types, and log10(2) * digits() + 1 otherwise. +template< typename T, + bool use_numeric_limits = std::numeric_limits<T>::is_specialized, + bool is_integer = NumTraits<T>::IsInteger> +struct default_max_digits10_impl +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int run() { return std::numeric_limits<T>::max_digits10; } +}; + +template<typename T> +struct default_max_digits10_impl<T,false,false> // Floating point +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int run() { + using std::log10; + using std::ceil; + typedef typename NumTraits<T>::Real Real; + return int(ceil(internal::default_digits_impl<Real>::run()*log10(2)+1)); + } +}; + +template<typename T> +struct default_max_digits10_impl<T,false,true> // Integer +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int run() { return 0; } +}; + } // end namespace internal namespace numext { @@ -143,6 +172,9 @@ * \li digits10() function returning the number of decimal digits that can be represented without change. This is * the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a> * which is used as the default implementation if specialized. + * \li max_digits10() function returning the number of decimal digits required to uniquely represent all distinct values of the type. This is + * the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10">std::numeric_limits<T>::max_digits10</a> + * which is used as the default implementation if specialized. * \li min_exponent() and max_exponent() functions returning the highest and lowest possible values, respectively, * such that the radix raised to the power exponent-1 is a normalized floating-point number. These are equivalent to * <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/min_exponent">std::numeric_limits<T>::min_exponent</a>/ @@ -181,6 +213,12 @@ } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static inline int max_digits10() + { + return internal::default_max_digits10_impl<T>::run(); + } + + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int digits() { return internal::default_digits_impl<T>::run(); @@ -282,6 +320,8 @@ static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int digits10() { return NumTraits<Real>::digits10(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static inline int max_digits10() { return NumTraits<Real>::max_digits10(); } }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> @@ -312,6 +352,8 @@ EIGEN_CONSTEXPR static inline int digits10() { return NumTraits<Scalar>::digits10(); } + EIGEN_CONSTEXPR + static inline int max_digits10() { return NumTraits<Scalar>::max_digits10(); } }; template<> struct NumTraits<std::string> @@ -326,8 +368,10 @@ EIGEN_CONSTEXPR static inline int digits10() { return 0; } + EIGEN_CONSTEXPR + static inline int max_digits10() { return 0; } -private: + private: static inline std::string epsilon(); static inline std::string dummy_precision(); static inline std::string lowest();
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index fc2e941..ba6e51d 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h
@@ -28,21 +28,40 @@ namespace internal { -template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow { +template <int MaxSizeAtCompileTime, int MaxRowsAtCompileTime, int MaxColsAtCompileTime> +struct check_rows_cols_for_overflow { + EIGEN_STATIC_ASSERT(MaxRowsAtCompileTime * MaxColsAtCompileTime == MaxSizeAtCompileTime,YOU MADE A PROGRAMMING MISTAKE) template <typename Index> EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index) {} }; -template<> struct check_rows_cols_for_overflow<Dynamic> { +template <int MaxRowsAtCompileTime> +struct check_rows_cols_for_overflow<Dynamic, MaxRowsAtCompileTime, Dynamic> { + template <typename Index> + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index cols) { + constexpr Index MaxIndex = NumTraits<Index>::highest(); + bool error = cols > MaxIndex / MaxRowsAtCompileTime; + if (error) throw_std_bad_alloc(); + } +}; + +template <int MaxColsAtCompileTime> +struct check_rows_cols_for_overflow<Dynamic, Dynamic, MaxColsAtCompileTime> { + template <typename Index> + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index) { + constexpr Index MaxIndex = NumTraits<Index>::highest(); + bool error = rows > MaxIndex / MaxColsAtCompileTime; + if (error) throw_std_bad_alloc(); + } +}; + +template <> +struct check_rows_cols_for_overflow<Dynamic, Dynamic, Dynamic> { template <typename Index> EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) { - // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 - // we assume Index is signed - Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed - bool error = (rows == 0 || cols == 0) ? false - : (rows > max_index / cols); - if (error) - throw_std_bad_alloc(); + constexpr Index MaxIndex = NumTraits<Index>::highest(); + bool error = cols == 0 ? false : (rows > MaxIndex / cols); + if (error) throw_std_bad_alloc(); } }; @@ -268,7 +287,7 @@ && internal::check_implication(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic, rows<=MaxRowsAtCompileTime) && internal::check_implication(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic, cols<=MaxColsAtCompileTime) && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array."); - internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols); + internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(rows, cols); #ifdef EIGEN_INITIALIZE_COEFFS Index size = rows*cols; bool size_changed = size != this->size(); @@ -340,7 +359,7 @@ EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other) { const OtherDerived& other = _other.derived(); - internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols()); + internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(other.rows(), other.cols()); const Index othersize = other.rows()*other.cols(); if(RowsAtCompileTime == 1) { @@ -775,11 +794,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, std::enable_if_t<Base::SizeAtCompileTime!=2,T0>* = 0) { - const bool t0_is_integer_alike = internal::is_valid_index_type<T0>::value; - const bool t1_is_integer_alike = internal::is_valid_index_type<T1>::value; - EIGEN_STATIC_ASSERT(t0_is_integer_alike && - t1_is_integer_alike, - FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + EIGEN_STATIC_ASSERT(internal::is_valid_index_type<T0>::value && + internal::is_valid_index_type<T1>::value, + T0 AND T1 MUST BE INTEGER TYPES) resize(rows,cols); } @@ -967,7 +984,7 @@ && (( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows (!Derived::IsRowMajor && _this.rows() == rows) )) // column-major and we change only the number of columns { - internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols); + internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime, Derived::MaxRowsAtCompileTime, Derived::MaxColsAtCompileTime>::run(rows, cols); _this.derived().m_storage.conservativeResize(rows*cols,rows,cols); } else
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index e347a0f..32d51d2 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h
@@ -334,14 +334,14 @@ typedef internal::traits<Ref> Traits; static constexpr bool may_map_m_object_successfully = - (StrideType::InnerStrideAtCompileTime == 0 || - StrideType::InnerStrideAtCompileTime == 1 || - StrideType::InnerStrideAtCompileTime == Dynamic) && + (static_cast<int>(StrideType::InnerStrideAtCompileTime) == 0 || + static_cast<int>(StrideType::InnerStrideAtCompileTime) == 1 || + static_cast<int>(StrideType::InnerStrideAtCompileTime) == Dynamic) && (TPlainObjectType::IsVectorAtCompileTime || - StrideType::OuterStrideAtCompileTime == 0 || - StrideType::OuterStrideAtCompileTime == Dynamic || - StrideType::OuterStrideAtCompileTime == TPlainObjectType::InnerSizeAtCompileTime || - TPlainObjectType::InnerSizeAtCompileTime == Dynamic); + static_cast<int>(StrideType::OuterStrideAtCompileTime) == 0 || + static_cast<int>(StrideType::OuterStrideAtCompileTime) == Dynamic || + static_cast<int>(StrideType::OuterStrideAtCompileTime) == static_cast<int>(TPlainObjectType::InnerSizeAtCompileTime) || + static_cast<int>(TPlainObjectType::InnerSizeAtCompileTime) == Dynamic); public: typedef RefBase<Ref> Base;
diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h index ee28da1..2715a1e 100644 --- a/Eigen/src/Core/VectorBlock.h +++ b/Eigen/src/Core/VectorBlock.h
@@ -69,8 +69,7 @@ public: EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock) EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock) - - using Base::operator=; + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(VectorBlock) /** Dynamic-size constructor */
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 30eca82..9bbbc13 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -1847,7 +1847,7 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) { #ifdef EIGEN_HAS_FP16_C - return _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC); + return _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT); #else __m128i lo = float2half(_mm256_extractf128_ps(a, 0)); __m128i hi = float2half(_mm256_extractf128_ps(a, 1));
diff --git a/Eigen/src/Core/arch/AVX512/TrsmKernel.h b/Eigen/src/Core/arch/AVX512/TrsmKernel.h index 8fbf161..714afac 100644 --- a/Eigen/src/Core/arch/AVX512/TrsmKernel.h +++ b/Eigen/src/Core/arch/AVX512/TrsmKernel.h
@@ -16,6 +16,13 @@ #define EIGEN_USE_AVX512_TRSM_KERNELS 1 #endif +// TRSM kernels currently unconditionally rely on malloc with AVX512. +// Disable them if malloc is explicitly disabled at compile-time. +#ifdef EIGEN_NO_MALLOC +#undef EIGEN_USE_AVX512_TRSM_KERNELS +#define EIGEN_USE_AVX512_TRSM_KERNELS 0 +#endif + #if EIGEN_USE_AVX512_TRSM_KERNELS #if !defined(EIGEN_USE_AVX512_TRSM_R_KERNELS) #define EIGEN_USE_AVX512_TRSM_R_KERNELS 1 @@ -860,32 +867,6 @@ } } -#if (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC) -/** - * Reduce blocking sizes so that the size of the temporary workspace needed is less than "limit" bytes, - * - kB must be at least psize - * - numM must be at least EIGEN_AVX_MAX_NUM_ROW - */ -template <typename Scalar, bool isBRowMajor> -constexpr std::pair<int64_t, int64_t> trsmBlocking(const int64_t limit) { - constexpr int64_t psize = packet_traits<Scalar>::size; - int64_t kB = 15 * psize; - int64_t numM = 8 * EIGEN_AVX_MAX_NUM_ROW; - // If B is rowmajor, no temp workspace needed, so use default blocking sizes. - if (isBRowMajor) return {kB, numM}; - - // Very simple heuristic, prefer keeping kB as large as possible to fully use vector registers. - for (int64_t k = kB; k > psize; k -= psize) { - for (int64_t m = numM; m > EIGEN_AVX_MAX_NUM_ROW; m -= EIGEN_AVX_MAX_NUM_ROW) { - if ((((k + psize - 1) / psize + 4) * psize) * m * sizeof(Scalar) < limit) { - return {k, m}; - } - } - } - return {psize, EIGEN_AVX_MAX_NUM_ROW}; // Minimum blocking size required -} -#endif // (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC) - /** * Main triangular solve driver * @@ -930,30 +911,8 @@ * large enough to allow GEMM updates to have larger "K"s (see below.) No benchmarking has been done so far to * determine optimal values for numM. */ -#if (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC) - /** - * If EIGEN_NO_MALLOC is requested, we try to reduce kB and numM so the maximum temp workspace required is less - * than EIGEN_STACK_ALLOCATION_LIMIT. Actual workspace size may be less, depending on the number of vectors to - * solve. - * - kB must be at least psize - * - numM must be at least EIGEN_AVX_MAX_NUM_ROW - * - * If B is row-major, the blocking sizes are not reduced (no temp workspace needed). - */ - constexpr std::pair<int64_t, int64_t> blocking_ = trsmBlocking<Scalar, isBRowMajor>(EIGEN_STACK_ALLOCATION_LIMIT); - constexpr int64_t kB = blocking_.first; - constexpr int64_t numM = blocking_.second; - /** - * If the temp workspace size exceeds EIGEN_STACK_ALLOCATION_LIMIT even with the minimum blocking sizes, - * we throw an assertion. Use -DEIGEN_USE_AVX512_TRSM_L_KERNELS=0 if necessary - */ - static_assert(!(((((kB + psize - 1) / psize + 4) * psize) * numM * sizeof(Scalar) >= EIGEN_STACK_ALLOCATION_LIMIT) && - !isBRowMajor), - "Temp workspace required is too large."); -#else constexpr int64_t kB = (3 * psize) * 5; // 5*U3 constexpr int64_t numM = 8 * EIGEN_AVX_MAX_NUM_ROW; -#endif int64_t sizeBTemp = 0; Scalar *B_temp = NULL; @@ -966,13 +925,7 @@ sizeBTemp = (((std::min(kB, numRHS) + psize - 1) / psize + 4) * psize) * numM; } -#if !defined(EIGEN_NO_MALLOC) EIGEN_IF_CONSTEXPR(!isBRowMajor) B_temp = (Scalar *)handmade_aligned_malloc(sizeof(Scalar) * sizeBTemp, 64); -#elif (EIGEN_USE_AVX512_TRSM_L_KERNELS) && defined(EIGEN_NO_MALLOC) - // Use alloca if malloc not allowed, requested temp workspace size should be less than EIGEN_STACK_ALLOCATION_LIMIT - ei_declare_aligned_stack_constructed_variable(Scalar, B_temp_alloca, sizeBTemp, 0); - B_temp = B_temp_alloca; -#endif for (int64_t k = 0; k < numRHS; k += kB) { int64_t bK = numRHS - k > kB ? kB : numRHS - k; @@ -1102,43 +1055,55 @@ } } -#if !defined(EIGEN_NO_MALLOC) EIGEN_IF_CONSTEXPR(!isBRowMajor) handmade_aligned_free(B_temp); -#endif } // Template specializations of trsmKernelL/R for float/double and inner strides of 1. #if (EIGEN_USE_AVX512_TRSM_KERNELS) #if (EIGEN_USE_AVX512_TRSM_R_KERNELS) -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride> +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride, bool Specialized> struct trsmKernelR; template <typename Index, int Mode, int TriStorageOrder> -struct trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1> { +struct trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1, true> { static void kernel(Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr, Index otherStride); }; template <typename Index, int Mode, int TriStorageOrder> -struct trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1> { +struct trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1, true> { static void kernel(Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr, Index otherStride); }; template <typename Index, int Mode, int TriStorageOrder> -EIGEN_DONT_INLINE void trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1>::kernel( +EIGEN_DONT_INLINE void trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1, true>::kernel( Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr, Index otherStride) { EIGEN_UNUSED_VARIABLE(otherIncr); +#ifdef EIGEN_NO_RUNTIME_MALLOC + if (!is_malloc_allowed()) { + trsmKernelR<float, Index, Mode, false, TriStorageOrder, 1, /*Specialized=*/false>::kernel( + size, otherSize, _tri, triStride, _other, otherIncr, otherStride); + return; + } +#endif triSolve<float, TriStorageOrder != RowMajor, true, (Mode & Lower) != Lower, (Mode & UnitDiag) != 0>( const_cast<float *>(_tri), _other, size, otherSize, triStride, otherStride); } template <typename Index, int Mode, int TriStorageOrder> -EIGEN_DONT_INLINE void trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1>::kernel( +EIGEN_DONT_INLINE void trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1, true>::kernel( Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr, Index otherStride) { EIGEN_UNUSED_VARIABLE(otherIncr); +#ifdef EIGEN_NO_RUNTIME_MALLOC + if (!is_malloc_allowed()) { + trsmKernelR<double, Index, Mode, false, TriStorageOrder, 1, /*Specialized=*/false>::kernel( + size, otherSize, _tri, triStride, _other, otherIncr, otherStride); + return; + } +#endif triSolve<double, TriStorageOrder != RowMajor, true, (Mode & Lower) != Lower, (Mode & UnitDiag) != 0>( const_cast<double *>(_tri), _other, size, otherSize, triStride, otherStride); } @@ -1146,35 +1111,49 @@ // These trsm kernels require temporary memory allocation #if (EIGEN_USE_AVX512_TRSM_L_KERNELS) -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride> +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride, bool Specialized = true> struct trsmKernelL; template <typename Index, int Mode, int TriStorageOrder> -struct trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1> { +struct trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1, true> { static void kernel(Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr, Index otherStride); }; template <typename Index, int Mode, int TriStorageOrder> -struct trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1> { +struct trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1, true> { static void kernel(Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr, Index otherStride); }; template <typename Index, int Mode, int TriStorageOrder> -EIGEN_DONT_INLINE void trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1>::kernel( +EIGEN_DONT_INLINE void trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1, true>::kernel( Index size, Index otherSize, const float *_tri, Index triStride, float *_other, Index otherIncr, Index otherStride) { EIGEN_UNUSED_VARIABLE(otherIncr); +#ifdef EIGEN_NO_RUNTIME_MALLOC + if (!is_malloc_allowed()) { + trsmKernelL<float, Index, Mode, false, TriStorageOrder, 1, /*Specialized=*/false>::kernel( + size, otherSize, _tri, triStride, _other, otherIncr, otherStride); + return; + } +#endif triSolve<float, TriStorageOrder == RowMajor, false, (Mode & Lower) == Lower, (Mode & UnitDiag) != 0>( const_cast<float *>(_tri), _other, size, otherSize, triStride, otherStride); } template <typename Index, int Mode, int TriStorageOrder> -EIGEN_DONT_INLINE void trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1>::kernel( +EIGEN_DONT_INLINE void trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1, true>::kernel( Index size, Index otherSize, const double *_tri, Index triStride, double *_other, Index otherIncr, Index otherStride) { EIGEN_UNUSED_VARIABLE(otherIncr); +#ifdef EIGEN_NO_RUNTIME_MALLOC + if (!is_malloc_allowed()) { + trsmKernelL<double, Index, Mode, false, TriStorageOrder, 1, /*Specialized=*/false>::kernel( + size, otherSize, _tri, triStride, _other, otherIncr, otherStride); + return; + } +#endif triSolve<double, TriStorageOrder == RowMajor, false, (Mode & Lower) == Lower, (Mode & UnitDiag) != 0>( const_cast<double *>(_tri), _other, size, otherSize, triStride, otherStride); }
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h index e86cc5b..1c5c048 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h
@@ -15,8 +15,6 @@ #define EIGEN_ALTIVEC_USE_CUSTOM_PACK 1 #endif -#include "MatrixProductCommon.h" - #if !defined(EIGEN_ALTIVEC_DISABLE_MMA) #define EIGEN_ALTIVEC_DISABLE_MMA 0 #endif @@ -45,6 +43,8 @@ #endif // EIGEN_ALTIVEC_MMA_SUPPORT +#include "MatrixProductCommon.h" + #if defined(EIGEN_ALTIVEC_MMA_ONLY) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) #include "MatrixProductMMA.h" #endif @@ -2713,12 +2713,10 @@ { #if defined(EIGEN_ALTIVEC_MMA_ONLY) return true; -#else -#if EIGEN_COMP_LLVM - return false; // No dynamic dispatch for LLVM -#else +#elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) && defined(__BUILTIN_CPU_SUPPORTS__) return __builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"); -#endif +#else + return false; // No dynamic dispatch for LLVM or older GCC #endif }
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h index cb18311..daed8c1 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h
@@ -5,7 +5,7 @@ #define EIGEN_POWER_PREFETCH(p) #endif -#ifdef _ARCH_PWR9 +#if defined(_ARCH_PWR9) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) #define USE_PARTIAL_PACKETS #endif
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h index 011d68e..5094118 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h
@@ -394,10 +394,12 @@ b0 = vec_mergeh(b0.m_val, b1.m_val); } - LhsMapper lhs2 = lhs.getSubMapper(0, j); + using LhsSubMapper = typename LhsMapper::SubMapper; + + LhsSubMapper lhs2 = lhs.getSubMapper(0, j); BFLOAT16_UNROLL for(Index k = 0; k < num_acc; k += 2) { - loadVecLoop<num_acc, LhsMapper, zero>(k, lhs2, a0, b1); + loadVecLoop<num_acc, LhsSubMapper, zero>(k, lhs2, a0, b1); } multVec<num_acc>(quad_acc, a0, b0); @@ -418,12 +420,14 @@ zeroAccumulators<num_acc>(quad_acc); - LhsMapper lhs2 = lhs.getSubMapper(row, 0); + using LhsSubMapper = typename LhsMapper::SubMapper; + + LhsSubMapper lhs2 = lhs.getSubMapper(row, 0); for(Index j = 0; j + 2 <= cend; j += 2) { - vecColLoop<num_acc, LhsMapper, RhsMapper, false, linear>(j, lhs2, rhs, quad_acc); + vecColLoop<num_acc, LhsSubMapper, RhsMapper, false, linear>(j, lhs2, rhs, quad_acc); } if (cend & 1) { - vecColLoop<num_acc, LhsMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, quad_acc); + vecColLoop<num_acc, LhsSubMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, quad_acc); } disassembleAccumulators<num_acc>(quad_acc, acc); @@ -490,6 +494,33 @@ } } +template<typename RhsMapper, typename LhsMapper, typename = void> +struct UseMMAStride : std::false_type { + static EIGEN_ALWAYS_INLINE void run(Index j2, Index jend, Index rows, LhsMapper& lhs, RhsMapper& rhs, Packet4f pAlpha, float *result) + { + using RhsSubMapper = typename RhsMapper::SubMapper; + + RhsSubMapper rhs2 = rhs.getSubMapper(j2, 0); + calcVecColLoops<LhsMapper, RhsSubMapper, false>(jend - j2, rows, lhs, rhs2, pAlpha, result); + } +}; + +template<typename RhsMapper, typename LhsMapper> +struct UseMMAStride<RhsMapper, LhsMapper, std::enable_if_t<std::is_member_function_pointer< + decltype(&RhsMapper::stride)>::value>> : std::true_type { + static EIGEN_ALWAYS_INLINE void run(Index j2, Index jend, Index rows, LhsMapper& lhs, RhsMapper& rhs, Packet4f pAlpha, float *result) + { + using RhsSubMapper = typename RhsMapper::SubMapper; + + RhsSubMapper rhs2 = rhs.getSubMapper(j2, 0); + if (rhs.stride() == 1) { + calcVecColLoops<LhsMapper, RhsSubMapper, true>(jend - j2, rows, lhs, rhs2, pAlpha, result); + } else { + calcVecColLoops<LhsMapper, RhsSubMapper, false>(jend - j2, rows, lhs, rhs2, pAlpha, result); + } + } +}; + template<typename LhsMapper, typename RhsMapper> void gemvMMA_bfloat16_col( Index rows, Index cols, @@ -498,8 +529,6 @@ bfloat16* res, Index resIncr, bfloat16 alpha) { - typedef typename RhsMapper::LinearMapper LinearMapper; - EIGEN_UNUSED_VARIABLE(resIncr); eigen_internal_assert(resIncr == 1); @@ -523,14 +552,10 @@ { Index jend = numext::mini(j2 + block_cols, cols); - LhsMapper lhs2 = lhs.getSubMapper(0, j2); - if (rhs.stride() == 1) { - LinearMapper rhs3 = rhs2.getLinearMapper(j2, 0); - calcVecColLoops<LhsMapper, LinearMapper, true>(jend - j2, rows, lhs2, rhs3, pAlpha, result); - } else { - RhsMapper rhs3 = rhs2.getSubMapper(j2, 0); - calcVecColLoops<LhsMapper, RhsMapper, false>(jend - j2, rows, lhs2, rhs3, pAlpha, result); - } + using LhsSubMapper = typename LhsMapper::SubMapper; + + LhsSubMapper lhs2 = lhs.getSubMapper(0, j2); + UseMMAStride<RhsMapper, LhsSubMapper>::run(j2, jend, rows, lhs2, rhs2, pAlpha, result); } convertArrayPointerF32toBF16(result, rows, res);
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h index b8e1ab6..9170230 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
@@ -527,7 +527,13 @@ // linear == false static EIGEN_ALWAYS_INLINE Packet8bf run(RhsMapper& rhs, Index j) { - return pgather<bfloat16, Packet8bf>(&rhs(j + 0, 0), rhs.stride()); + const Index n = unpacket_traits<Packet8bf>::size; + EIGEN_ALIGN16 bfloat16 to[n]; + LOAD_STORE_UNROLL_16 + for (Index i = 0; i < n; i++) { + to[i] = rhs(j + i, 0); + } + return pload<Packet8bf>(to); } }; @@ -537,7 +543,7 @@ // linear == true static EIGEN_ALWAYS_INLINE Packet8bf run(RhsMapper& rhs, Index j) { - return rhs.template loadPacket<Packet8bf>(j + 0); + return rhs.template loadPacket<Packet8bf>(j + 0, 0); } }; @@ -558,9 +564,11 @@ b0[1] = oneConvertBF16Perm(b2.m_val, p16uc_MERGE16_32_V2); } - LhsMapper lhs2 = lhs.getSubMapper(0, j); + using LhsSubMapper = typename LhsMapper::SubMapper; + + LhsSubMapper lhs2 = lhs.getSubMapper(0, j); for(Index k = 0; k < num_acc; k += 2) { - loadVecLoopVSX<num_acc, LhsMapper, zero>(k, lhs2, a0); + loadVecLoopVSX<num_acc, LhsSubMapper, zero>(k, lhs2, a0); } multVecVSX<num_acc, zero>(acc, a0, b0); @@ -589,12 +597,14 @@ zeroAccumulators<num_acc, 2>(acc); - LhsMapper lhs2 = lhs.getSubMapper(row, 0); + using LhsSubMapper = typename LhsMapper::SubMapper; + + LhsSubMapper lhs2 = lhs.getSubMapper(row, 0); for(Index j = 0; j + 2 <= cend; j += 2) { - vecColLoopVSX<num_acc, LhsMapper, RhsMapper, false, linear>(j, lhs2, rhs, acc); + vecColLoopVSX<num_acc, LhsSubMapper, RhsMapper, false, linear>(j, lhs2, rhs, acc); } if (cend & 1) { - vecColLoopVSX<num_acc, LhsMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, acc); + vecColLoopVSX<num_acc, LhsSubMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, acc); } addResultsVSX<num_acc>(acc); @@ -716,6 +726,33 @@ convertPointerF32toBF16VSX<1,inc>(i, result, rows, dst, resInc); } +template<typename RhsMapper, typename LhsMapper, typename = void> +struct UseStride : std::false_type { + static EIGEN_ALWAYS_INLINE void run(Index j2, Index jend, Index rows, LhsMapper& lhs, RhsMapper& rhs, Packet4f pAlpha, float *result) + { + using RhsSubMapper = typename RhsMapper::SubMapper; + + RhsSubMapper rhs2 = rhs.getSubMapper(j2, 0); + calcVSXVecColLoops<LhsMapper, RhsSubMapper, false>(jend - j2, rows, lhs, rhs2, pAlpha, result); + } +}; + +template<typename RhsMapper, typename LhsMapper> +struct UseStride<RhsMapper, LhsMapper, std::enable_if_t<std::is_member_function_pointer< + decltype(&RhsMapper::stride)>::value>> : std::true_type { + static EIGEN_ALWAYS_INLINE void run(Index j2, Index jend, Index rows, LhsMapper& lhs, RhsMapper& rhs, Packet4f pAlpha, float *result) + { + using RhsSubMapper = typename RhsMapper::SubMapper; + + RhsSubMapper rhs2 = rhs.getSubMapper(j2, 0); + if (rhs.stride() == 1) { + calcVSXVecColLoops<LhsMapper, RhsSubMapper, true>(jend - j2, rows, lhs, rhs2, pAlpha, result); + } else { + calcVSXVecColLoops<LhsMapper, RhsSubMapper, false>(jend - j2, rows, lhs, rhs2, pAlpha, result); + } + } +}; + template<typename LhsMapper, typename RhsMapper> void gemv_bfloat16_col( Index rows, Index cols, @@ -724,8 +761,6 @@ bfloat16* res, Index resIncr, bfloat16 alpha) { - typedef typename RhsMapper::LinearMapper LinearMapper; - EIGEN_UNUSED_VARIABLE(resIncr); eigen_internal_assert(resIncr == 1); @@ -749,14 +784,10 @@ { Index jend = numext::mini(j2 + block_cols, cols); - LhsMapper lhs2 = lhs.getSubMapper(0, j2); - if (rhs.stride() == 1) { - LinearMapper rhs3 = rhs2.getLinearMapper(j2, 0); - calcVSXVecColLoops<LhsMapper, LinearMapper, true>(jend - j2, rows, lhs2, rhs3, pAlpha, result); - } else { - RhsMapper rhs3 = rhs2.getSubMapper(j2, 0); - calcVSXVecColLoops<LhsMapper, RhsMapper, false>(jend - j2, rows, lhs2, rhs3, pAlpha, result); - } + using LhsSubMapper = typename LhsMapper::SubMapper; + + LhsSubMapper lhs2 = lhs.getSubMapper(0, j2); + UseStride<RhsMapper, LhsSubMapper>::run(j2, jend, rows, lhs2, rhs2, pAlpha, result); } convertArrayPointerF32toBF16VSX(result, rows, res); @@ -1410,7 +1441,7 @@ template<typename PResPacket, typename ResPacket, typename ResScalar, typename Scalar> struct alpha_store { - alpha_store<PResPacket, ResPacket, ResScalar, Scalar>(ResScalar& alpha) { + alpha_store(ResScalar& alpha) { separate.r = pset1_complex<Scalar, ResScalar, ResPacket, 0x3>(alpha); separate.i = pset1_complex<Scalar, ResScalar, ResPacket, 0x0>(alpha); }
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index e52e3fb..638dea4 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -4003,6 +4003,8 @@ template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){ return vsqrtq_f64(_x); } +#endif // EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG + // Do we have an fp16 types and supporting Neon intrinsics? #if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC typedef float16x4_t Packet4hf; @@ -4651,8 +4653,6 @@ } #endif // end EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC -#endif // EIGEN_ARCH_ARM64 - } // end namespace internal } // end namespace Eigen
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index 7307994..e4c6f23 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -361,6 +361,10 @@ HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf }; + const Index fullColBlockEnd = LhsPacketSize * (cols / LhsPacketSize); + const Index halfColBlockEnd = LhsPacketSizeHalf * (cols / LhsPacketSizeHalf); + const Index quarterColBlockEnd = LhsPacketSizeQuarter * (cols / LhsPacketSizeQuarter); + Index i=0; for(; i<n8; i+=8) { @@ -373,8 +377,7 @@ c6 = pset1<ResPacket>(ResScalar(0)), c7 = pset1<ResPacket>(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize) { RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); @@ -395,7 +398,8 @@ ResScalar cc5 = predux(c5); ResScalar cc6 = predux(c6); ResScalar cc7 = predux(c7); - for(; j<cols; ++j) + + for (Index j = fullColBlockEnd; j < cols; ++j) { RhsScalar b0 = rhs(j,0); @@ -424,8 +428,7 @@ c2 = pset1<ResPacket>(ResScalar(0)), c3 = pset1<ResPacket>(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize) { RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); @@ -438,7 +441,8 @@ ResScalar cc1 = predux(c1); ResScalar cc2 = predux(c2); ResScalar cc3 = predux(c3); - for(; j<cols; ++j) + + for(Index j = fullColBlockEnd; j < cols; ++j) { RhsScalar b0 = rhs(j,0); @@ -457,8 +461,7 @@ ResPacket c0 = pset1<ResPacket>(ResScalar(0)), c1 = pset1<ResPacket>(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize) { RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); @@ -467,7 +470,8 @@ } ResScalar cc0 = predux(c0); ResScalar cc1 = predux(c1); - for(; j<cols; ++j) + + for(Index j = fullColBlockEnd; j < cols; ++j) { RhsScalar b0 = rhs(j,0); @@ -482,15 +486,15 @@ ResPacket c0 = pset1<ResPacket>(ResScalar(0)); ResPacketHalf c0_h = pset1<ResPacketHalf>(ResScalar(0)); ResPacketQuarter c0_q = pset1<ResPacketQuarter>(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + + for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize) { RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0); c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0); } ResScalar cc0 = predux(c0); if (HasHalf) { - for(; j+LhsPacketSizeHalf<=cols; j+=LhsPacketSizeHalf) + for (Index j = fullColBlockEnd; j < halfColBlockEnd; j += LhsPacketSizeHalf) { RhsPacketHalf b0 = rhs.template load<RhsPacketHalf,Unaligned>(j,0); c0_h = pcj_half.pmadd(lhs.template load<LhsPacketHalf,LhsAlignment>(i,j),b0,c0_h); @@ -498,14 +502,14 @@ cc0 += predux(c0_h); } if (HasQuarter) { - for(; j+LhsPacketSizeQuarter<=cols; j+=LhsPacketSizeQuarter) + for (Index j = halfColBlockEnd; j < quarterColBlockEnd; j += LhsPacketSizeQuarter) { RhsPacketQuarter b0 = rhs.template load<RhsPacketQuarter,Unaligned>(j,0); c0_q = pcj_quarter.pmadd(lhs.template load<LhsPacketQuarter,LhsAlignment>(i,j),b0,c0_q); } cc0 += predux(c0_q); } - for(; j<cols; ++j) + for (Index j = quarterColBlockEnd; j < cols; ++j) { cc0 += cj.pmul(lhs(i,j), rhs(j,0)); }
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 770107a..94eabdc 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -422,6 +422,12 @@ internal::add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs); internal::add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs); + // Empty product, return early. Otherwise, we get `nullptr` use errors below when we try to access + // coeffRef(0,0). + if (a_lhs.size() == 0 || a_rhs.size() == 0) { + return; + } + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs); Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index b148d9c..22b4a7f 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -17,7 +17,7 @@ namespace internal { -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride> +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride, bool Specialized> struct trsmKernelL { // Generic Implementation of triangular solve for triangular matrix on left and multiple rhs. // Handles non-packed matrices. @@ -27,7 +27,7 @@ Scalar* _other, Index otherIncr, Index otherStride); }; -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride> +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride, bool Specialized> struct trsmKernelR { // Generic Implementation of triangular solve for triangular matrix on right and multiple lhs. // Handles non-packed matrices. @@ -37,8 +37,8 @@ Scalar* _other, Index otherIncr, Index otherStride); }; -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride> -EIGEN_STRONG_INLINE void trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>::kernel( +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride, bool Specialized> +EIGEN_STRONG_INLINE void trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, Specialized>::kernel( Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherIncr, Index otherStride) @@ -88,8 +88,8 @@ } -template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride> -EIGEN_STRONG_INLINE void trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>::kernel( +template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride, bool Specialized> +EIGEN_STRONG_INLINE void trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, Specialized>::kernel( Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherIncr, Index otherStride) @@ -180,7 +180,7 @@ // TODO: Investigate better heuristics for cutoffs. double L2Cap = 0.5; // 50% of L2 size if (size < avx512_trsm_cutoff<Scalar>(l2, cols, L2Cap)) { - trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, 1>::kernel( + trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, 1, /*Specialized=*/true>::kernel( size, cols, _tri, triStride, _other, 1, otherStride); return; } @@ -253,7 +253,7 @@ i = IsLower ? k2 + k1: k2 - k1 - actualPanelWidth; } #endif - trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>::kernel( + trsmKernelL<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>::kernel( actualPanelWidth, actual_cols, _tri + i + (i)*triStride, triStride, _other + i*OtherInnerStride + j2*otherStride, otherIncr, otherStride); @@ -327,7 +327,7 @@ manage_caching_sizes(GetAction, &l1, &l2, &l3); double L2Cap = 0.5; // 50% of L2 size if (size < avx512_trsm_cutoff<Scalar>(l2, rows, L2Cap)) { - trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>:: + trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>:: kernel(size, rows, _tri, triStride, _other, 1, otherStride); return; } @@ -423,7 +423,7 @@ { // unblocked triangular solve - trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride>:: + trsmKernelR<Scalar, Index, Mode, Conjugate, TriStorageOrder, OtherInnerStride, /*Specialized=*/true>:: kernel(actualPanelWidth, actual_mc, _tri + absolute_j2 + absolute_j2*triStride, triStride, _other + i2*OtherInnerStride + absolute_j2*otherStride, otherIncr, otherStride);
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 41fbe15..b0217a9 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h
@@ -182,6 +182,7 @@ { public: typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; + typedef blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> SubMapper; typedef BlasVectorMapper<Scalar, Index> VectorMapper; EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1) @@ -191,9 +192,9 @@ eigen_assert(incr==1); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubMapper getSubMapper(Index i, Index j) const { - return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride); + return SubMapper(&operator()(i, j), m_stride); } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { @@ -316,12 +317,13 @@ { public: typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper; + typedef blas_data_mapper SubMapper; EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {} - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubMapper getSubMapper(Index i, Index j) const { - return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value()); + return SubMapper(&operator()(i, j), m_stride, m_incr.value()); } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { @@ -448,10 +450,12 @@ template<typename Scalar, typename Index, int StorageOrder> class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> { public: + typedef const_blas_data_mapper<Scalar, Index, StorageOrder> SubMapper; + EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {} - EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const { - return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride); + EIGEN_ALWAYS_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(&(this->operator()(i, j)), this->m_stride); } };
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 0b867d5..0835880 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h
@@ -623,7 +623,9 @@ /// supports Neon vector intrinsics for fp16. #if EIGEN_ARCH_ARM_OR_ARM64 #ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE) + // Clang only supports FP16 on aarch64, and not all intrinsics are available + // on A32 anyways even in GCC (e.g. vdiv_f16, vsqrt_f16). + #if EIGEN_ARCH_ARM64 && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE) #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1 #else #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 0 @@ -635,7 +637,9 @@ /// supports Neon scalar intrinsics for fp16. #if EIGEN_ARCH_ARM_OR_ARM64 #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC - #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE) + // Clang only supports FP16 on aarch64, and not all intrinsics are available + // on A32 anyways, even in GCC (e.g. vceqh_f16). + #if EIGEN_ARCH_ARM64 && defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE) #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1 #endif #endif
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index a192aac..ba5a563 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h
@@ -430,10 +430,34 @@ return (a+b-1) / b; } +// Handle integer comparisons of different signedness. +template <typename X, typename Y, bool XIsInteger = NumTraits<X>::IsInteger, bool XIsSigned = NumTraits<X>::IsSigned, + bool YIsInteger = NumTraits<Y>::IsInteger, bool YIsSigned = NumTraits<Y>::IsSigned> +struct equal_strict_impl { + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool run(const X& x, const Y& y) { return x == y; } +}; +template <typename X, typename Y> +struct equal_strict_impl<X, Y, true, false, true, true> { + // X is an unsigned integer + // Y is a signed integer + // if Y is non-negative, it may be represented exactly as its unsigned counterpart. + using UnsignedY = typename internal::make_unsigned<Y>::type; + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool run(const X& x, const Y& y) { + return y < Y(0) ? false : (x == static_cast<UnsignedY>(y)); + } +}; +template <typename X, typename Y> +struct equal_strict_impl<X, Y, true, true, true, false> { + // X is a signed integer + // Y is an unsigned integer + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool run(const X& x, const Y& y) { + return equal_strict_impl<Y, X>::run(y, x); + } +}; + // The aim of the following functions is to bypass -Wfloat-equal warnings // when we really want a strict equality comparison on floating points. -template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC -bool equal_strict(const X& x,const Y& y) { return x == y; } +template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool equal_strict(const X& x, const Y& y) { return equal_strict_impl<X, Y>::run(x, y); } #if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC @@ -458,7 +482,7 @@ bool is_exactly_one(const X& x) { return equal_strict(x, typename NumTraits<X>::Literal{1}); } template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC -bool not_equal_strict(const X& x,const Y& y) { return x != y; } +bool not_equal_strict(const X& x,const Y& y) { return !equal_strict_impl<X, Y>::run(x, y); } #if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 49fa37c..d8ac1b3 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h
@@ -17,12 +17,52 @@ namespace internal { -template<typename IndexDest, typename IndexSrc> -EIGEN_DEVICE_FUNC -inline IndexDest convert_index(const IndexSrc& idx) { - // for sizeof(IndexDest)>=sizeof(IndexSrc) compilers should be able to optimize this away: - eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value to big for target type"); - return IndexDest(idx); + +// useful for unsigned / signed integer comparisons when idx is intended to be non-negative +template <typename IndexType> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename make_unsigned<IndexType>::type returnUnsignedIndexValue( + const IndexType& idx) { + EIGEN_STATIC_ASSERT((NumTraits<IndexType>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) + eigen_internal_assert(idx >= 0 && "Index value is negative and target type is unsigned"); + using UnsignedType = typename make_unsigned<IndexType>::type; + return static_cast<UnsignedType>(idx); +} + +template <typename IndexDest, typename IndexSrc, + bool IndexDestIsInteger = NumTraits<IndexDest>::IsInteger, + bool IndexDestIsSigned = NumTraits<IndexDest>::IsSigned, + bool IndexSrcIsInteger = NumTraits<IndexSrc>::IsInteger, + bool IndexSrcIsSigned = NumTraits<IndexSrc>::IsSigned> +struct convert_index_impl { + static inline EIGEN_DEVICE_FUNC IndexDest run(const IndexSrc& idx) { + eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value is too big for target type"); + return static_cast<IndexDest>(idx); + } +}; +template <typename IndexDest, typename IndexSrc> +struct convert_index_impl<IndexDest, IndexSrc, true, true, true, false> { + // IndexDest is a signed integer + // IndexSrc is an unsigned integer + static inline EIGEN_DEVICE_FUNC IndexDest run(const IndexSrc& idx) { + eigen_internal_assert(idx <= returnUnsignedIndexValue(NumTraits<IndexDest>::highest()) && + "Index value is too big for target type"); + return static_cast<IndexDest>(idx); + } +}; +template <typename IndexDest, typename IndexSrc> +struct convert_index_impl<IndexDest, IndexSrc, true, false, true, true> { + // IndexDest is an unsigned integer + // IndexSrc is a signed integer + static inline EIGEN_DEVICE_FUNC IndexDest run(const IndexSrc& idx) { + eigen_internal_assert(returnUnsignedIndexValue(idx) <= NumTraits<IndexDest>::highest() && + "Index value is too big for target type"); + return static_cast<IndexDest>(idx); + } +}; + +template <typename IndexDest, typename IndexSrc> +EIGEN_DEVICE_FUNC inline IndexDest convert_index(const IndexSrc& idx) { + return convert_index_impl<IndexDest, IndexSrc>::run(idx); } // true if T can be considered as an integral index (i.e., and integral type or enum) @@ -271,7 +311,9 @@ } constexpr inline int size_at_compile_time(int rows, int cols) { - return (rows==Dynamic || cols==Dynamic) ? Dynamic : rows * cols; + if (rows == 0 || cols == 0) return 0; + if (rows == Dynamic || cols == Dynamic) return Dynamic; + return rows * cols; } template<typename XprType> struct size_of_xpr_at_compile_time
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 0aca4c4..3413a51 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h
@@ -298,6 +298,15 @@ */ EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){} + /** Constructs and initializes a quaternion from its real part as a scalar, + * and its imaginary part as a 3-vector [\c x, \c y, \c z] + */ + template <typename Derived> + EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Eigen::MatrixBase<Derived>& vec) + : m_coeffs(vec.x(), vec.y(), vec.z(), w) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived, 3); + } + /** Constructs and initialize a quaternion from the array data */ EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index a69fdca..bb1d3db 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h
@@ -107,6 +107,7 @@ public: using Base::rows; using Base::cols; + using Base::diagSize; using Base::computeU; using Base::computeV; @@ -270,7 +271,6 @@ using Base::m_computationOptions; using Base::m_computeThinU; using Base::m_computeThinV; - using Base::m_diagSize; using Base::m_info; using Base::m_isInitialized; using Base::m_matrixU; @@ -291,7 +291,7 @@ if (cols < m_algoswap) internal::allocate_small_svd<MatrixType, ComputationOptions>::run(smallSvd, rows, cols, computationOptions); - m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize ); + m_computed = MatrixXr::Zero(diagSize() + 1, diagSize() ); m_compU = computeV(); m_compV = computeU(); m_isTranspose = (cols > rows); @@ -307,20 +307,20 @@ m_useQrDecomp = !disableQrDecomp && ((rows / kMinAspectRatio > cols) || (cols / kMinAspectRatio > rows)); if (m_useQrDecomp) { qrDecomp = HouseholderQR<MatrixX>((std::max)(rows, cols), (std::min)(rows, cols)); - reducedTriangle = MatrixX(m_diagSize, m_diagSize); + reducedTriangle = MatrixX(diagSize(), diagSize()); } copyWorkspace = MatrixX(m_isTranspose ? cols : rows, m_isTranspose ? rows : cols); - bid = internal::UpperBidiagonalization<MatrixX>(m_useQrDecomp ? m_diagSize : copyWorkspace.rows(), - m_useQrDecomp ? m_diagSize : copyWorkspace.cols()); + bid = internal::UpperBidiagonalization<MatrixX>(m_useQrDecomp ? diagSize() : copyWorkspace.rows(), + m_useQrDecomp ? diagSize() : copyWorkspace.cols()); - if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 ); - else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 ); + if (m_compU) m_naiveU = MatrixXr::Zero(diagSize() + 1, diagSize() + 1 ); + else m_naiveU = MatrixXr::Zero(2, diagSize() + 1 ); - if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize); + if (m_compV) m_naiveV = MatrixXr::Zero(diagSize(), diagSize()); - m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3); - m_workspaceI.resize(3*m_diagSize); + m_workspace.resize((diagSize()+1)*(diagSize()+1)*3); + m_workspaceI.resize(3*diagSize()); } // end allocate template <typename MatrixType, int Options> @@ -369,7 +369,7 @@ // bidiagonalize the input matrix directly. if (m_useQrDecomp) { qrDecomp.compute(copyWorkspace); - reducedTriangle = qrDecomp.matrixQR().topRows(m_diagSize); + reducedTriangle = qrDecomp.matrixQR().topRows(diagSize()); reducedTriangle.template triangularView<StrictlyLower>().setZero(); bid.compute(reducedTriangle); } else { @@ -380,26 +380,26 @@ m_naiveU.setZero(); m_naiveV.setZero(); // FIXME this line involves a temporary matrix - m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose(); + m_computed.topRows(diagSize()) = bid.bidiagonal().toDenseMatrix().transpose(); m_computed.template bottomRows<1>().setZero(); - divide(0, m_diagSize - 1, 0, 0, 0); + divide(0, diagSize() - 1, 0, 0, 0); if (m_info != Success && m_info != NoConvergence) { m_isInitialized = true; return *this; } //**** step 3 - Copy singular values and vectors - for (int i=0; i<m_diagSize; i++) + for (int i=0; i<diagSize(); i++) { RealScalar a = abs(m_computed.coeff(i, i)); m_singularValues.coeffRef(i) = a * scale; if (a<considerZero) { m_nonzeroSingularValues = i; - m_singularValues.tail(m_diagSize - i - 1).setZero(); + m_singularValues.tail(diagSize() - i - 1).setZero(); break; } - else if (i == m_diagSize - 1) + else if (i == diagSize() - 1) { m_nonzeroSingularValues = i + 1; break; @@ -426,20 +426,20 @@ // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa if (computeU()) { - Index Ucols = m_computeThinU ? m_diagSize : rows(); + Index Ucols = m_computeThinU ? diagSize() : rows(); m_matrixU = MatrixX::Identity(rows(), Ucols); - m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize); + m_matrixU.topLeftCorner(diagSize(), diagSize()) = naiveV.template cast<Scalar>().topLeftCorner(diagSize(), diagSize()); // FIXME the following conditionals involve temporary buffers - if (m_useQrDecomp) m_matrixU.topLeftCorner(householderU.cols(), m_diagSize).applyOnTheLeft(householderU); + if (m_useQrDecomp) m_matrixU.topLeftCorner(householderU.cols(), diagSize()).applyOnTheLeft(householderU); else m_matrixU.applyOnTheLeft(householderU); } if (computeV()) { - Index Vcols = m_computeThinV ? m_diagSize : cols(); + Index Vcols = m_computeThinV ? diagSize() : cols(); m_matrixV = MatrixX::Identity(cols(), Vcols); - m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize); + m_matrixV.topLeftCorner(diagSize(), diagSize()) = naiveU.template cast<Scalar>().topLeftCorner(diagSize(), diagSize()); // FIXME the following conditionals involve temporary buffers - if (m_useQrDecomp) m_matrixV.topLeftCorner(householderV.cols(), m_diagSize).applyOnTheLeft(householderV); + if (m_useQrDecomp) m_matrixV.topLeftCorner(householderV.cols(), diagSize()).applyOnTheLeft(householderV); else m_matrixV.applyOnTheLeft(householderV); } }
diff --git a/Eigen/src/SVD/BDCSVD_LAPACKE.h b/Eigen/src/SVD/BDCSVD_LAPACKE.h index d4cc173..9a1e843 100644 --- a/Eigen/src/SVD/BDCSVD_LAPACKE.h +++ b/Eigen/src/SVD/BDCSVD_LAPACKE.h
@@ -67,8 +67,8 @@ // prepare arguments to ?gesdd const lapack_int matrix_order = lapack_storage_of(matrix); const char jobz = (SVD::m_computeFullU || SVD::m_computeFullV) ? 'A' : (SVD::m_computeThinU || SVD::m_computeThinV) ? 'S' : 'N'; - const lapack_int u_cols = (jobz == 'A') ? to_lapack(SVD::m_rows) : (jobz == 'S') ? to_lapack(SVD::m_diagSize) : 1; - const lapack_int vt_rows = (jobz == 'A') ? to_lapack(SVD::m_cols) : (jobz == 'S') ? to_lapack(SVD::m_diagSize) : 1; + const lapack_int u_cols = (jobz == 'A') ? to_lapack(SVD::rows()) : (jobz == 'S') ? to_lapack(SVD::diagSize()) : 1; + const lapack_int vt_rows = (jobz == 'A') ? to_lapack(SVD::cols()) : (jobz == 'S') ? to_lapack(SVD::diagSize()) : 1; lapack_int ldu, ldvt; Scalar *u, *vt, dummy; MatrixType localU; @@ -76,20 +76,20 @@ ldu = to_lapack(SVD::m_matrixU.outerStride()); u = SVD::m_matrixU.data(); } else if (SVD::computeV()) { - localU.resize(SVD::m_rows, u_cols); + localU.resize(SVD::rows(), u_cols); ldu = to_lapack(localU.outerStride()); u = localU.data(); } else { ldu=1; u=&dummy; } MatrixType localV; if (SVD::computeU() || SVD::computeV()) { - localV.resize(vt_rows, SVD::m_cols); + localV.resize(vt_rows, SVD::cols()); ldvt = to_lapack(localV.outerStride()); vt = localV.data(); } else { ldvt=1; vt=&dummy; } MatrixType temp; temp = matrix; // actual call to ?gesdd - lapack_int info = gesdd( matrix_order, jobz, to_lapack(SVD::m_rows), to_lapack(SVD::m_cols), + lapack_int info = gesdd( matrix_order, jobz, to_lapack(SVD::rows()), to_lapack(SVD::cols()), to_lapack(temp.data()), to_lapack(temp.outerStride()), (RealScalar*)SVD::m_singularValues.data(), to_lapack(u), ldu, to_lapack(vt), ldvt);
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 552dfd8..5fdb3dc 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h
@@ -519,7 +519,7 @@ typedef typename Base::Scalar Scalar; typedef typename Base::RealScalar RealScalar; typedef typename Base::Index Index; - enum { + enum : int { Options = Options_, QRPreconditioner = internal::get_qr_preconditioner(Options), RowsAtCompileTime = Base::RowsAtCompileTime, @@ -625,6 +625,7 @@ using Base::computeV; using Base::rows; using Base::cols; + using Base::diagSize; using Base::rank; private: @@ -632,13 +633,11 @@ JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions); protected: - using Base::m_cols; using Base::m_computationOptions; using Base::m_computeFullU; using Base::m_computeFullV; using Base::m_computeThinU; using Base::m_computeThinV; - using Base::m_diagSize; using Base::m_info; using Base::m_isAllocated; using Base::m_isInitialized; @@ -646,7 +645,6 @@ using Base::m_matrixV; using Base::m_nonzeroSingularValues; using Base::m_prescribedThreshold; - using Base::m_rows; using Base::m_singularValues; using Base::m_usePrescribedThreshold; using Base::ShouldComputeThinU; @@ -671,18 +669,18 @@ }; template <typename MatrixType, int Options> -void JacobiSVD<MatrixType, Options>::allocate(Index rows, Index cols, unsigned int computationOptions) { - if (Base::allocate(rows, cols, computationOptions)) return; +void JacobiSVD<MatrixType, Options>::allocate(Index rows_, Index cols_, unsigned int computationOptions_) { + if (Base::allocate(rows_, cols_, computationOptions_)) return; eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && !(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. " "Use the ColPivHouseholderQR preconditioner instead."); - m_workMatrix.resize(m_diagSize, m_diagSize); - if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this); - if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this); - if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols); + m_workMatrix.resize(diagSize(), diagSize()); + if(cols()>rows()) m_qr_precond_morecols.allocate(*this); + if(rows()>cols()) m_qr_precond_morerows.allocate(*this); + if(rows()!=cols()) m_scaledMatrix.resize(rows(),cols()); } template <typename MatrixType, int Options> @@ -711,7 +709,7 @@ /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */ - if(m_rows!=m_cols) + if(rows() != cols()) { m_scaledMatrix = matrix / scale; m_qr_precond_morecols.run(*this, m_scaledMatrix); @@ -719,11 +717,11 @@ } else { - m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize) / scale; - if(m_computeFullU) m_matrixU.setIdentity(m_rows,m_rows); - if(m_computeThinU) m_matrixU.setIdentity(m_rows,m_diagSize); - if(m_computeFullV) m_matrixV.setIdentity(m_cols,m_cols); - if(m_computeThinV) m_matrixV.setIdentity(m_cols, m_diagSize); + m_workMatrix = matrix.template topLeftCorner<DiagSizeAtCompileTime,DiagSizeAtCompileTime>(diagSize(),diagSize()) / scale; + if(m_computeFullU) m_matrixU.setIdentity(rows(),rows()); + if(m_computeThinU) m_matrixU.setIdentity(rows(),diagSize()); + if(m_computeFullV) m_matrixV.setIdentity(cols(),cols()); + if(m_computeThinV) m_matrixV.setIdentity(cols(),diagSize()); } /*** step 2. The main Jacobi SVD iteration. ***/ @@ -736,7 +734,7 @@ // do a sweep: for all index pairs (p,q), perform SVD of the corresponding 2x2 sub-matrix - for(Index p = 1; p < m_diagSize; ++p) + for(Index p = 1; p < diagSize(); ++p) { for(Index q = 0; q < p; ++q) { @@ -771,7 +769,7 @@ /*** step 3. The work matrix is now diagonal, so ensure it's positive so its diagonal entries are the singular values ***/ - for(Index i = 0; i < m_diagSize; ++i) + for(Index i = 0; i < diagSize(); ++i) { // For a complex matrix, some diagonal coefficients might note have been // treated by svd_precondition_2x2_block_to_be_real, and the imaginary part @@ -795,11 +793,11 @@ /*** step 4. Sort singular values in descending order and compute the number of nonzero singular values ***/ - m_nonzeroSingularValues = m_diagSize; - for(Index i = 0; i < m_diagSize; i++) + m_nonzeroSingularValues = diagSize(); + for(Index i = 0; i < diagSize(); i++) { Index pos; - RealScalar maxRemainingSingularValue = m_singularValues.tail(m_diagSize-i).maxCoeff(&pos); + RealScalar maxRemainingSingularValue = m_singularValues.tail(diagSize()-i).maxCoeff(&pos); if(numext::is_exactly_zero(maxRemainingSingularValue)) { m_nonzeroSingularValues = i;
diff --git a/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/Eigen/src/SVD/JacobiSVD_LAPACKE.h index 93244cd..11247b1 100644 --- a/Eigen/src/SVD/JacobiSVD_LAPACKE.h +++ b/Eigen/src/SVD/JacobiSVD_LAPACKE.h
@@ -51,7 +51,7 @@ allocate(matrix.rows(), matrix.cols(), computationOptions); \ \ /*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \ - m_nonzeroSingularValues = m_diagSize; \ + m_nonzeroSingularValues = diagSize(); \ \ lapack_int lda = internal::convert_index<lapack_int>(matrix.outerStride()), ldu, ldvt; \ lapack_int matrix_order = LAPACKE_COLROW; \ @@ -64,15 +64,15 @@ u = (LAPACKE_TYPE*)m_matrixU.data(); \ } else { ldu=1; u=&dummy; }\ MatrixType localV; \ - lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \ + lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(cols()) : (m_computeThinV) ? internal::convert_index<lapack_int>(diagSize()) : 1; \ if (computeV()) { \ - localV.resize(vt_rows, m_cols); \ + localV.resize(vt_rows, cols()); \ ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \ vt = (LAPACKE_TYPE*)localV.data(); \ } else { ldvt=1; vt=&dummy; }\ - Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \ + Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(diagSize(), 1); \ MatrixType m_temp; m_temp = matrix; \ - lapack_int info = LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(m_rows), internal::convert_index<lapack_int>(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ + lapack_int info = LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(rows()), internal::convert_index<lapack_int>(cols()), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ /* Check the result of the LAPACK call */ \ if (info < 0 || !m_singularValues.allFinite()) { \ m_info = InvalidInput; \ @@ -82,7 +82,7 @@ m_info = Success; \ if (computeV()) m_matrixV = localV.adjoint(); \ } \ - /* for(int i=0;i<m_diagSize;i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; m_singularValues.coeffRef(i)=RealScalar(0);}*/ \ + /* for(int i=0;i<diagSize();i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; m_singularValues.coeffRef(i)=RealScalar(0);}*/ \ m_isInitialized = true; \ return *this; \ }
diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index 10a6d30..cb018cc 100644 --- a/Eigen/src/SVD/SVDBase.h +++ b/Eigen/src/SVD/SVDBase.h
@@ -283,8 +283,9 @@ /** \returns true if \a V (full or thin) is asked for in this SVD decomposition */ inline bool computeV() const { return m_computeFullV || m_computeThinV; } - inline Index rows() const { return m_rows; } - inline Index cols() const { return m_cols; } + inline Index rows() const { return m_rows.value(); } + inline Index cols() const { return m_cols.value(); } + inline Index diagSize() const { return m_diagSize.value(); } #ifdef EIGEN_PARSED_BY_DOXYGEN /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A. @@ -348,7 +349,10 @@ bool m_computeFullU, m_computeThinU; bool m_computeFullV, m_computeThinV; unsigned int m_computationOptions; - Index m_nonzeroSingularValues, m_rows, m_cols, m_diagSize; + Index m_nonzeroSingularValues; + internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows; + internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols; + internal::variable_if_dynamic<Index, DiagSizeAtCompileTime> m_diagSize; RealScalar m_prescribedThreshold; /** \brief Default Constructor. @@ -368,9 +372,11 @@ m_computeFullV(false), m_computeThinV(false), m_computationOptions(0), - m_rows(-1), - m_cols(-1), - m_diagSize(0) {} + m_nonzeroSingularValues(0), + m_rows(RowsAtCompileTime), + m_cols(ColsAtCompileTime), + m_diagSize(DiagSizeAtCompileTime), + m_prescribedThreshold(0) {} }; #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -409,15 +415,15 @@ eigen_assert(rows >= 0 && cols >= 0); if (m_isAllocated && - rows == m_rows && - cols == m_cols && + rows == m_rows.value() && + cols == m_cols.value() && computationOptions == m_computationOptions) { return true; } - m_rows = rows; - m_cols = cols; + m_rows.setValue(rows); + m_cols.setValue(cols); m_info = Success; m_isInitialized = false; m_isAllocated = true; @@ -430,12 +436,12 @@ eigen_assert(!(m_computeFullU && m_computeThinU) && "SVDBase: you can't ask for both full and thin U"); eigen_assert(!(m_computeFullV && m_computeThinV) && "SVDBase: you can't ask for both full and thin V"); - m_diagSize = (std::min)(m_rows, m_cols); - m_singularValues.resize(m_diagSize); + m_diagSize.setValue(numext::mini(m_rows.value(), m_cols.value())); + m_singularValues.resize(m_diagSize.value()); if(RowsAtCompileTime==Dynamic) - m_matrixU.resize(m_rows, m_computeFullU ? m_rows : m_computeThinU ? m_diagSize : 0); + m_matrixU.resize(m_rows.value(), m_computeFullU ? m_rows.value() : m_computeThinU ? m_diagSize.value() : 0); if(ColsAtCompileTime==Dynamic) - m_matrixV.resize(m_cols, m_computeFullV ? m_cols : m_computeThinV ? m_diagSize : 0); + m_matrixV.resize(m_cols.value(), m_computeFullV ? m_cols.value() : m_computeThinV ? m_diagSize.value() : 0); return false; }
diff --git a/Eigen/src/ThreadPool/EventCount.h b/Eigen/src/ThreadPool/EventCount.h index 034637c..3b57d27 100644 --- a/Eigen/src/ThreadPool/EventCount.h +++ b/Eigen/src/ThreadPool/EventCount.h
@@ -230,7 +230,7 @@ void Unpark(Waiter* w) { for (Waiter* next; w; w = next) { uint64_t wnext = w->next.load(std::memory_order_relaxed) & kStackMask; - next = wnext == kStackMask ? nullptr : &waiters_[wnext]; + next = wnext == kStackMask ? nullptr : &waiters_[internal::convert_index<size_t>(wnext)]; unsigned state; { EIGEN_MUTEX_LOCK lock(w->mu);
diff --git a/Eigen/src/plugins/IndexedViewMethods.h b/Eigen/src/plugins/IndexedViewMethods.h index 78f12fe..e5432ea 100644 --- a/Eigen/src/plugins/IndexedViewMethods.h +++ b/Eigen/src/plugins/IndexedViewMethods.h
@@ -37,7 +37,7 @@ } template <typename Indices> -inline IvcColType<Indices> ivcSize(const Indices& indices) const { +inline IvcType<Indices> ivcSize(const Indices& indices) const { return internal::makeIndexedViewCompatible( indices, internal::variable_if_dynamic<Index, SizeAtCompileTime>(derived().size()), Specialized); }
diff --git a/doc/CustomizingEigen_CustomScalar.dox b/doc/CustomizingEigen_CustomScalar.dox index 24e5f56..672cfc3 100644 --- a/doc/CustomizingEigen_CustomScalar.dox +++ b/doc/CustomizingEigen_CustomScalar.dox
@@ -76,6 +76,7 @@ static inline Real epsilon() { return 0; } static inline Real dummy_precision() { return 0; } static inline int digits10() { return 0; } + static inline int max_digits10() { return 0; } enum { IsInteger = 0,
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp index 1648c7c..d06fa2c 100644 --- a/test/array_cwise.cpp +++ b/test/array_cwise.cpp
@@ -87,21 +87,21 @@ template <typename Scalar, typename Fn, typename RefFn> void binary_op_test(std::string name, Fn fun, RefFn ref) { const Scalar tol = test_precision<Scalar>(); - Array<Scalar, Dynamic, Dynamic> x; - Array<Scalar, Dynamic, Dynamic> y; - special_value_pairs(x, y); + Array<Scalar, Dynamic, Dynamic> lhs; + Array<Scalar, Dynamic, Dynamic> rhs; + special_value_pairs(lhs, rhs); - Array<Scalar, Dynamic, Dynamic> actual = fun(x, y); + Array<Scalar, Dynamic, Dynamic> actual = fun(lhs, rhs); bool all_pass = true; - for (Index i = 0; i < x.rows(); ++i) { - for (Index j = 0; j < x.cols(); ++j) { - Scalar e = static_cast<Scalar>(ref(x(i,j), y(i,j))); + for (Index i = 0; i < lhs.rows(); ++i) { + for (Index j = 0; j < lhs.cols(); ++j) { + Scalar e = static_cast<Scalar>(ref(lhs(i,j), rhs(i,j))); Scalar a = actual(i, j); bool success = (a==e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || ((numext::isnan)(a) && (numext::isnan)(e)); if ((a == a) && (e == e)) success &= (bool)numext::signbit(e) == (bool)numext::signbit(a); all_pass &= success; if (!success) { - std::cout << name << "(" << x(i,j) << "," << y(i,j) << ") = " << a << " != " << e << std::endl; + std::cout << name << "(" << lhs(i,j) << "," << rhs(i,j) << ") = " << a << " != " << e << std::endl; } } } @@ -139,27 +139,27 @@ void unary_op_test(std::string name, Fn fun, RefFn ref) { const Scalar tol = test_precision<Scalar>(); auto values = special_values<Scalar>(); - Map<Array<Scalar, Dynamic, 1>> x(values.data(), values.size()); + Map<Array<Scalar, Dynamic, 1>> valuesMap(values.data(), values.size()); - Array<Scalar, Dynamic, Dynamic> actual = fun(x); + Array<Scalar, Dynamic, Dynamic> actual = fun(valuesMap); bool all_pass = true; - for (Index i = 0; i < x.size(); ++i) { - Scalar e = static_cast<Scalar>(ref(x(i))); + for (Index i = 0; i < valuesMap.size(); ++i) { + Scalar e = static_cast<Scalar>(ref(valuesMap(i))); Scalar a = actual(i); bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || ((numext::isnan)(a) && (numext::isnan)(e)); if ((a == a) && (e == e)) success &= (bool)numext::signbit(e) == (bool)numext::signbit(a); all_pass &= success; if (!success) { - std::cout << name << "(" << x(i) << ") = " << a << " != " << e << std::endl; + std::cout << name << "(" << valuesMap(i) << ") = " << a << " != " << e << std::endl; } } VERIFY(all_pass); } #define UNARY_FUNCTOR_TEST_ARGS(fun) #fun, \ - [](const auto& x) { return (Eigen::fun)(x); }, \ - [](const auto& x) { return (std::fun)(x); } + [](const auto& x_) { return (Eigen::fun)(x_); }, \ + [](const auto& y_) { return (std::fun)(y_); } template <typename Scalar> void unary_ops_test() { @@ -267,8 +267,15 @@ #ifdef EIGEN_COMP_MSVC // Work around MSVC return value on underflow. // if std::pow returns 0 and Eigen returns a denormalized value, then skip the test - int fpclass = std::fpclassify(a); - if (e == Base(0) && fpclass == FP_SUBNORMAL) continue; + int eigen_fpclass = std::fpclassify(a); + if (e == Base(0) && eigen_fpclass == FP_SUBNORMAL) continue; + #endif + + #ifdef EIGEN_VECTORIZE_NEON + // Work around NEON flush-to-zero mode + // if std::pow returns denormalized value and Eigen returns 0, then skip the test + int ref_fpclass = std::fpclassify(e); + if (a == Base(0) && ref_fpclass == FP_SUBNORMAL) continue; #endif bool both_nan = (numext::isnan)(a) && (numext::isnan)(e); @@ -330,10 +337,6 @@ for (Base a : y) { Base e = ref_pow<Base, Exponent>::run(base, exponent); bool pass = (a == e); - //if (!NumTraits<Base>::IsInteger) { - // pass = pass || (((numext::isfinite)(e) && internal::isApprox(a, e)) || - // ((numext::isnan)(a) && (numext::isnan)(e))); - //} all_pass &= pass; if (!pass) { std::cout << "pow(" << base << "," << exponent << ") = " << a << " != " << e << std::endl; @@ -442,12 +445,6 @@ signbit_test<double>(); signbit_test<Eigen::half>(); signbit_test<Eigen::bfloat16>(); - - signbit_test<uint8_t>(); - signbit_test<uint16_t>(); - signbit_test<uint32_t>(); - signbit_test<uint64_t>(); - signbit_test<int8_t>(); signbit_test<int16_t>(); signbit_test<int32_t>(); @@ -1324,67 +1321,62 @@ CALL_SUBTEST_2( array_generic(Array22f()) ); CALL_SUBTEST_3( array_generic(Array44d()) ); CALL_SUBTEST_4( array_generic(ArrayXXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_5( array_generic(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_6( array_generic(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_6( array_generic(Array<Index,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_7( signed_shift_test(ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_7( signed_shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_8( array_generic(Array<uint32_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_8( array_generic(Array<uint64_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_7( array_generic(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_8( array_generic(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( array_generic(Array<Index,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_8( signed_shift_test(ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_9( signed_shift_test(Array<Index, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_10( array_generic(Array<uint32_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_11( array_generic(Array<uint64_t, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); } for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( comparisons(Array<float, 1, 1>()) ); CALL_SUBTEST_2( comparisons(Array22f()) ); CALL_SUBTEST_3( comparisons(Array44d()) ); - CALL_SUBTEST_5( comparisons(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_6( comparisons(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( comparisons(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_8( comparisons(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); } for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( min_max(Array<float, 1, 1>()) ); - CALL_SUBTEST_2( min_max(Array22f()) ); - CALL_SUBTEST_3( min_max(Array44d()) ); - CALL_SUBTEST_5( min_max(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_6( min_max(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( min_max(Array<float, 1, 1>()) ); + CALL_SUBTEST_7( min_max(Array22f()) ); + CALL_SUBTEST_8( min_max(Array44d()) ); + CALL_SUBTEST_9( min_max(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_10( min_max(ArrayXXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); } for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( array_real(Array<float, 1, 1>()) ); - CALL_SUBTEST_2( array_real(Array22f()) ); - CALL_SUBTEST_3( array_real(Array44d()) ); - CALL_SUBTEST_5( array_real(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_7( array_real(Array<Eigen::half, 32, 32>()) ); - CALL_SUBTEST_8( array_real(Array<Eigen::bfloat16, 32, 32>()) ); + CALL_SUBTEST_11( array_real(Array<float, 1, 1>()) ); + CALL_SUBTEST_12( array_real(Array22f()) ); + CALL_SUBTEST_13( array_real(Array44d()) ); + CALL_SUBTEST_14( array_real(ArrayXXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_15( array_real(Array<Eigen::half, 32, 32>()) ); + CALL_SUBTEST_16( array_real(Array<Eigen::bfloat16, 32, 32>()) ); } for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_4( array_complex(ArrayXXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_5( array_complex(ArrayXXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_17( array_complex(ArrayXXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_18( array_complex(ArrayXXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE)))); } for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_5( float_pow_test() ); - CALL_SUBTEST_6( int_pow_test() ); - CALL_SUBTEST_7( mixed_pow_test() ); - CALL_SUBTEST_8( signbit_tests() ); + CALL_SUBTEST_19( float_pow_test() ); + CALL_SUBTEST_20( int_pow_test() ); + CALL_SUBTEST_21( mixed_pow_test() ); + CALL_SUBTEST_22( signbit_tests() ); } for (int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_2( typed_logicals_test(ArrayX<int>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_2( typed_logicals_test(ArrayX<float>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_3( typed_logicals_test(ArrayX<double>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_3( typed_logicals_test(ArrayX<std::complex<float>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_3( typed_logicals_test(ArrayX<std::complex<double>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_23( typed_logicals_test(ArrayX<int>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_24( typed_logicals_test(ArrayX<float>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_25( typed_logicals_test(ArrayX<double>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_26( typed_logicals_test(ArrayX<std::complex<float>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_27( typed_logicals_test(ArrayX<std::complex<double>>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); } for (int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1((cast_test<1, 1>())); - CALL_SUBTEST_2((cast_test<3, 1>())); - CALL_SUBTEST_2((cast_test<3, 3>())); - CALL_SUBTEST_3((cast_test<5, 1>())); - CALL_SUBTEST_3((cast_test<5, 5>())); - CALL_SUBTEST_4((cast_test<9, 1>())); - CALL_SUBTEST_4((cast_test<9, 9>())); - CALL_SUBTEST_5((cast_test<17, 1>())); - CALL_SUBTEST_5((cast_test<17, 17>())); - CALL_SUBTEST_6((cast_test<Dynamic, 1>())); - CALL_SUBTEST_6((cast_test<Dynamic, Dynamic>())); + CALL_SUBTEST_28( (cast_test<1, 1>()) ); + CALL_SUBTEST_29( (cast_test<3, 1>()) ); + CALL_SUBTEST_30( (cast_test<5, 1>()) ); + CALL_SUBTEST_31( (cast_test<9, 1>()) ); + CALL_SUBTEST_32( (cast_test<17, 1>()) ); + CALL_SUBTEST_33( (cast_test<Dynamic, 1>()) ); } VERIFY((internal::is_same< internal::global_math_functions_filtering_base<int>::type, int >::value));
diff --git a/test/basicstuff.cpp b/test/basicstuff.cpp index 47dfc04..05cd6ad 100644 --- a/test/basicstuff.cpp +++ b/test/basicstuff.cpp
@@ -69,8 +69,10 @@ x = v1(static_cast<unsigned int>(r1)); x = v1(static_cast<signed long>(r1)); x = v1(static_cast<unsigned long>(r1)); - x = v1(static_cast<long long int>(r1)); - x = v1(static_cast<unsigned long long int>(r1)); + if(sizeof(Index) >= sizeof(long long int)) + x = v1(static_cast<long long int>(r1)); + if(sizeof(Index) >= sizeof(unsigned long long int)) + x = v1(static_cast<unsigned long long int>(r1)); VERIFY_IS_APPROX( v1, v1); VERIFY_IS_NOT_APPROX( v1, 2*v1);
diff --git a/test/bdcsvd.cpp b/test/bdcsvd.cpp index 4752809..4f0a137 100644 --- a/test/bdcsvd.cpp +++ b/test/bdcsvd.cpp
@@ -69,8 +69,13 @@ } template <typename MatrixType> -void bdcsvd_all_options(const MatrixType& input = MatrixType()) { - svd_option_checks<MatrixType, 0>(input); +void bdcsvd_thin_options(const MatrixType& input = MatrixType()) { + svd_thin_option_checks<MatrixType, 0>(input); +} + +template <typename MatrixType> +void bdcsvd_full_options(const MatrixType& input = MatrixType()) { + svd_option_checks_full_only<MatrixType, 0>(input); } template <typename MatrixType> @@ -82,13 +87,15 @@ EIGEN_DECLARE_TEST(bdcsvd) { CALL_SUBTEST_1((bdcsvd_verify_assert<Matrix3f>())); - CALL_SUBTEST_1((bdcsvd_verify_assert<Matrix4d>())); - CALL_SUBTEST_2((bdcsvd_verify_assert<Matrix<float, 10, 7>>())); - CALL_SUBTEST_2((bdcsvd_verify_assert<Matrix<float, 7, 10>>())); - CALL_SUBTEST_3((bdcsvd_verify_assert<Matrix<std::complex<double>, 6, 9>>())); + CALL_SUBTEST_2((bdcsvd_verify_assert<Matrix4d>())); + CALL_SUBTEST_3((bdcsvd_verify_assert<Matrix<float, 10, 7>>())); + CALL_SUBTEST_4((bdcsvd_verify_assert<Matrix<float, 7, 10>>())); + CALL_SUBTEST_5((bdcsvd_verify_assert<Matrix<std::complex<double>, 6, 9>>())); - CALL_SUBTEST_4((svd_all_trivial_2x2(bdcsvd_all_options<Matrix2cd>))); - CALL_SUBTEST_5((svd_all_trivial_2x2(bdcsvd_all_options<Matrix2d>))); + CALL_SUBTEST_6((svd_all_trivial_2x2(bdcsvd_thin_options<Matrix2cd>))); + CALL_SUBTEST_7((svd_all_trivial_2x2(bdcsvd_full_options<Matrix2cd>))); + CALL_SUBTEST_8((svd_all_trivial_2x2(bdcsvd_thin_options<Matrix2d>))); + CALL_SUBTEST_9((svd_all_trivial_2x2(bdcsvd_full_options<Matrix2d>))); for (int i = 0; i < g_repeat; i++) { int r = internal::random<int>(1, EIGEN_TEST_MAX_SIZE/2), @@ -97,58 +104,68 @@ TEST_SET_BUT_UNUSED_VARIABLE(r) TEST_SET_BUT_UNUSED_VARIABLE(c) - CALL_SUBTEST_6((compare_bdc_jacobi<MatrixXf>(MatrixXf(r, c)))); - CALL_SUBTEST_7((compare_bdc_jacobi<MatrixXd>(MatrixXd(r, c)))); - CALL_SUBTEST_8((compare_bdc_jacobi<MatrixXcd>(MatrixXcd(r, c)))); + CALL_SUBTEST_10((compare_bdc_jacobi<MatrixXf>(MatrixXf(r, c)))); + CALL_SUBTEST_11((compare_bdc_jacobi<MatrixXd>(MatrixXd(r, c)))); + CALL_SUBTEST_12((compare_bdc_jacobi<MatrixXcd>(MatrixXcd(r, c)))); // Test on inf/nan matrix - CALL_SUBTEST_9((svd_inf_nan<MatrixXf>())); - CALL_SUBTEST_10((svd_inf_nan<MatrixXd>())); + CALL_SUBTEST_13((svd_inf_nan<MatrixXf>())); + CALL_SUBTEST_14((svd_inf_nan<MatrixXd>())); // Verify some computations using all combinations of the Options template parameter. - CALL_SUBTEST_11((bdcsvd_all_options<Matrix3f>())); - CALL_SUBTEST_12((bdcsvd_all_options<Matrix<float, 2, 3>>())); - CALL_SUBTEST_13((bdcsvd_all_options<MatrixXd>(MatrixXd(20, 17)))); - CALL_SUBTEST_14((bdcsvd_all_options<MatrixXd>(MatrixXd(17, 20)))); - CALL_SUBTEST_15((bdcsvd_all_options<Matrix<double, Dynamic, 15>>(Matrix<double, Dynamic, 15>(r, 15)))); - CALL_SUBTEST_16((bdcsvd_all_options<Matrix<double, 13, Dynamic>>(Matrix<double, 13, Dynamic>(13, c)))); - CALL_SUBTEST_17((bdcsvd_all_options<MatrixXf>(MatrixXf(r, c)))); - CALL_SUBTEST_18((bdcsvd_all_options<MatrixXcd>(MatrixXcd(r, c)))); - CALL_SUBTEST_19((bdcsvd_all_options<MatrixXd>(MatrixXd(r, c)))); - CALL_SUBTEST_20((bdcsvd_all_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(20, 27)))); - CALL_SUBTEST_21((bdcsvd_all_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(27, 20)))); - - CALL_SUBTEST_22(( + CALL_SUBTEST_15((bdcsvd_thin_options<Matrix3f>())); + CALL_SUBTEST_16((bdcsvd_full_options<Matrix3f>())); + CALL_SUBTEST_17((bdcsvd_thin_options<Matrix<float, 2, 3>>())); + CALL_SUBTEST_18((bdcsvd_full_options<Matrix<float, 2, 3>>())); + CALL_SUBTEST_19((bdcsvd_thin_options<MatrixXd>(MatrixXd(20, 17)))); + CALL_SUBTEST_20((bdcsvd_full_options<MatrixXd>(MatrixXd(20, 17)))); + CALL_SUBTEST_21((bdcsvd_thin_options<MatrixXd>(MatrixXd(17, 20)))); + CALL_SUBTEST_22((bdcsvd_full_options<MatrixXd>(MatrixXd(17, 20)))); + CALL_SUBTEST_23((bdcsvd_thin_options<Matrix<double, Dynamic, 15>>(Matrix<double, Dynamic, 15>(r, 15)))); + CALL_SUBTEST_24((bdcsvd_full_options<Matrix<double, Dynamic, 15>>(Matrix<double, Dynamic, 15>(r, 15)))); + CALL_SUBTEST_25((bdcsvd_thin_options<Matrix<double, 13, Dynamic>>(Matrix<double, 13, Dynamic>(13, c)))); + CALL_SUBTEST_26((bdcsvd_full_options<Matrix<double, 13, Dynamic>>(Matrix<double, 13, Dynamic>(13, c)))); + CALL_SUBTEST_27((bdcsvd_thin_options<MatrixXf>(MatrixXf(r, c)))); + CALL_SUBTEST_28((bdcsvd_full_options<MatrixXf>(MatrixXf(r, c)))); + CALL_SUBTEST_29((bdcsvd_thin_options<MatrixXcd>(MatrixXcd(r, c)))); + CALL_SUBTEST_30((bdcsvd_full_options<MatrixXcd>(MatrixXcd(r, c)))); + CALL_SUBTEST_31((bdcsvd_thin_options<MatrixXd>(MatrixXd(r, c)))); + CALL_SUBTEST_32((bdcsvd_full_options<MatrixXd>(MatrixXd(r, c)))); + CALL_SUBTEST_33((bdcsvd_thin_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(20, 27)))); + CALL_SUBTEST_34((bdcsvd_full_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(20, 27)))); + CALL_SUBTEST_35((bdcsvd_thin_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(27, 20)))); + CALL_SUBTEST_36((bdcsvd_full_options<Matrix<double, Dynamic, Dynamic, RowMajor>>(Matrix<double, Dynamic, Dynamic, RowMajor>(27, 20)))); + CALL_SUBTEST_37(( svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, ColMajor, 20, 35>, ColPivHouseholderQRPreconditioner>( r, c))); - CALL_SUBTEST_22( + CALL_SUBTEST_38( (svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, ColMajor, 35, 20>, HouseholderQRPreconditioner>(r, c))); - CALL_SUBTEST_22(( + CALL_SUBTEST_39(( svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, RowMajor, 20, 35>, ColPivHouseholderQRPreconditioner>( r, c))); - CALL_SUBTEST_22( + CALL_SUBTEST_40( (svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, RowMajor, 35, 20>, HouseholderQRPreconditioner>(r, c))); } // test matrixbase method - CALL_SUBTEST_23(( bdcsvd_method<Matrix2cd>() )); - CALL_SUBTEST_23(( bdcsvd_method<Matrix3f>() )); + CALL_SUBTEST_41(( bdcsvd_method<Matrix2cd>() )); + CALL_SUBTEST_42(( bdcsvd_method<Matrix3f>() )); // Test problem size constructors - CALL_SUBTEST_24( BDCSVD<MatrixXf>(10,10) ); + CALL_SUBTEST_43( BDCSVD<MatrixXf>(10,10) ); // Check that preallocation avoids subsequent mallocs // Disabled because not supported by BDCSVD // CALL_SUBTEST_9( svd_preallocate<void>() ); - CALL_SUBTEST_25( svd_underoverflow<void>() ); + CALL_SUBTEST_44( svd_underoverflow<void>() ); // Without total deflation issues. - CALL_SUBTEST_26(( compare_bdc_jacobi_instance(true) )); - CALL_SUBTEST_26(( compare_bdc_jacobi_instance(false) )); + CALL_SUBTEST_45(( compare_bdc_jacobi_instance(true) )); + CALL_SUBTEST_46(( compare_bdc_jacobi_instance(false) )); // With total deflation issues before, when it shouldn't be triggered. - CALL_SUBTEST_27(( compare_bdc_jacobi_instance(true, 3) )); - CALL_SUBTEST_27(( compare_bdc_jacobi_instance(false, 3) )); + CALL_SUBTEST_47(( compare_bdc_jacobi_instance(true, 3) )); + CALL_SUBTEST_48(( compare_bdc_jacobi_instance(false, 3) )); }
diff --git a/test/block.cpp b/test/block.cpp index f8583c3..aba0896 100644 --- a/test/block.cpp +++ b/test/block.cpp
@@ -40,14 +40,14 @@ std::enable_if_t<((MatrixType::Flags&RowMajorBit)==0),void> check_left_top(const MatrixType& m, Index r, Index c, Index rows, Index /*unused*/) { - VERIFY_IS_EQUAL(m.leftCols(c).coeff(r+c*rows), m(r,c)); + if(c > 0) VERIFY_IS_EQUAL(m.leftCols(c).coeff(r+c*rows), m(r,c)); } template <typename MatrixType> std::enable_if_t<((MatrixType::Flags&RowMajorBit)!=0),void> check_left_top(const MatrixType& m, Index r, Index c, Index /*unused*/, Index cols) { - VERIFY_IS_EQUAL(m.topRows(r).coeff(c+r*cols), m(r,c)); + if(r > 0) VERIFY_IS_EQUAL(m.topRows(r).coeff(c+r*cols), m(r,c)); } template<typename MatrixType> void block(const MatrixType& m)
diff --git a/test/boostmultiprec.cpp b/test/boostmultiprec.cpp index e2fc9a8..5759888 100644 --- a/test/boostmultiprec.cpp +++ b/test/boostmultiprec.cpp
@@ -156,6 +156,7 @@ std::cout << "NumTraits<Real>::lowest() = " << NumTraits<Real>::lowest() << std::endl; std::cout << "NumTraits<Real>::highest() = " << NumTraits<Real>::highest() << std::endl; std::cout << "NumTraits<Real>::digits10() = " << NumTraits<Real>::digits10() << std::endl; + std::cout << "NumTraits<Real>::max_digits10() = " << NumTraits<Real>::max_digits10() << std::endl; // check stream output {
diff --git a/test/geo_quaternion.cpp b/test/geo_quaternion.cpp index a821cf2..2eef40b 100644 --- a/test/geo_quaternion.cpp +++ b/test/geo_quaternion.cpp
@@ -98,6 +98,10 @@ VERIFY_IS_MUCH_SMALLER_THAN(abs(q1.angularDistance(q2) - refangle), Scalar(1)); } + // Action on vector by the q v q* formula + VERIFY_IS_APPROX(q1 * v2, (q1 * Quaternionx(Scalar(0), v2) * q1.inverse()).vec()); + VERIFY_IS_APPROX(q1.inverse() * v2, (q1.inverse() * Quaternionx(Scalar(0), v2) * q1).vec()); + // rotation matrix conversion VERIFY_IS_APPROX(q1 * v2, q1.toRotationMatrix() * v2); VERIFY_IS_APPROX(q1 * q2 * v2,
diff --git a/test/jacobisvd.cpp b/test/jacobisvd.cpp index 3d6b630..9416b84 100644 --- a/test/jacobisvd.cpp +++ b/test/jacobisvd.cpp
@@ -35,12 +35,23 @@ } template <typename MatrixType> -void jacobisvd_all_options(const MatrixType& input = MatrixType()) { +void jacobisvd_thin_options(const MatrixType& input = MatrixType()) { MatrixType m(input.rows(), input.cols()); svd_fill_random(m); - svd_option_checks<MatrixType, 0>(m); - svd_option_checks<MatrixType, ColPivHouseholderQRPreconditioner>(m); - svd_option_checks<MatrixType, HouseholderQRPreconditioner>(m); + + svd_thin_option_checks<MatrixType, 0>(m); + svd_thin_option_checks<MatrixType, ColPivHouseholderQRPreconditioner>(m); + svd_thin_option_checks<MatrixType, HouseholderQRPreconditioner>(m); +} + +template <typename MatrixType> +void jacobisvd_full_options(const MatrixType& input = MatrixType()) { + MatrixType m(input.rows(), input.cols()); + svd_fill_random(m); + + svd_option_checks_full_only<MatrixType, 0>(m); + svd_option_checks_full_only<MatrixType, ColPivHouseholderQRPreconditioner>(m); + svd_option_checks_full_only<MatrixType, HouseholderQRPreconditioner>(m); svd_option_checks_full_only<MatrixType, FullPivHouseholderQRPreconditioner>( m); // FullPiv only used when computing full unitaries } @@ -103,18 +114,18 @@ EIGEN_DECLARE_TEST(jacobisvd) { CALL_SUBTEST_1((jacobisvd_verify_inputs<Matrix4d>())); - CALL_SUBTEST_1((jacobisvd_verify_inputs(Matrix<float, 5, Dynamic>(5, 6)))); - CALL_SUBTEST_1((jacobisvd_verify_inputs<Matrix<std::complex<double>, 7, 5>>())); + CALL_SUBTEST_2((jacobisvd_verify_inputs(Matrix<float, 5, Dynamic>(5, 6)))); + CALL_SUBTEST_3((jacobisvd_verify_inputs<Matrix<std::complex<double>, 7, 5>>())); - CALL_SUBTEST_2((jacobisvd_verify_assert<Matrix3f>())); - CALL_SUBTEST_2((jacobisvd_verify_assert<Matrix4d>())); - CALL_SUBTEST_2((jacobisvd_verify_assert<Matrix<float, 10, 12>>())); - CALL_SUBTEST_2((jacobisvd_verify_assert<Matrix<float, 12, 10>>())); - CALL_SUBTEST_2((jacobisvd_verify_assert<MatrixXf>(MatrixXf(10, 12)))); - CALL_SUBTEST_2((jacobisvd_verify_assert<MatrixXcd>(MatrixXcd(7, 5)))); + CALL_SUBTEST_4((jacobisvd_verify_assert<Matrix3f>())); + CALL_SUBTEST_5((jacobisvd_verify_assert<Matrix4d>())); + CALL_SUBTEST_6((jacobisvd_verify_assert<Matrix<float, 10, 12>>())); + CALL_SUBTEST_7((jacobisvd_verify_assert<Matrix<float, 12, 10>>())); + CALL_SUBTEST_8((jacobisvd_verify_assert<MatrixXf>(MatrixXf(10, 12)))); + CALL_SUBTEST_9((jacobisvd_verify_assert<MatrixXcd>(MatrixXcd(7, 5)))); - CALL_SUBTEST_3(svd_all_trivial_2x2(jacobisvd_all_options<Matrix2cd>)); - CALL_SUBTEST_4(svd_all_trivial_2x2(jacobisvd_all_options<Matrix2d>)); + CALL_SUBTEST_10(svd_all_trivial_2x2(jacobisvd_thin_options<Matrix2cd>)); + CALL_SUBTEST_11(svd_all_trivial_2x2(jacobisvd_thin_options<Matrix2d>)); for (int i = 0; i < g_repeat; i++) { int r = internal::random<int>(1, 30), @@ -123,64 +134,83 @@ TEST_SET_BUT_UNUSED_VARIABLE(r) TEST_SET_BUT_UNUSED_VARIABLE(c) - CALL_SUBTEST_5((jacobisvd_all_options<Matrix3f>())); - CALL_SUBTEST_6((jacobisvd_all_options<Matrix4d>())); - CALL_SUBTEST_7((jacobisvd_all_options<Matrix<float, 2, 3>>())); - CALL_SUBTEST_8((jacobisvd_all_options<Matrix<double, 4, 7>>())); - CALL_SUBTEST_9((jacobisvd_all_options<Matrix<double, 7, 4>>())); - CALL_SUBTEST_10((jacobisvd_all_options<Matrix<double, Dynamic, 5>>(Matrix<double, Dynamic, 5>(r, 5)))); - CALL_SUBTEST_11((jacobisvd_all_options<Matrix<double, 5, Dynamic>>(Matrix<double, 5, Dynamic>(5, c)))); - CALL_SUBTEST_12((jacobisvd_all_options<MatrixXf>(MatrixXf(r, c)))); - CALL_SUBTEST_13((jacobisvd_all_options<MatrixXcd>(MatrixXcd(r, c)))); - CALL_SUBTEST_14((jacobisvd_all_options<MatrixXd>(MatrixXd(r, c)))); - CALL_SUBTEST_15((jacobisvd_all_options<Matrix<double, 5, 7, RowMajor>>())); - CALL_SUBTEST_16((jacobisvd_all_options<Matrix<double, 7, 5, RowMajor>>())); + CALL_SUBTEST_12((jacobisvd_thin_options<Matrix3f>())); + CALL_SUBTEST_13((jacobisvd_full_options<Matrix3f>())); + CALL_SUBTEST_14((jacobisvd_thin_options<Matrix4d>())); + CALL_SUBTEST_15((jacobisvd_full_options<Matrix4d>())); + CALL_SUBTEST_16((jacobisvd_thin_options<Matrix<float, 2, 3>>())); + CALL_SUBTEST_17((jacobisvd_full_options<Matrix<float, 2, 3>>())); + CALL_SUBTEST_18((jacobisvd_thin_options<Matrix<double, 4, 7>>())); + CALL_SUBTEST_19((jacobisvd_full_options<Matrix<double, 4, 7>>())); + CALL_SUBTEST_20((jacobisvd_thin_options<Matrix<double, 7, 4>>())); + CALL_SUBTEST_21((jacobisvd_full_options<Matrix<double, 7, 4>>())); + CALL_SUBTEST_22((jacobisvd_thin_options<Matrix<double, Dynamic, 5>>(Matrix<double, Dynamic, 5>(r, 5)))); + CALL_SUBTEST_23((jacobisvd_full_options<Matrix<double, Dynamic, 5>>(Matrix<double, Dynamic, 5>(r, 5)))); + CALL_SUBTEST_24((jacobisvd_thin_options<Matrix<double, 5, Dynamic>>(Matrix<double, 5, Dynamic>(5, c)))); + CALL_SUBTEST_25((jacobisvd_full_options<Matrix<double, 5, Dynamic>>(Matrix<double, 5, Dynamic>(5, c)))); + CALL_SUBTEST_26((jacobisvd_thin_options<MatrixXf>(MatrixXf(r, c)))); + CALL_SUBTEST_27((jacobisvd_full_options<MatrixXf>(MatrixXf(r, c)))); + CALL_SUBTEST_28((jacobisvd_thin_options<MatrixXcd>(MatrixXcd(r, c)))); + CALL_SUBTEST_29((jacobisvd_full_options<MatrixXcd>(MatrixXcd(r, c)))); + CALL_SUBTEST_30((jacobisvd_thin_options<MatrixXd>(MatrixXd(r, c)))); + CALL_SUBTEST_31((jacobisvd_full_options<MatrixXd>(MatrixXd(r, c)))); + CALL_SUBTEST_32((jacobisvd_thin_options<Matrix<double, 5, 7, RowMajor>>())); + CALL_SUBTEST_33((jacobisvd_full_options<Matrix<double, 5, 7, RowMajor>>())); + CALL_SUBTEST_34((jacobisvd_thin_options<Matrix<double, 7, 5, RowMajor>>())); + CALL_SUBTEST_35((jacobisvd_full_options<Matrix<double, 7, 5, RowMajor>>())); MatrixXcd noQRTest = MatrixXcd(r, r); svd_fill_random(noQRTest); - CALL_SUBTEST_17((svd_option_checks<MatrixXcd, NoQRPreconditioner>(noQRTest))); + CALL_SUBTEST_36((svd_thin_option_checks<MatrixXcd, NoQRPreconditioner>(noQRTest))); + CALL_SUBTEST_36((svd_option_checks_full_only<MatrixXcd, NoQRPreconditioner>(noQRTest))); - CALL_SUBTEST_18(( + CALL_SUBTEST_37(( svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, ColMajor, 13, 15>, ColPivHouseholderQRPreconditioner>( r, c))); - CALL_SUBTEST_18( + CALL_SUBTEST_38( (svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, ColMajor, 15, 13>, HouseholderQRPreconditioner>(r, c))); - CALL_SUBTEST_18(( + CALL_SUBTEST_39(( svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, RowMajor, 13, 15>, ColPivHouseholderQRPreconditioner>( r, c))); - CALL_SUBTEST_18( + CALL_SUBTEST_40( (svd_check_max_size_matrix<Matrix<float, Dynamic, Dynamic, RowMajor, 15, 13>, HouseholderQRPreconditioner>(r, c))); // Test on inf/nan matrix - CALL_SUBTEST_19((svd_inf_nan<MatrixXf>())); - CALL_SUBTEST_19((svd_inf_nan<MatrixXd>())); + CALL_SUBTEST_41((svd_inf_nan<MatrixXf>())); + CALL_SUBTEST_42((svd_inf_nan<MatrixXd>())); - CALL_SUBTEST_20((jacobisvd_verify_assert<Matrix<double, 6, 1>>())); - CALL_SUBTEST_20((jacobisvd_verify_assert<Matrix<double, 1, 6>>())); - CALL_SUBTEST_20((jacobisvd_verify_assert<Matrix<double, Dynamic, 1>>(Matrix<double, Dynamic, 1>(r)))); - CALL_SUBTEST_20((jacobisvd_verify_assert<Matrix<double, 1, Dynamic>>(Matrix<double, 1, Dynamic>(c)))); + CALL_SUBTEST_43((jacobisvd_verify_assert<Matrix<double, 6, 1>>())); + CALL_SUBTEST_44((jacobisvd_verify_assert<Matrix<double, 1, 6>>())); + CALL_SUBTEST_45((jacobisvd_verify_assert<Matrix<double, Dynamic, 1>>(Matrix<double, Dynamic, 1>(r)))); + CALL_SUBTEST_46((jacobisvd_verify_assert<Matrix<double, 1, Dynamic>>(Matrix<double, 1, Dynamic>(c)))); } - CALL_SUBTEST_21((jacobisvd_all_options<MatrixXd>( + CALL_SUBTEST_47((jacobisvd_thin_options<MatrixXd>( MatrixXd(internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2), internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2))))); - CALL_SUBTEST_22((jacobisvd_all_options<MatrixXcd>( + CALL_SUBTEST_48((jacobisvd_full_options<MatrixXd>( + MatrixXd(internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2), + internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 2))))); + CALL_SUBTEST_49((jacobisvd_thin_options<MatrixXcd>( + MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3), + internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3))))); + CALL_SUBTEST_50((jacobisvd_full_options<MatrixXcd>( MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3), internal::random<int>(EIGEN_TEST_MAX_SIZE / 4, EIGEN_TEST_MAX_SIZE / 3))))); // test matrixbase method - CALL_SUBTEST_23(( jacobisvd_method<Matrix2cd>() )); - CALL_SUBTEST_23(( jacobisvd_method<Matrix3f>() )); + CALL_SUBTEST_51(( jacobisvd_method<Matrix2cd>() )); + CALL_SUBTEST_52(( jacobisvd_method<Matrix3f>() )); // Test problem size constructors - CALL_SUBTEST_24( JacobiSVD<MatrixXf>(10,10) ); + CALL_SUBTEST_53( JacobiSVD<MatrixXf>(10,10) ); // Check that preallocation avoids subsequent mallocs - CALL_SUBTEST_25( svd_preallocate<void>() ); + CALL_SUBTEST_54( svd_preallocate<void>() ); - CALL_SUBTEST_26( svd_underoverflow<void>() ); + CALL_SUBTEST_55( svd_underoverflow<void>() ); msvc_workaround(); }
diff --git a/test/numext.cpp b/test/numext.cpp index e99eddc..cca0411 100644 --- a/test/numext.cpp +++ b/test/numext.cpp
@@ -286,9 +286,9 @@ return all_pass; }; - bool all_pass = check_all(non_negative_values, false_mask); - all_pass = all_pass && check_all(negative_values, (NumTraits<T>::IsSigned ? true_mask : false_mask)); - VERIFY(all_pass); + bool check_all_pass = check_all(non_negative_values, false_mask); + check_all_pass = check_all_pass && check_all(negative_values, (NumTraits<T>::IsSigned ? true_mask : false_mask)); + VERIFY(check_all_pass); } }; template <typename T>
diff --git a/test/random_matrix.cpp b/test/random_matrix.cpp index 873845f..a914af1 100644 --- a/test/random_matrix.cpp +++ b/test/random_matrix.cpp
@@ -63,7 +63,7 @@ { RealVectorType svs = setupRangeSvs<RealVectorType, RealScalar>(diag_size, min_svs, max_svs); - MatrixType M; + MatrixType M = MatrixType::Zero(rows, cols); generateRandomMatrixSvs(svs, rows, cols, M); // validate dimensions
diff --git a/test/random_without_cast_overflow.h b/test/random_without_cast_overflow.h index 7f1ea5f..715bb8e 100644 --- a/test/random_without_cast_overflow.h +++ b/test/random_without_cast_overflow.h
@@ -23,7 +23,7 @@ template <typename SrcScalar, typename TgtScalar> struct random_without_cast_overflow< SrcScalar, TgtScalar, - std::enable_if_t<NumTraits<SrcScalar>::IsInteger && NumTraits<TgtScalar>::IsInteger && + std::enable_if_t<NumTraits<SrcScalar>::IsInteger && NumTraits<SrcScalar>::IsSigned && NumTraits<TgtScalar>::IsInteger && !NumTraits<TgtScalar>::IsSigned && (std::numeric_limits<SrcScalar>::digits < std::numeric_limits<TgtScalar>::digits || (std::numeric_limits<SrcScalar>::digits == std::numeric_limits<TgtScalar>::digits && @@ -34,12 +34,27 @@ } }; +// Signed to unsigned integer widening cast. +template <typename SrcScalar, typename TgtScalar> +struct random_without_cast_overflow< + SrcScalar, TgtScalar, + std::enable_if_t<NumTraits<SrcScalar>::IsInteger && !NumTraits<SrcScalar>::IsSigned && NumTraits<TgtScalar>::IsInteger && + !NumTraits<TgtScalar>::IsSigned && + (std::numeric_limits<SrcScalar>::digits < std::numeric_limits<TgtScalar>::digits || + (std::numeric_limits<SrcScalar>::digits == std::numeric_limits<TgtScalar>::digits && + NumTraits<SrcScalar>::IsSigned))>> { + static SrcScalar value() { + SrcScalar a = internal::random<SrcScalar>(); + return a; + } +}; + // Integer to unsigned narrowing cast. template <typename SrcScalar, typename TgtScalar> struct random_without_cast_overflow< SrcScalar, TgtScalar, std::enable_if_t< - NumTraits<SrcScalar>::IsInteger && NumTraits<TgtScalar>::IsInteger && !NumTraits<SrcScalar>::IsSigned && + NumTraits<SrcScalar>::IsInteger && NumTraits<TgtScalar>::IsInteger && NumTraits<TgtScalar>::IsSigned && !NumTraits<SrcScalar>::IsSigned && (std::numeric_limits<SrcScalar>::digits > std::numeric_limits<TgtScalar>::digits)>> { static SrcScalar value() { TgtScalar b = internal::random<TgtScalar>(); @@ -47,6 +62,19 @@ } }; +// Integer to unsigned narrowing cast. +template <typename SrcScalar, typename TgtScalar> +struct random_without_cast_overflow< + SrcScalar, TgtScalar, + std::enable_if_t< + NumTraits<SrcScalar>::IsInteger && NumTraits<TgtScalar>::IsInteger && !NumTraits<TgtScalar>::IsSigned && !NumTraits<SrcScalar>::IsSigned && + (std::numeric_limits<SrcScalar>::digits > std::numeric_limits<TgtScalar>::digits)>> { + static SrcScalar value() { + TgtScalar b = internal::random<TgtScalar>(); + return static_cast<SrcScalar>(b); + } +}; + // Integer to signed narrowing cast. template <typename SrcScalar, typename TgtScalar> struct random_without_cast_overflow< @@ -83,7 +111,7 @@ struct random_without_cast_overflow< SrcScalar, TgtScalar, std::enable_if_t< - !NumTraits<SrcScalar>::IsInteger && !NumTraits<SrcScalar>::IsComplex && NumTraits<TgtScalar>::IsInteger && + !NumTraits<SrcScalar>::IsInteger && !NumTraits<SrcScalar>::IsComplex && NumTraits<TgtScalar>::IsInteger && NumTraits<TgtScalar>::IsSigned && (std::numeric_limits<TgtScalar>::digits > std::numeric_limits<SrcScalar>::digits)>> { static SrcScalar value() { // NOTE: internal::random<T>() is limited by RAND_MAX, so random<int64_t> is always within that range. @@ -95,6 +123,22 @@ } }; +template <typename SrcScalar, typename TgtScalar> +struct random_without_cast_overflow< + SrcScalar, TgtScalar, + std::enable_if_t< + !NumTraits<SrcScalar>::IsInteger && !NumTraits<SrcScalar>::IsComplex && NumTraits<TgtScalar>::IsInteger && !NumTraits<TgtScalar>::IsSigned && + (std::numeric_limits<TgtScalar>::digits > std::numeric_limits<SrcScalar>::digits)>> { + static SrcScalar value() { + // NOTE: internal::random<T>() is limited by RAND_MAX, so random<int64_t> is always within that range. + // This prevents us from simply shifting bits, which would result in only 0 or -1. + // Instead, keep least-significant K bits and sign. + static const TgtScalar KeepMask = (static_cast<TgtScalar>(1) << std::numeric_limits<SrcScalar>::digits) - 1; + const TgtScalar a = internal::random<TgtScalar>(); + return static_cast<SrcScalar>(a & KeepMask); + } +}; + // Integer to floating-point, re-use above logic. template <typename SrcScalar, typename TgtScalar> struct random_without_cast_overflow<
diff --git a/test/stdvector.cpp b/test/stdvector.cpp index 2ca584b..1d44e42 100644 --- a/test/stdvector.cpp +++ b/test/stdvector.cpp
@@ -124,7 +124,7 @@ { typedef Eigen::Vector3f T; std::vector<T, Eigen::aligned_allocator<T> > v; - v.push_back(T()); + v.push_back(T(1.0f,2.0f,3.0f)); } EIGEN_DECLARE_TEST(stdvector)
diff --git a/test/svd_common.h b/test/svd_common.h index 6497470..e43adc4 100644 --- a/test/svd_common.h +++ b/test/svd_common.h
@@ -484,18 +484,15 @@ } template <typename MatrixType, int QRPreconditioner = 0> -void svd_option_checks(const MatrixType& input) { +void svd_thin_option_checks(const MatrixType& input) { MatrixType m(input.rows(), input.cols()); svd_fill_random(m); + svd_compute_checks<MatrixType, QRPreconditioner>(m); svd_compute_checks<MatrixType, QRPreconditioner | ComputeThinU>(m); svd_compute_checks<MatrixType, QRPreconditioner | ComputeThinV>(m); svd_compute_checks<MatrixType, QRPreconditioner | ComputeThinU | ComputeThinV>(m); - svd_compute_checks<MatrixType, QRPreconditioner | ComputeFullU>(m); - svd_compute_checks<MatrixType, QRPreconditioner | ComputeFullV>(m); - svd_compute_checks<MatrixType, QRPreconditioner | ComputeFullU | ComputeFullV>(m); - svd_compute_checks<MatrixType, QRPreconditioner | ComputeThinU | ComputeFullV>(m); svd_compute_checks<MatrixType, QRPreconditioner | ComputeFullU | ComputeThinV>(m);
diff --git a/test/triangular.cpp b/test/triangular.cpp index 2259869..0578b08 100644 --- a/test/triangular.cpp +++ b/test/triangular.cpp
@@ -55,6 +55,7 @@ r1(rows, cols), r2(rows, cols); VectorType v2 = VectorType::Random(rows); + VectorType v3 = VectorType::Zero(rows); MatrixType m1up = m1.template triangularView<Upper>(); MatrixType m2up = m2.template triangularView<Upper>(); @@ -96,23 +97,31 @@ Transpose<MatrixType> trm4(m4); // test back and forward substitution with a vector as the rhs m3 = m1.template triangularView<Upper>(); - VERIFY(v2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView<Lower>().solve(v2)), largerEps)); + v3 = m3.adjoint() * (m1.adjoint().template triangularView<Lower>().solve(v2)); + VERIFY(v2.isApprox(v3, largerEps)); m3 = m1.template triangularView<Lower>(); - VERIFY(v2.isApprox(m3.transpose() * (m1.transpose().template triangularView<Upper>().solve(v2)), largerEps)); + v3 = m3.transpose() * (m1.transpose().template triangularView<Upper>().solve(v2)); + VERIFY(v2.isApprox(v3, largerEps)); m3 = m1.template triangularView<Upper>(); - VERIFY(v2.isApprox(m3 * (m1.template triangularView<Upper>().solve(v2)), largerEps)); + v3 = m3 * (m1.template triangularView<Upper>().solve(v2)); + VERIFY(v2.isApprox(v3, largerEps)); m3 = m1.template triangularView<Lower>(); - VERIFY(v2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView<Lower>().solve(v2)), largerEps)); + v3 = m3.conjugate() * (m1.conjugate().template triangularView<Lower>().solve(v2)); + VERIFY(v2.isApprox(v3, largerEps)); // test back and forward substitution with a matrix as the rhs m3 = m1.template triangularView<Upper>(); - VERIFY(m2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView<Lower>().solve(m2)), largerEps)); + m4 = m3.adjoint() * (m1.adjoint().template triangularView<Lower>().solve(m2)); + VERIFY(m2.isApprox(m4, largerEps)); m3 = m1.template triangularView<Lower>(); - VERIFY(m2.isApprox(m3.transpose() * (m1.transpose().template triangularView<Upper>().solve(m2)), largerEps)); + m4 = m3.transpose() * (m1.transpose().template triangularView<Upper>().solve(m2)); + VERIFY(m2.isApprox(m4, largerEps)); m3 = m1.template triangularView<Upper>(); - VERIFY(m2.isApprox(m3 * (m1.template triangularView<Upper>().solve(m2)), largerEps)); + m4 = m3 * (m1.template triangularView<Upper>().solve(m2)); + VERIFY(m2.isApprox(m4, largerEps)); m3 = m1.template triangularView<Lower>(); - VERIFY(m2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView<Lower>().solve(m2)), largerEps)); + m4 = m3.conjugate() * (m1.conjugate().template triangularView<Lower>().solve(m2)); + VERIFY(m2.isApprox(m4, largerEps)); // check M * inv(L) using in place API m4 = m3;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index b830984..f873f2b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -360,7 +360,7 @@ int i; Index size = Index(1); for (i = 0; i < NumIndices; i++) { - internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]); + internal::check_rows_cols_for_overflow<Dynamic, Dynamic, Dynamic>::run(size, dimensions[i]); size *= dimensions[i]; } #ifdef EIGEN_INITIALIZE_COEFFS
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index ee01d99..7e92487 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -998,8 +998,8 @@ enum { Vectorizable = packet_traits<Scalar>::Vectorizable, PacketSize = packet_traits<Scalar>::size, - HasHalfPacket = unpacket_traits<HalfPacket>::size < PacketSize, HalfPacketSize = unpacket_traits<HalfPacket>::size, + HasHalfPacket = static_cast<int>(HalfPacketSize) < static_cast<int>(PacketSize) }; public:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 55369e1..2fa6777 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -91,8 +91,8 @@ eigen_assert(rhs_block); BlockSizes sz = ComputeLhsRhsBlockSizes(bm, bk, bn); char* block_mem = static_cast<char*>(d.allocate(sz.lhs_size + sz.rhs_size)); - *lhs_block = reinterpret_cast<LhsScalar*>(block_mem); - *rhs_block = reinterpret_cast<RhsScalar*>(block_mem + sz.lhs_size); + *lhs_block = static_cast<LhsScalar*>(static_cast<void*>(block_mem)); + *rhs_block = static_cast<RhsScalar*>(static_cast<void*>(block_mem + sz.lhs_size)); return block_mem; } @@ -115,12 +115,12 @@ for (Index x = 0; x < num_slices; x++) { if (num_lhs > 0) lhs_blocks[x].resize(num_lhs); for (Index m = 0; m < num_lhs; m++) { - lhs_blocks[x][m] = reinterpret_cast<LhsScalar*>(mem); + lhs_blocks[x][m] = static_cast<LhsScalar*>(static_cast<void*>(mem)); mem += sz.lhs_size; } if (num_rhs > 0) rhs_blocks[x].resize(num_rhs); for (Index n = 0; n < num_rhs; n++) { - rhs_blocks[x][n] = reinterpret_cast<RhsScalar*>(mem); + rhs_blocks[x][n] = static_cast<RhsScalar*>(static_cast<void*>(mem)); mem += sz.rhs_size; } }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h index 8478cc3..7fb54b9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
@@ -443,6 +443,14 @@ return m_base_mapper.template loadPacket<PacketT,Alignment>(i + m_vert_offset, j + m_horiz_offset); } + template <typename PacketT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacketPartial(Index i, Index j, Index, Index = 0) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket<PacketT,Alignment>(i, j); + } + return m_base_mapper.template loadPacket<PacketT,Alignment>(i + m_vert_offset, j + m_horiz_offset); + } + template <typename PacketT, int AlignmentType> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT loadPacket(Index i, Index j) const { if (UseDirectOffsets) { @@ -473,6 +481,8 @@ return SubMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const Index stride() const { return m_base_mapper.stride(); } + template <typename PacketT, int AlignmentType> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { EIGEN_STATIC_ASSERT((internal::is_same<PacketT, PacketT>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 6d9e9dc..914140d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -423,7 +423,7 @@ template <typename Dims1, typename Dims2, ptrdiff_t n> struct sizes_match_below_dim<Dims1, Dims2, n, n> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) { - return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) && + return numext::equal_strict(array_get<n - 1>(dims1), array_get<n - 1>(dims2)) && sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2); } };
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 137b962..0b4ce1a 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT
@@ -181,15 +181,16 @@ typedef typename impl_type::Scalar Scalar; typedef typename impl_type::Complex Complex; - enum Flag { - Default=0, // goof proof - Unscaled=1, - HalfSpectrum=2, - // SomeOtherSpeedOptimization=4 - Speedy=32767 - }; + using Flag = int; + static constexpr Flag Default = 0; + static constexpr Flag Unscaled = 1; + static constexpr Flag HalfSpectrum = 2; + static constexpr Flag Speedy = 32767; - FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags) { } + FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags) + { + eigen_assert((flags == Default || flags == Unscaled || flags == HalfSpectrum || flags == Speedy) && "invalid flags argument"); + } inline bool HasFlag(Flag f) const { return (m_flag & (int)f) == f;}
diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp index 0955795..8452577 100644 --- a/unsupported/test/EulerAngles.cpp +++ b/unsupported/test/EulerAngles.cpp
@@ -9,6 +9,8 @@ #include "main.h" +EIGEN_DISABLE_DEPRECATED_WARNING + #include <unsupported/Eigen/EulerAngles> using namespace Eigen;
diff --git a/unsupported/test/NNLS.cpp b/unsupported/test/NNLS.cpp index b347c06..58092ef 100644 --- a/unsupported/test/NNLS.cpp +++ b/unsupported/test/NNLS.cpp
@@ -54,7 +54,7 @@ } template <typename MatrixType> -void test_nnls_random_problem() { +void test_nnls_random_problem(const MatrixType&) { // // SETUP // @@ -448,12 +448,9 @@ for (int i = 0; i < g_repeat; i++) { // Essential NNLS properties, across different types. - CALL_SUBTEST_2(test_nnls_random_problem<MatrixXf>()); - CALL_SUBTEST_3(test_nnls_random_problem<MatrixXd>()); - { - using MatFixed = Matrix<double, 12, 5>; - CALL_SUBTEST_4(test_nnls_random_problem<MatFixed>()); - } + CALL_SUBTEST_2(test_nnls_random_problem(MatrixXf())); + CALL_SUBTEST_3(test_nnls_random_problem(MatrixXd())); + CALL_SUBTEST_4(test_nnls_random_problem(Matrix<double, 12, 5>())); CALL_SUBTEST_5(test_nnls_with_half_precision()); // Robustness tests:
diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp index 81d81ef..7976dcb 100644 --- a/unsupported/test/cxx11_tensor_casts.cpp +++ b/unsupported/test/cxx11_tensor_casts.cpp
@@ -30,7 +30,7 @@ for (int i = 0; i < 101; ++i) { for (int j = 0; j < 201; ++j) { - const ToType ref = static_cast<ToType>(ftensor(i, j)); + const ToType ref = internal::cast<FromType, ToType>(ftensor(i, j)); VERIFY_IS_EQUAL(ttensor(i, j), ref); } }
diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp index ce4e538..071046a 100644 --- a/unsupported/test/cxx11_tensor_reduction.cpp +++ b/unsupported/test/cxx11_tensor_reduction.cpp
@@ -485,7 +485,7 @@ // Test against probabilistic forward error bound. In reality, the error is much smaller // when we use tree summation. double err = Eigen::numext::abs(static_cast<double>(sum()) - expected_sum); - double tol = numext::sqrt(static_cast<double>(num_elements)) * NumTraits<ScalarType>::epsilon() * static_cast<ScalarType>(abs_sum); + double tol = numext::sqrt(static_cast<double>(num_elements)) * static_cast<double>(NumTraits<ScalarType>::epsilon()) * abs_sum; VERIFY_LE(err, tol); } }
diff --git a/unsupported/test/mpreal_support.cpp b/unsupported/test/mpreal_support.cpp index 10beb07..7256dc0 100644 --- a/unsupported/test/mpreal_support.cpp +++ b/unsupported/test/mpreal_support.cpp
@@ -20,6 +20,7 @@ std::cerr << "highest = " << NumTraits<mpreal>::highest() << "\n"; std::cerr << "lowest = " << NumTraits<mpreal>::lowest() << "\n"; std::cerr << "digits10 = " << NumTraits<mpreal>::digits10() << "\n"; + std::cerr << "max_digits10 = " << NumTraits<mpreal>::max_digits10() << "\n"; for(int i = 0; i < g_repeat; i++) { int s = Eigen::internal::random<int>(1,100);